target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
  38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                          TCGv_i64, uint32_t, uint32_t);
  40
  41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                      TCGv_ptr, TCGv_i32);
  43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                      TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                          TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(int x)
  64 {
  65     return (16 << tszimm_esz(x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(int x)
  70 {
  71     return x - (8 << tszimm_esz(x));
  72 }
  73
  74 static inline int plus1(int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
 282 {
 283     return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 284 }
 285
 286 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
 287 {
 288     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 289 }
 290
 291 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
 292 {
 293     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 294 }
 295
 296 /*
 297  *** SVE Integer Arithmetic - Unpredicated Group
 298  */
 299
 300 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
 301 {
 302     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 303 }
 304
 305 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
 306 {
 307     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 308 }
 309
 310 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 311 {
 312     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 313 }
 314
 315 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 316 {
 317     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 318 }
 319
 320 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 321 {
 322     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 323 }
 324
 325 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 326 {
 327     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 328 }
 329
 330 /*
 331  *** SVE Integer Arithmetic - Binary Predicated Group
 332  */
 333
 334 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 335 {
 336     unsigned vsz = vec_full_reg_size(s);
 337     if (fn == NULL) {
 338         return false;
 339     }
 340     if (sve_access_check(s)) {
 341         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 342                            vec_full_reg_offset(s, a->rn),
 343                            vec_full_reg_offset(s, a->rm),
 344                            pred_full_reg_offset(s, a->pg),
 345                            vsz, vsz, 0, fn);
 346     }
 347     return true;
 348 }
 349
 350 /* Select active elememnts from Zn and inactive elements from Zm,
 351  * storing the result in Zd.
 352  */
 353 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 354 {
 355     static gen_helper_gvec_4 * const fns[4] = {
 356         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 357         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 358     };
 359     unsigned vsz = vec_full_reg_size(s);
 360     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 361                        vec_full_reg_offset(s, rn),
 362                        vec_full_reg_offset(s, rm),
 363                        pred_full_reg_offset(s, pg),
 364                        vsz, vsz, 0, fns[esz]);
 365 }
 366
 367 #define DO_ZPZZ(NAME, name) \
 368 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
 369 {                                                                         \
 370     static gen_helper_gvec_4 * const fns[4] = {                           \
 371         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 372         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 373     };                                                                    \
 374     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 375 }
 376
 377 DO_ZPZZ(AND, and)
 378 DO_ZPZZ(EOR, eor)
 379 DO_ZPZZ(ORR, orr)
 380 DO_ZPZZ(BIC, bic)
 381
 382 DO_ZPZZ(ADD, add)
 383 DO_ZPZZ(SUB, sub)
 384
 385 DO_ZPZZ(SMAX, smax)
 386 DO_ZPZZ(UMAX, umax)
 387 DO_ZPZZ(SMIN, smin)
 388 DO_ZPZZ(UMIN, umin)
 389 DO_ZPZZ(SABD, sabd)
 390 DO_ZPZZ(UABD, uabd)
 391
 392 DO_ZPZZ(MUL, mul)
 393 DO_ZPZZ(SMULH, smulh)
 394 DO_ZPZZ(UMULH, umulh)
 395
 396 DO_ZPZZ(ASR, asr)
 397 DO_ZPZZ(LSR, lsr)
 398 DO_ZPZZ(LSL, lsl)
 399
 400 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 401 {
 402     static gen_helper_gvec_4 * const fns[4] = {
 403         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 404     };
 405     return do_zpzz_ool(s, a, fns[a->esz]);
 406 }
 407
 408 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 409 {
 410     static gen_helper_gvec_4 * const fns[4] = {
 411         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 412     };
 413     return do_zpzz_ool(s, a, fns[a->esz]);
 414 }
 415
 416 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 417 {
 418     if (sve_access_check(s)) {
 419         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 420     }
 421     return true;
 422 }
 423
 424 #undef DO_ZPZZ
 425
 426 /*
 427  *** SVE Integer Arithmetic - Unary Predicated Group
 428  */
 429
 430 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 431 {
 432     if (fn == NULL) {
 433         return false;
 434     }
 435     if (sve_access_check(s)) {
 436         unsigned vsz = vec_full_reg_size(s);
 437         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 438                            vec_full_reg_offset(s, a->rn),
 439                            pred_full_reg_offset(s, a->pg),
 440                            vsz, vsz, 0, fn);
 441     }
 442     return true;
 443 }
 444
 445 #define DO_ZPZ(NAME, name) \
 446 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
 447 {                                                                   \
 448     static gen_helper_gvec_3 * const fns[4] = {                     \
 449         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 450         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 451     };                                                              \
 452     return do_zpz_ool(s, a, fns[a->esz]);                           \
 453 }
 454
 455 DO_ZPZ(CLS, cls)
 456 DO_ZPZ(CLZ, clz)
 457 DO_ZPZ(CNT_zpz, cnt_zpz)
 458 DO_ZPZ(CNOT, cnot)
 459 DO_ZPZ(NOT_zpz, not_zpz)
 460 DO_ZPZ(ABS, abs)
 461 DO_ZPZ(NEG, neg)
 462
 463 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
 464 {
 465     static gen_helper_gvec_3 * const fns[4] = {
 466         NULL,
 467         gen_helper_sve_fabs_h,
 468         gen_helper_sve_fabs_s,
 469         gen_helper_sve_fabs_d
 470     };
 471     return do_zpz_ool(s, a, fns[a->esz]);
 472 }
 473
 474 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
 475 {
 476     static gen_helper_gvec_3 * const fns[4] = {
 477         NULL,
 478         gen_helper_sve_fneg_h,
 479         gen_helper_sve_fneg_s,
 480         gen_helper_sve_fneg_d
 481     };
 482     return do_zpz_ool(s, a, fns[a->esz]);
 483 }
 484
 485 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
 486 {
 487     static gen_helper_gvec_3 * const fns[4] = {
 488         NULL,
 489         gen_helper_sve_sxtb_h,
 490         gen_helper_sve_sxtb_s,
 491         gen_helper_sve_sxtb_d
 492     };
 493     return do_zpz_ool(s, a, fns[a->esz]);
 494 }
 495
 496 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
 497 {
 498     static gen_helper_gvec_3 * const fns[4] = {
 499         NULL,
 500         gen_helper_sve_uxtb_h,
 501         gen_helper_sve_uxtb_s,
 502         gen_helper_sve_uxtb_d
 503     };
 504     return do_zpz_ool(s, a, fns[a->esz]);
 505 }
 506
 507 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
 508 {
 509     static gen_helper_gvec_3 * const fns[4] = {
 510         NULL, NULL,
 511         gen_helper_sve_sxth_s,
 512         gen_helper_sve_sxth_d
 513     };
 514     return do_zpz_ool(s, a, fns[a->esz]);
 515 }
 516
 517 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
 518 {
 519     static gen_helper_gvec_3 * const fns[4] = {
 520         NULL, NULL,
 521         gen_helper_sve_uxth_s,
 522         gen_helper_sve_uxth_d
 523     };
 524     return do_zpz_ool(s, a, fns[a->esz]);
 525 }
 526
 527 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
 528 {
 529     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 530 }
 531
 532 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 535 }
 536
 537 #undef DO_ZPZ
 538
 539 /*
 540  *** SVE Integer Reduction Group
 541  */
 542
 543 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 544 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 545                        gen_helper_gvec_reduc *fn)
 546 {
 547     unsigned vsz = vec_full_reg_size(s);
 548     TCGv_ptr t_zn, t_pg;
 549     TCGv_i32 desc;
 550     TCGv_i64 temp;
 551
 552     if (fn == NULL) {
 553         return false;
 554     }
 555     if (!sve_access_check(s)) {
 556         return true;
 557     }
 558
 559     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 560     temp = tcg_temp_new_i64();
 561     t_zn = tcg_temp_new_ptr();
 562     t_pg = tcg_temp_new_ptr();
 563
 564     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 565     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 566     fn(temp, t_zn, t_pg, desc);
 567     tcg_temp_free_ptr(t_zn);
 568     tcg_temp_free_ptr(t_pg);
 569     tcg_temp_free_i32(desc);
 570
 571     write_fp_dreg(s, a->rd, temp);
 572     tcg_temp_free_i64(temp);
 573     return true;
 574 }
 575
 576 #define DO_VPZ(NAME, name) \
 577 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
 578 {                                                                        \
 579     static gen_helper_gvec_reduc * const fns[4] = {                      \
 580         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 581         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 582     };                                                                   \
 583     return do_vpz_ool(s, a, fns[a->esz]);                                \
 584 }
 585
 586 DO_VPZ(ORV, orv)
 587 DO_VPZ(ANDV, andv)
 588 DO_VPZ(EORV, eorv)
 589
 590 DO_VPZ(UADDV, uaddv)
 591 DO_VPZ(SMAXV, smaxv)
 592 DO_VPZ(UMAXV, umaxv)
 593 DO_VPZ(SMINV, sminv)
 594 DO_VPZ(UMINV, uminv)
 595
 596 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
 597 {
 598     static gen_helper_gvec_reduc * const fns[4] = {
 599         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 600         gen_helper_sve_saddv_s, NULL
 601     };
 602     return do_vpz_ool(s, a, fns[a->esz]);
 603 }
 604
 605 #undef DO_VPZ
 606
 607 /*
 608  *** SVE Shift by Immediate - Predicated Group
 609  */
 610
 611 /* Store zero into every active element of Zd.  We will use this for two
 612  * and three-operand predicated instructions for which logic dictates a
 613  * zero result.
 614  */
 615 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 616 {
 617     static gen_helper_gvec_2 * const fns[4] = {
 618         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 619         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 620     };
 621     if (sve_access_check(s)) {
 622         unsigned vsz = vec_full_reg_size(s);
 623         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 624                            pred_full_reg_offset(s, pg),
 625                            vsz, vsz, 0, fns[esz]);
 626     }
 627     return true;
 628 }
 629
 630 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 631 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 632 {
 633     static gen_helper_gvec_3 * const fns[4] = {
 634         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 635         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 636     };
 637     unsigned vsz = vec_full_reg_size(s);
 638     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 639                        vec_full_reg_offset(s, rn),
 640                        pred_full_reg_offset(s, pg),
 641                        vsz, vsz, 0, fns[esz]);
 642 }
 643
 644 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 645                         gen_helper_gvec_3 *fn)
 646 {
 647     if (sve_access_check(s)) {
 648         unsigned vsz = vec_full_reg_size(s);
 649         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 650                            vec_full_reg_offset(s, a->rn),
 651                            pred_full_reg_offset(s, a->pg),
 652                            vsz, vsz, a->imm, fn);
 653     }
 654     return true;
 655 }
 656
 657 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
 658 {
 659     static gen_helper_gvec_3 * const fns[4] = {
 660         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 661         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 662     };
 663     if (a->esz < 0) {
 664         /* Invalid tsz encoding -- see tszimm_esz. */
 665         return false;
 666     }
 667     /* Shift by element size is architecturally valid.  For
 668        arithmetic right-shift, it's the same as by one less. */
 669     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 670     return do_zpzi_ool(s, a, fns[a->esz]);
 671 }
 672
 673 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
 674 {
 675     static gen_helper_gvec_3 * const fns[4] = {
 676         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 677         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 678     };
 679     if (a->esz < 0) {
 680         return false;
 681     }
 682     /* Shift by element size is architecturally valid.
 683        For logical shifts, it is a zeroing operation.  */
 684     if (a->imm >= (8 << a->esz)) {
 685         return do_clr_zp(s, a->rd, a->pg, a->esz);
 686     } else {
 687         return do_zpzi_ool(s, a, fns[a->esz]);
 688     }
 689 }
 690
 691 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
 692 {
 693     static gen_helper_gvec_3 * const fns[4] = {
 694         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 695         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 696     };
 697     if (a->esz < 0) {
 698         return false;
 699     }
 700     /* Shift by element size is architecturally valid.
 701        For logical shifts, it is a zeroing operation.  */
 702     if (a->imm >= (8 << a->esz)) {
 703         return do_clr_zp(s, a->rd, a->pg, a->esz);
 704     } else {
 705         return do_zpzi_ool(s, a, fns[a->esz]);
 706     }
 707 }
 708
 709 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
 710 {
 711     static gen_helper_gvec_3 * const fns[4] = {
 712         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 713         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 714     };
 715     if (a->esz < 0) {
 716         return false;
 717     }
 718     /* Shift by element size is architecturally valid.  For arithmetic
 719        right shift for division, it is a zeroing operation.  */
 720     if (a->imm >= (8 << a->esz)) {
 721         return do_clr_zp(s, a->rd, a->pg, a->esz);
 722     } else {
 723         return do_zpzi_ool(s, a, fns[a->esz]);
 724     }
 725 }
 726
 727 /*
 728  *** SVE Bitwise Shift - Predicated Group
 729  */
 730
 731 #define DO_ZPZW(NAME, name) \
 732 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
 733 {                                                                         \
 734     static gen_helper_gvec_4 * const fns[3] = {                           \
 735         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 736         gen_helper_sve_##name##_zpzw_s,                                   \
 737     };                                                                    \
 738     if (a->esz < 0 || a->esz >= 3) {                                      \
 739         return false;                                                     \
 740     }                                                                     \
 741     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 742 }
 743
 744 DO_ZPZW(ASR, asr)
 745 DO_ZPZW(LSR, lsr)
 746 DO_ZPZW(LSL, lsl)
 747
 748 #undef DO_ZPZW
 749
 750 /*
 751  *** SVE Bitwise Shift - Unpredicated Group
 752  */
 753
 754 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 755                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 756                                          int64_t, uint32_t, uint32_t))
 757 {
 758     if (a->esz < 0) {
 759         /* Invalid tsz encoding -- see tszimm_esz. */
 760         return false;
 761     }
 762     if (sve_access_check(s)) {
 763         unsigned vsz = vec_full_reg_size(s);
 764         /* Shift by element size is architecturally valid.  For
 765            arithmetic right-shift, it's the same as by one less.
 766            Otherwise it is a zeroing operation.  */
 767         if (a->imm >= 8 << a->esz) {
 768             if (asr) {
 769                 a->imm = (8 << a->esz) - 1;
 770             } else {
 771                 do_dupi_z(s, a->rd, 0);
 772                 return true;
 773             }
 774         }
 775         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 776                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 777     }
 778     return true;
 779 }
 780
 781 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
 782 {
 783     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 784 }
 785
 786 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
 787 {
 788     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 789 }
 790
 791 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
 792 {
 793     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 794 }
 795
 796 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 797 {
 798     if (fn == NULL) {
 799         return false;
 800     }
 801     if (sve_access_check(s)) {
 802         unsigned vsz = vec_full_reg_size(s);
 803         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 804                            vec_full_reg_offset(s, a->rn),
 805                            vec_full_reg_offset(s, a->rm),
 806                            vsz, vsz, 0, fn);
 807     }
 808     return true;
 809 }
 810
 811 #define DO_ZZW(NAME, name) \
 812 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
 813 {                                                                         \
 814     static gen_helper_gvec_3 * const fns[4] = {                           \
 815         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 816         gen_helper_sve_##name##_zzw_s, NULL                               \
 817     };                                                                    \
 818     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 819 }
 820
 821 DO_ZZW(ASR, asr)
 822 DO_ZZW(LSR, lsr)
 823 DO_ZZW(LSL, lsl)
 824
 825 #undef DO_ZZW
 826
 827 /*
 828  *** SVE Integer Multiply-Add Group
 829  */
 830
 831 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 832                          gen_helper_gvec_5 *fn)
 833 {
 834     if (sve_access_check(s)) {
 835         unsigned vsz = vec_full_reg_size(s);
 836         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 837                            vec_full_reg_offset(s, a->ra),
 838                            vec_full_reg_offset(s, a->rn),
 839                            vec_full_reg_offset(s, a->rm),
 840                            pred_full_reg_offset(s, a->pg),
 841                            vsz, vsz, 0, fn);
 842     }
 843     return true;
 844 }
 845
 846 #define DO_ZPZZZ(NAME, name) \
 847 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
 848 {                                                                    \
 849     static gen_helper_gvec_5 * const fns[4] = {                      \
 850         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 851         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 852     };                                                               \
 853     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 854 }
 855
 856 DO_ZPZZZ(MLA, mla)
 857 DO_ZPZZZ(MLS, mls)
 858
 859 #undef DO_ZPZZZ
 860
 861 /*
 862  *** SVE Index Generation Group
 863  */
 864
 865 static void do_index(DisasContext *s, int esz, int rd,
 866                      TCGv_i64 start, TCGv_i64 incr)
 867 {
 868     unsigned vsz = vec_full_reg_size(s);
 869     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 870     TCGv_ptr t_zd = tcg_temp_new_ptr();
 871
 872     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 873     if (esz == 3) {
 874         gen_helper_sve_index_d(t_zd, start, incr, desc);
 875     } else {
 876         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 877         static index_fn * const fns[3] = {
 878             gen_helper_sve_index_b,
 879             gen_helper_sve_index_h,
 880             gen_helper_sve_index_s,
 881         };
 882         TCGv_i32 s32 = tcg_temp_new_i32();
 883         TCGv_i32 i32 = tcg_temp_new_i32();
 884
 885         tcg_gen_extrl_i64_i32(s32, start);
 886         tcg_gen_extrl_i64_i32(i32, incr);
 887         fns[esz](t_zd, s32, i32, desc);
 888
 889         tcg_temp_free_i32(s32);
 890         tcg_temp_free_i32(i32);
 891     }
 892     tcg_temp_free_ptr(t_zd);
 893     tcg_temp_free_i32(desc);
 894 }
 895
 896 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
 897 {
 898     if (sve_access_check(s)) {
 899         TCGv_i64 start = tcg_const_i64(a->imm1);
 900         TCGv_i64 incr = tcg_const_i64(a->imm2);
 901         do_index(s, a->esz, a->rd, start, incr);
 902         tcg_temp_free_i64(start);
 903         tcg_temp_free_i64(incr);
 904     }
 905     return true;
 906 }
 907
 908 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
 909 {
 910     if (sve_access_check(s)) {
 911         TCGv_i64 start = tcg_const_i64(a->imm);
 912         TCGv_i64 incr = cpu_reg(s, a->rm);
 913         do_index(s, a->esz, a->rd, start, incr);
 914         tcg_temp_free_i64(start);
 915     }
 916     return true;
 917 }
 918
 919 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
 920 {
 921     if (sve_access_check(s)) {
 922         TCGv_i64 start = cpu_reg(s, a->rn);
 923         TCGv_i64 incr = tcg_const_i64(a->imm);
 924         do_index(s, a->esz, a->rd, start, incr);
 925         tcg_temp_free_i64(incr);
 926     }
 927     return true;
 928 }
 929
 930 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
 931 {
 932     if (sve_access_check(s)) {
 933         TCGv_i64 start = cpu_reg(s, a->rn);
 934         TCGv_i64 incr = cpu_reg(s, a->rm);
 935         do_index(s, a->esz, a->rd, start, incr);
 936     }
 937     return true;
 938 }
 939
 940 /*
 941  *** SVE Stack Allocation Group
 942  */
 943
 944 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 945 {
 946     if (sve_access_check(s)) {
 947         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 948         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 949         tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 950     }
 951     return true;
 952 }
 953
 954 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 955 {
 956     if (sve_access_check(s)) {
 957         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 958         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 959         tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 960     }
 961     return true;
 962 }
 963
 964 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 965 {
 966     if (sve_access_check(s)) {
 967         TCGv_i64 reg = cpu_reg(s, a->rd);
 968         tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 969     }
 970     return true;
 971 }
 972
 973 /*
 974  *** SVE Compute Vector Address Group
 975  */
 976
 977 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 978 {
 979     if (sve_access_check(s)) {
 980         unsigned vsz = vec_full_reg_size(s);
 981         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 982                            vec_full_reg_offset(s, a->rn),
 983                            vec_full_reg_offset(s, a->rm),
 984                            vsz, vsz, a->imm, fn);
 985     }
 986     return true;
 987 }
 988
 989 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
 990 {
 991     return do_adr(s, a, gen_helper_sve_adr_p32);
 992 }
 993
 994 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
 995 {
 996     return do_adr(s, a, gen_helper_sve_adr_p64);
 997 }
 998
 999 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
1000 {
1001     return do_adr(s, a, gen_helper_sve_adr_s32);
1002 }
1003
1004 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
1005 {
1006     return do_adr(s, a, gen_helper_sve_adr_u32);
1007 }
1008
1009 /*
1010  *** SVE Integer Misc - Unpredicated Group
1011  */
1012
1013 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1014 {
1015     static gen_helper_gvec_2 * const fns[4] = {
1016         NULL,
1017         gen_helper_sve_fexpa_h,
1018         gen_helper_sve_fexpa_s,
1019         gen_helper_sve_fexpa_d,
1020     };
1021     if (a->esz == 0) {
1022         return false;
1023     }
1024     if (sve_access_check(s)) {
1025         unsigned vsz = vec_full_reg_size(s);
1026         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1027                            vec_full_reg_offset(s, a->rn),
1028                            vsz, vsz, 0, fns[a->esz]);
1029     }
1030     return true;
1031 }
1032
1033 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1034 {
1035     static gen_helper_gvec_3 * const fns[4] = {
1036         NULL,
1037         gen_helper_sve_ftssel_h,
1038         gen_helper_sve_ftssel_s,
1039         gen_helper_sve_ftssel_d,
1040     };
1041     if (a->esz == 0) {
1042         return false;
1043     }
1044     if (sve_access_check(s)) {
1045         unsigned vsz = vec_full_reg_size(s);
1046         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1047                            vec_full_reg_offset(s, a->rn),
1048                            vec_full_reg_offset(s, a->rm),
1049                            vsz, vsz, 0, fns[a->esz]);
1050     }
1051     return true;
1052 }
1053
1054 /*
1055  *** SVE Predicate Logical Operations Group
1056  */
1057
1058 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1059                           const GVecGen4 *gvec_op)
1060 {
1061     if (!sve_access_check(s)) {
1062         return true;
1063     }
1064
1065     unsigned psz = pred_gvec_reg_size(s);
1066     int dofs = pred_full_reg_offset(s, a->rd);
1067     int nofs = pred_full_reg_offset(s, a->rn);
1068     int mofs = pred_full_reg_offset(s, a->rm);
1069     int gofs = pred_full_reg_offset(s, a->pg);
1070
1071     if (psz == 8) {
1072         /* Do the operation and the flags generation in temps.  */
1073         TCGv_i64 pd = tcg_temp_new_i64();
1074         TCGv_i64 pn = tcg_temp_new_i64();
1075         TCGv_i64 pm = tcg_temp_new_i64();
1076         TCGv_i64 pg = tcg_temp_new_i64();
1077
1078         tcg_gen_ld_i64(pn, cpu_env, nofs);
1079         tcg_gen_ld_i64(pm, cpu_env, mofs);
1080         tcg_gen_ld_i64(pg, cpu_env, gofs);
1081
1082         gvec_op->fni8(pd, pn, pm, pg);
1083         tcg_gen_st_i64(pd, cpu_env, dofs);
1084
1085         do_predtest1(pd, pg);
1086
1087         tcg_temp_free_i64(pd);
1088         tcg_temp_free_i64(pn);
1089         tcg_temp_free_i64(pm);
1090         tcg_temp_free_i64(pg);
1091     } else {
1092         /* The operation and flags generation is large.  The computation
1093          * of the flags depends on the original contents of the guarding
1094          * predicate.  If the destination overwrites the guarding predicate,
1095          * then the easiest way to get this right is to save a copy.
1096           */
1097         int tofs = gofs;
1098         if (a->rd == a->pg) {
1099             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1100             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1101         }
1102
1103         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1104         do_predtest(s, dofs, tofs, psz / 8);
1105     }
1106     return true;
1107 }
1108
1109 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1110 {
1111     tcg_gen_and_i64(pd, pn, pm);
1112     tcg_gen_and_i64(pd, pd, pg);
1113 }
1114
1115 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1116                            TCGv_vec pm, TCGv_vec pg)
1117 {
1118     tcg_gen_and_vec(vece, pd, pn, pm);
1119     tcg_gen_and_vec(vece, pd, pd, pg);
1120 }
1121
1122 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1123 {
1124     static const GVecGen4 op = {
1125         .fni8 = gen_and_pg_i64,
1126         .fniv = gen_and_pg_vec,
1127         .fno = gen_helper_sve_and_pppp,
1128         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1129     };
1130     if (a->s) {
1131         return do_pppp_flags(s, a, &op);
1132     } else if (a->rn == a->rm) {
1133         if (a->pg == a->rn) {
1134             return do_mov_p(s, a->rd, a->rn);
1135         } else {
1136             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1137         }
1138     } else if (a->pg == a->rn || a->pg == a->rm) {
1139         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1140     } else {
1141         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1142     }
1143 }
1144
1145 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1146 {
1147     tcg_gen_andc_i64(pd, pn, pm);
1148     tcg_gen_and_i64(pd, pd, pg);
1149 }
1150
1151 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1152                            TCGv_vec pm, TCGv_vec pg)
1153 {
1154     tcg_gen_andc_vec(vece, pd, pn, pm);
1155     tcg_gen_and_vec(vece, pd, pd, pg);
1156 }
1157
1158 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1159 {
1160     static const GVecGen4 op = {
1161         .fni8 = gen_bic_pg_i64,
1162         .fniv = gen_bic_pg_vec,
1163         .fno = gen_helper_sve_bic_pppp,
1164         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1165     };
1166     if (a->s) {
1167         return do_pppp_flags(s, a, &op);
1168     } else if (a->pg == a->rn) {
1169         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1170     } else {
1171         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1172     }
1173 }
1174
1175 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1176 {
1177     tcg_gen_xor_i64(pd, pn, pm);
1178     tcg_gen_and_i64(pd, pd, pg);
1179 }
1180
1181 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1182                            TCGv_vec pm, TCGv_vec pg)
1183 {
1184     tcg_gen_xor_vec(vece, pd, pn, pm);
1185     tcg_gen_and_vec(vece, pd, pd, pg);
1186 }
1187
1188 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1189 {
1190     static const GVecGen4 op = {
1191         .fni8 = gen_eor_pg_i64,
1192         .fniv = gen_eor_pg_vec,
1193         .fno = gen_helper_sve_eor_pppp,
1194         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1195     };
1196     if (a->s) {
1197         return do_pppp_flags(s, a, &op);
1198     } else {
1199         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1200     }
1201 }
1202
1203 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1204 {
1205     tcg_gen_and_i64(pn, pn, pg);
1206     tcg_gen_andc_i64(pm, pm, pg);
1207     tcg_gen_or_i64(pd, pn, pm);
1208 }
1209
1210 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1211                            TCGv_vec pm, TCGv_vec pg)
1212 {
1213     tcg_gen_and_vec(vece, pn, pn, pg);
1214     tcg_gen_andc_vec(vece, pm, pm, pg);
1215     tcg_gen_or_vec(vece, pd, pn, pm);
1216 }
1217
1218 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1219 {
1220     static const GVecGen4 op = {
1221         .fni8 = gen_sel_pg_i64,
1222         .fniv = gen_sel_pg_vec,
1223         .fno = gen_helper_sve_sel_pppp,
1224         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1225     };
1226     if (a->s) {
1227         return false;
1228     } else {
1229         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1230     }
1231 }
1232
1233 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1234 {
1235     tcg_gen_or_i64(pd, pn, pm);
1236     tcg_gen_and_i64(pd, pd, pg);
1237 }
1238
1239 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1240                            TCGv_vec pm, TCGv_vec pg)
1241 {
1242     tcg_gen_or_vec(vece, pd, pn, pm);
1243     tcg_gen_and_vec(vece, pd, pd, pg);
1244 }
1245
1246 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1247 {
1248     static const GVecGen4 op = {
1249         .fni8 = gen_orr_pg_i64,
1250         .fniv = gen_orr_pg_vec,
1251         .fno = gen_helper_sve_orr_pppp,
1252         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1253     };
1254     if (a->s) {
1255         return do_pppp_flags(s, a, &op);
1256     } else if (a->pg == a->rn && a->rn == a->rm) {
1257         return do_mov_p(s, a->rd, a->rn);
1258     } else {
1259         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1260     }
1261 }
1262
1263 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1264 {
1265     tcg_gen_orc_i64(pd, pn, pm);
1266     tcg_gen_and_i64(pd, pd, pg);
1267 }
1268
1269 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1270                            TCGv_vec pm, TCGv_vec pg)
1271 {
1272     tcg_gen_orc_vec(vece, pd, pn, pm);
1273     tcg_gen_and_vec(vece, pd, pd, pg);
1274 }
1275
1276 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1277 {
1278     static const GVecGen4 op = {
1279         .fni8 = gen_orn_pg_i64,
1280         .fniv = gen_orn_pg_vec,
1281         .fno = gen_helper_sve_orn_pppp,
1282         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1283     };
1284     if (a->s) {
1285         return do_pppp_flags(s, a, &op);
1286     } else {
1287         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1288     }
1289 }
1290
1291 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1292 {
1293     tcg_gen_or_i64(pd, pn, pm);
1294     tcg_gen_andc_i64(pd, pg, pd);
1295 }
1296
1297 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1298                            TCGv_vec pm, TCGv_vec pg)
1299 {
1300     tcg_gen_or_vec(vece, pd, pn, pm);
1301     tcg_gen_andc_vec(vece, pd, pg, pd);
1302 }
1303
1304 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1305 {
1306     static const GVecGen4 op = {
1307         .fni8 = gen_nor_pg_i64,
1308         .fniv = gen_nor_pg_vec,
1309         .fno = gen_helper_sve_nor_pppp,
1310         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1311     };
1312     if (a->s) {
1313         return do_pppp_flags(s, a, &op);
1314     } else {
1315         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1316     }
1317 }
1318
1319 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1320 {
1321     tcg_gen_and_i64(pd, pn, pm);
1322     tcg_gen_andc_i64(pd, pg, pd);
1323 }
1324
1325 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1326                            TCGv_vec pm, TCGv_vec pg)
1327 {
1328     tcg_gen_and_vec(vece, pd, pn, pm);
1329     tcg_gen_andc_vec(vece, pd, pg, pd);
1330 }
1331
1332 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1333 {
1334     static const GVecGen4 op = {
1335         .fni8 = gen_nand_pg_i64,
1336         .fniv = gen_nand_pg_vec,
1337         .fno = gen_helper_sve_nand_pppp,
1338         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1339     };
1340     if (a->s) {
1341         return do_pppp_flags(s, a, &op);
1342     } else {
1343         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1344     }
1345 }
1346
1347 /*
1348  *** SVE Predicate Misc Group
1349  */
1350
1351 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1352 {
1353     if (sve_access_check(s)) {
1354         int nofs = pred_full_reg_offset(s, a->rn);
1355         int gofs = pred_full_reg_offset(s, a->pg);
1356         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1357
1358         if (words == 1) {
1359             TCGv_i64 pn = tcg_temp_new_i64();
1360             TCGv_i64 pg = tcg_temp_new_i64();
1361
1362             tcg_gen_ld_i64(pn, cpu_env, nofs);
1363             tcg_gen_ld_i64(pg, cpu_env, gofs);
1364             do_predtest1(pn, pg);
1365
1366             tcg_temp_free_i64(pn);
1367             tcg_temp_free_i64(pg);
1368         } else {
1369             do_predtest(s, nofs, gofs, words);
1370         }
1371     }
1372     return true;
1373 }
1374
/* Translate-time version of the ARM pseudocode DecodePredCount.
 *
 * FULLSZ >> ESZ gives the number of elements available.  PATTERN then
 * selects how many of them are counted:
 *   0x00         POW2:  largest power of two not above the total
 *   0x01 - 0x08  VL1 .. VL8:  exactly that many, if available
 *   0x09 - 0x0d  VL16 .. VL256:  exactly that many, if available
 *   0x1d, 0x1e   MUL4, MUL3:  largest multiple of 4 / 3
 *   0x1f         ALL:  every element
 * Any other pattern value, and any fixed count that exceeds the number
 * of elements available, yields 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    const unsigned nelem = fullsz >> esz;
    unsigned want;

    if (pattern == 0x0) {
        return pow2floor(nelem);
    }
    if (pattern == 0x1d) {
        return nelem - nelem % 4;
    }
    if (pattern == 0x1e) {
        return nelem - nelem % 3;
    }
    if (pattern == 0x1f) {
        return nelem;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {
        want = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        want = 16 << (pattern - 9);
    } else {
        /* #uimm5 */
        return 0;
    }

    return nelem >= want ? want : 0;
}
1412
1413 /* This handles all of the predicate initialization instructions,
1414  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1415  * so that decode_pred_count returns 0.  For SETFFR, we will have
1416  * set RD == 16 == FFR.
1417  */
1418 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1419 {
1420     if (!sve_access_check(s)) {
1421         return true;
1422     }
1423
1424     unsigned fullsz = vec_full_reg_size(s);
1425     unsigned ofs = pred_full_reg_offset(s, rd);
1426     unsigned numelem, setsz, i;
1427     uint64_t word, lastword;
1428     TCGv_i64 t;
1429
1430     numelem = decode_pred_count(fullsz, pat, esz);
1431
1432     /* Determine what we must store into each bit, and how many.  */
1433     if (numelem == 0) {
1434         lastword = word = 0;
1435         setsz = fullsz;
1436     } else {
1437         setsz = numelem << esz;
1438         lastword = word = pred_esz_masks[esz];
1439         if (setsz % 64) {
1440             lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1441         }
1442     }
1443
1444     t = tcg_temp_new_i64();
1445     if (fullsz <= 64) {
1446         tcg_gen_movi_i64(t, lastword);
1447         tcg_gen_st_i64(t, cpu_env, ofs);
1448         goto done;
1449     }
1450
1451     if (word == lastword) {
1452         unsigned maxsz = size_for_gvec(fullsz / 8);
1453         unsigned oprsz = size_for_gvec(setsz / 8);
1454
1455         if (oprsz * 8 == setsz) {
1456             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1457             goto done;
1458         }
1459     }
1460
1461     setsz /= 8;
1462     fullsz /= 8;
1463
1464     tcg_gen_movi_i64(t, word);
1465     for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1466         tcg_gen_st_i64(t, cpu_env, ofs + i);
1467     }
1468     if (lastword != word) {
1469         tcg_gen_movi_i64(t, lastword);
1470         tcg_gen_st_i64(t, cpu_env, ofs + i);
1471         i += 8;
1472     }
1473     if (i < fullsz) {
1474         tcg_gen_movi_i64(t, 0);
1475         for (; i < fullsz; i += 8) {
1476             tcg_gen_st_i64(t, cpu_env, ofs + i);
1477         }
1478     }
1479
1480  done:
1481     tcg_temp_free_i64(t);
1482
1483     /* PTRUES */
1484     if (setflag) {
1485         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1486         tcg_gen_movi_i32(cpu_CF, word == 0);
1487         tcg_gen_movi_i32(cpu_VF, 0);
1488         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1489     }
1490     return true;
1491 }
1492
1493 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1494 {
1495     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1496 }
1497
1498 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1499 {
1500     /* Note pat == 31 is #all, to set all elements.  */
1501     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1502 }
1503
1504 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1505 {
1506     /* Note pat == 32 is #unimp, to set no elements.  */
1507     return do_predset(s, 0, a->rd, 32, false);
1508 }
1509
1510 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1511 {
1512     /* The path through do_pppp_flags is complicated enough to want to avoid
1513      * duplication.  Frob the arguments into the form of a predicated AND.
1514      */
1515     arg_rprr_s alt_a = {
1516         .rd = a->rd, .pg = a->pg, .s = a->s,
1517         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1518     };
1519     return trans_AND_pppp(s, &alt_a);
1520 }
1521
1522 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1523 {
1524     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1525 }
1526
1527 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1528 {
1529     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1530 }
1531
1532 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1533                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1534                                            TCGv_ptr, TCGv_i32))
1535 {
1536     if (!sve_access_check(s)) {
1537         return true;
1538     }
1539
1540     TCGv_ptr t_pd = tcg_temp_new_ptr();
1541     TCGv_ptr t_pg = tcg_temp_new_ptr();
1542     TCGv_i32 t;
1543     unsigned desc;
1544
1545     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1546     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1547
1548     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1549     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1550     t = tcg_const_i32(desc);
1551
1552     gen_fn(t, t_pd, t_pg, t);
1553     tcg_temp_free_ptr(t_pd);
1554     tcg_temp_free_ptr(t_pg);
1555
1556     do_pred_flags(t);
1557     tcg_temp_free_i32(t);
1558     return true;
1559 }
1560
1561 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1562 {
1563     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1564 }
1565
1566 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1567 {
1568     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1569 }
1570
1571 /*
1572  *** SVE Element Count Group
1573  */
1574
1575 /* Perform an inline saturating addition of a 32-bit value within
1576  * a 64-bit register.  The second operand is known to be positive,
1577  * which halves the comparisions we must perform to bound the result.
1578  */
1579 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1580 {
1581     int64_t ibound;
1582     TCGv_i64 bound;
1583     TCGCond cond;
1584
1585     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1586     if (u) {
1587         tcg_gen_ext32u_i64(reg, reg);
1588     } else {
1589         tcg_gen_ext32s_i64(reg, reg);
1590     }
1591     if (d) {
1592         tcg_gen_sub_i64(reg, reg, val);
1593         ibound = (u ? 0 : INT32_MIN);
1594         cond = TCG_COND_LT;
1595     } else {
1596         tcg_gen_add_i64(reg, reg, val);
1597         ibound = (u ? UINT32_MAX : INT32_MAX);
1598         cond = TCG_COND_GT;
1599     }
1600     bound = tcg_const_i64(ibound);
1601     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1602     tcg_temp_free_i64(bound);
1603 }
1604
1605 /* Similarly with 64-bit values.  */
1606 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1607 {
1608     TCGv_i64 t0 = tcg_temp_new_i64();
1609     TCGv_i64 t1 = tcg_temp_new_i64();
1610     TCGv_i64 t2;
1611
1612     if (u) {
1613         if (d) {
1614             tcg_gen_sub_i64(t0, reg, val);
1615             tcg_gen_movi_i64(t1, 0);
1616             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1617         } else {
1618             tcg_gen_add_i64(t0, reg, val);
1619             tcg_gen_movi_i64(t1, -1);
1620             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1621         }
1622     } else {
1623         if (d) {
1624             /* Detect signed overflow for subtraction.  */
1625             tcg_gen_xor_i64(t0, reg, val);
1626             tcg_gen_sub_i64(t1, reg, val);
1627             tcg_gen_xor_i64(reg, reg, t1);
1628             tcg_gen_and_i64(t0, t0, reg);
1629
1630             /* Bound the result.  */
1631             tcg_gen_movi_i64(reg, INT64_MIN);
1632             t2 = tcg_const_i64(0);
1633             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1634         } else {
1635             /* Detect signed overflow for addition.  */
1636             tcg_gen_xor_i64(t0, reg, val);
1637             tcg_gen_add_i64(reg, reg, val);
1638             tcg_gen_xor_i64(t1, reg, val);
1639             tcg_gen_andc_i64(t0, t1, t0);
1640
1641             /* Bound the result.  */
1642             tcg_gen_movi_i64(t1, INT64_MAX);
1643             t2 = tcg_const_i64(0);
1644             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1645         }
1646         tcg_temp_free_i64(t2);
1647     }
1648     tcg_temp_free_i64(t0);
1649     tcg_temp_free_i64(t1);
1650 }
1651
1652 /* Similarly with a vector and a scalar operand.  */
1653 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1654                               TCGv_i64 val, bool u, bool d)
1655 {
1656     unsigned vsz = vec_full_reg_size(s);
1657     TCGv_ptr dptr, nptr;
1658     TCGv_i32 t32, desc;
1659     TCGv_i64 t64;
1660
1661     dptr = tcg_temp_new_ptr();
1662     nptr = tcg_temp_new_ptr();
1663     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1664     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1665     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1666
1667     switch (esz) {
1668     case MO_8:
1669         t32 = tcg_temp_new_i32();
1670         tcg_gen_extrl_i64_i32(t32, val);
1671         if (d) {
1672             tcg_gen_neg_i32(t32, t32);
1673         }
1674         if (u) {
1675             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1676         } else {
1677             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1678         }
1679         tcg_temp_free_i32(t32);
1680         break;
1681
1682     case MO_16:
1683         t32 = tcg_temp_new_i32();
1684         tcg_gen_extrl_i64_i32(t32, val);
1685         if (d) {
1686             tcg_gen_neg_i32(t32, t32);
1687         }
1688         if (u) {
1689             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1690         } else {
1691             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1692         }
1693         tcg_temp_free_i32(t32);
1694         break;
1695
1696     case MO_32:
1697         t64 = tcg_temp_new_i64();
1698         if (d) {
1699             tcg_gen_neg_i64(t64, val);
1700         } else {
1701             tcg_gen_mov_i64(t64, val);
1702         }
1703         if (u) {
1704             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1705         } else {
1706             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1707         }
1708         tcg_temp_free_i64(t64);
1709         break;
1710
1711     case MO_64:
1712         if (u) {
1713             if (d) {
1714                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1715             } else {
1716                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1717             }
1718         } else if (d) {
1719             t64 = tcg_temp_new_i64();
1720             tcg_gen_neg_i64(t64, val);
1721             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1722             tcg_temp_free_i64(t64);
1723         } else {
1724             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1725         }
1726         break;
1727
1728     default:
1729         g_assert_not_reached();
1730     }
1731
1732     tcg_temp_free_ptr(dptr);
1733     tcg_temp_free_ptr(nptr);
1734     tcg_temp_free_i32(desc);
1735 }
1736
1737 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1738 {
1739     if (sve_access_check(s)) {
1740         unsigned fullsz = vec_full_reg_size(s);
1741         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1742         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1743     }
1744     return true;
1745 }
1746
1747 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1748 {
1749     if (sve_access_check(s)) {
1750         unsigned fullsz = vec_full_reg_size(s);
1751         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752         int inc = numelem * a->imm * (a->d ? -1 : 1);
1753         TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755         tcg_gen_addi_i64(reg, reg, inc);
1756     }
1757     return true;
1758 }
1759
1760 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1761 {
1762     if (!sve_access_check(s)) {
1763         return true;
1764     }
1765
1766     unsigned fullsz = vec_full_reg_size(s);
1767     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1768     int inc = numelem * a->imm;
1769     TCGv_i64 reg = cpu_reg(s, a->rd);
1770
1771     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1772     if (inc == 0) {
1773         if (a->u) {
1774             tcg_gen_ext32u_i64(reg, reg);
1775         } else {
1776             tcg_gen_ext32s_i64(reg, reg);
1777         }
1778     } else {
1779         TCGv_i64 t = tcg_const_i64(inc);
1780         do_sat_addsub_32(reg, t, a->u, a->d);
1781         tcg_temp_free_i64(t);
1782     }
1783     return true;
1784 }
1785
1786 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1787 {
1788     if (!sve_access_check(s)) {
1789         return true;
1790     }
1791
1792     unsigned fullsz = vec_full_reg_size(s);
1793     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1794     int inc = numelem * a->imm;
1795     TCGv_i64 reg = cpu_reg(s, a->rd);
1796
1797     if (inc != 0) {
1798         TCGv_i64 t = tcg_const_i64(inc);
1799         do_sat_addsub_64(reg, t, a->u, a->d);
1800         tcg_temp_free_i64(t);
1801     }
1802     return true;
1803 }
1804
1805 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1806 {
1807     if (a->esz == 0) {
1808         return false;
1809     }
1810
1811     unsigned fullsz = vec_full_reg_size(s);
1812     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1813     int inc = numelem * a->imm;
1814
1815     if (inc != 0) {
1816         if (sve_access_check(s)) {
1817             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1818             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1819                               vec_full_reg_offset(s, a->rn),
1820                               t, fullsz, fullsz);
1821             tcg_temp_free_i64(t);
1822         }
1823     } else {
1824         do_mov_z(s, a->rd, a->rn);
1825     }
1826     return true;
1827 }
1828
1829 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1830 {
1831     if (a->esz == 0) {
1832         return false;
1833     }
1834
1835     unsigned fullsz = vec_full_reg_size(s);
1836     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1837     int inc = numelem * a->imm;
1838
1839     if (inc != 0) {
1840         if (sve_access_check(s)) {
1841             TCGv_i64 t = tcg_const_i64(inc);
1842             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1843             tcg_temp_free_i64(t);
1844         }
1845     } else {
1846         do_mov_z(s, a->rd, a->rn);
1847     }
1848     return true;
1849 }
1850
1851 /*
1852  *** SVE Bitwise Immediate Group
1853  */
1854
1855 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1856 {
1857     uint64_t imm;
1858     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1859                                 extract32(a->dbm, 0, 6),
1860                                 extract32(a->dbm, 6, 6))) {
1861         return false;
1862     }
1863     if (sve_access_check(s)) {
1864         unsigned vsz = vec_full_reg_size(s);
1865         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1866                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1867     }
1868     return true;
1869 }
1870
1871 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1872 {
1873     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1874 }
1875
1876 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1877 {
1878     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1879 }
1880
1881 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1882 {
1883     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1884 }
1885
1886 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1887 {
1888     uint64_t imm;
1889     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1890                                 extract32(a->dbm, 0, 6),
1891                                 extract32(a->dbm, 6, 6))) {
1892         return false;
1893     }
1894     if (sve_access_check(s)) {
1895         do_dupi_z(s, a->rd, imm);
1896     }
1897     return true;
1898 }
1899
1900 /*
1901  *** SVE Integer Wide Immediate - Predicated Group
1902  */
1903
1904 /* Implement all merging copies.  This is used for CPY (immediate),
1905  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1906  */
1907 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1908                      TCGv_i64 val)
1909 {
1910     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1911     static gen_cpy * const fns[4] = {
1912         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1913         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1914     };
1915     unsigned vsz = vec_full_reg_size(s);
1916     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1917     TCGv_ptr t_zd = tcg_temp_new_ptr();
1918     TCGv_ptr t_zn = tcg_temp_new_ptr();
1919     TCGv_ptr t_pg = tcg_temp_new_ptr();
1920
1921     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1922     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1923     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1924
1925     fns[esz](t_zd, t_zn, t_pg, val, desc);
1926
1927     tcg_temp_free_ptr(t_zd);
1928     tcg_temp_free_ptr(t_zn);
1929     tcg_temp_free_ptr(t_pg);
1930     tcg_temp_free_i32(desc);
1931 }
1932
1933 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1934 {
1935     if (a->esz == 0) {
1936         return false;
1937     }
1938     if (sve_access_check(s)) {
1939         /* Decode the VFP immediate.  */
1940         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1941         TCGv_i64 t_imm = tcg_const_i64(imm);
1942         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943         tcg_temp_free_i64(t_imm);
1944     }
1945     return true;
1946 }
1947
1948 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1949 {
1950     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1951         return false;
1952     }
1953     if (sve_access_check(s)) {
1954         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1955         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1956         tcg_temp_free_i64(t_imm);
1957     }
1958     return true;
1959 }
1960
1961 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1962 {
1963     static gen_helper_gvec_2i * const fns[4] = {
1964         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1965         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1966     };
1967
1968     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1969         return false;
1970     }
1971     if (sve_access_check(s)) {
1972         unsigned vsz = vec_full_reg_size(s);
1973         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1974         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1975                             pred_full_reg_offset(s, a->pg),
1976                             t_imm, vsz, vsz, 0, fns[a->esz]);
1977         tcg_temp_free_i64(t_imm);
1978     }
1979     return true;
1980 }
1981
1982 /*
1983  *** SVE Permute Extract Group
1984  */
1985
1986 static bool trans_EXT(DisasContext *s, arg_EXT *a)
1987 {
1988     if (!sve_access_check(s)) {
1989         return true;
1990     }
1991
1992     unsigned vsz = vec_full_reg_size(s);
1993     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1994     unsigned n_siz = vsz - n_ofs;
1995     unsigned d = vec_full_reg_offset(s, a->rd);
1996     unsigned n = vec_full_reg_offset(s, a->rn);
1997     unsigned m = vec_full_reg_offset(s, a->rm);
1998
1999     /* Use host vector move insns if we have appropriate sizes
2000      * and no unfortunate overlap.
2001      */
2002     if (m != d
2003         && n_ofs == size_for_gvec(n_ofs)
2004         && n_siz == size_for_gvec(n_siz)
2005         && (d != n || n_siz <= n_ofs)) {
2006         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2007         if (n_ofs != 0) {
2008             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2009         }
2010     } else {
2011         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2012     }
2013     return true;
2014 }
2015
2016 /*
2017  *** SVE Permute - Unpredicated Group
2018  */
2019
2020 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2021 {
2022     if (sve_access_check(s)) {
2023         unsigned vsz = vec_full_reg_size(s);
2024         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2025                              vsz, vsz, cpu_reg_sp(s, a->rn));
2026     }
2027     return true;
2028 }
2029
2030 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2031 {
2032     if ((a->imm & 0x1f) == 0) {
2033         return false;
2034     }
2035     if (sve_access_check(s)) {
2036         unsigned vsz = vec_full_reg_size(s);
2037         unsigned dofs = vec_full_reg_offset(s, a->rd);
2038         unsigned esz, index;
2039
2040         esz = ctz32(a->imm);
2041         index = a->imm >> (esz + 1);
2042
2043         if ((index << esz) < vsz) {
2044             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2045             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2046         } else {
2047             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2048         }
2049     }
2050     return true;
2051 }
2052
2053 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2054 {
2055     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2056     static gen_insr * const fns[4] = {
2057         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2058         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2059     };
2060     unsigned vsz = vec_full_reg_size(s);
2061     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2062     TCGv_ptr t_zd = tcg_temp_new_ptr();
2063     TCGv_ptr t_zn = tcg_temp_new_ptr();
2064
2065     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2066     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2067
2068     fns[a->esz](t_zd, t_zn, val, desc);
2069
2070     tcg_temp_free_ptr(t_zd);
2071     tcg_temp_free_ptr(t_zn);
2072     tcg_temp_free_i32(desc);
2073 }
2074
2075 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2076 {
2077     if (sve_access_check(s)) {
2078         TCGv_i64 t = tcg_temp_new_i64();
2079         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2080         do_insr_i64(s, a, t);
2081         tcg_temp_free_i64(t);
2082     }
2083     return true;
2084 }
2085
2086 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2087 {
2088     if (sve_access_check(s)) {
2089         do_insr_i64(s, a, cpu_reg(s, a->rm));
2090     }
2091     return true;
2092 }
2093
2094 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2095 {
2096     static gen_helper_gvec_2 * const fns[4] = {
2097         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2098         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2099     };
2100
2101     if (sve_access_check(s)) {
2102         unsigned vsz = vec_full_reg_size(s);
2103         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2104                            vec_full_reg_offset(s, a->rn),
2105                            vsz, vsz, 0, fns[a->esz]);
2106     }
2107     return true;
2108 }
2109
2110 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2111 {
2112     static gen_helper_gvec_3 * const fns[4] = {
2113         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2114         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2115     };
2116
2117     if (sve_access_check(s)) {
2118         unsigned vsz = vec_full_reg_size(s);
2119         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2120                            vec_full_reg_offset(s, a->rn),
2121                            vec_full_reg_offset(s, a->rm),
2122                            vsz, vsz, 0, fns[a->esz]);
2123     }
2124     return true;
2125 }
2126
2127 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2128 {
2129     static gen_helper_gvec_2 * const fns[4][2] = {
2130         { NULL, NULL },
2131         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2132         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2133         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2134     };
2135
2136     if (a->esz == 0) {
2137         return false;
2138     }
2139     if (sve_access_check(s)) {
2140         unsigned vsz = vec_full_reg_size(s);
2141         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2142                            vec_full_reg_offset(s, a->rn)
2143                            + (a->h ? vsz / 2 : 0),
2144                            vsz, vsz, 0, fns[a->esz][a->u]);
2145     }
2146     return true;
2147 }
2148
2149 /*
2150  *** SVE Permute - Predicates Group
2151  */
2152
2153 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2154                           gen_helper_gvec_3 *fn)
2155 {
2156     if (!sve_access_check(s)) {
2157         return true;
2158     }
2159
2160     unsigned vsz = pred_full_reg_size(s);
2161
2162     /* Predicate sizes may be smaller and cannot use simd_desc.
2163        We cannot round up, as we do elsewhere, because we need
2164        the exact size for ZIP2 and REV.  We retain the style for
2165        the other helpers for consistency.  */
2166     TCGv_ptr t_d = tcg_temp_new_ptr();
2167     TCGv_ptr t_n = tcg_temp_new_ptr();
2168     TCGv_ptr t_m = tcg_temp_new_ptr();
2169     TCGv_i32 t_desc;
2170     int desc;
2171
2172     desc = vsz - 2;
2173     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2174     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2175
2176     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2177     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2178     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2179     t_desc = tcg_const_i32(desc);
2180
2181     fn(t_d, t_n, t_m, t_desc);
2182
2183     tcg_temp_free_ptr(t_d);
2184     tcg_temp_free_ptr(t_n);
2185     tcg_temp_free_ptr(t_m);
2186     tcg_temp_free_i32(t_desc);
2187     return true;
2188 }
2189
2190 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2191                           gen_helper_gvec_2 *fn)
2192 {
2193     if (!sve_access_check(s)) {
2194         return true;
2195     }
2196
2197     unsigned vsz = pred_full_reg_size(s);
2198     TCGv_ptr t_d = tcg_temp_new_ptr();
2199     TCGv_ptr t_n = tcg_temp_new_ptr();
2200     TCGv_i32 t_desc;
2201     int desc;
2202
2203     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2204     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2205
2206     /* Predicate sizes may be smaller and cannot use simd_desc.
2207        We cannot round up, as we do elsewhere, because we need
2208        the exact size for ZIP2 and REV.  We retain the style for
2209        the other helpers for consistency.  */
2210
2211     desc = vsz - 2;
2212     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2213     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2214     t_desc = tcg_const_i32(desc);
2215
2216     fn(t_d, t_n, t_desc);
2217
2218     tcg_temp_free_i32(t_desc);
2219     tcg_temp_free_ptr(t_d);
2220     tcg_temp_free_ptr(t_n);
2221     return true;
2222 }
2223
2224 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2225 {
2226     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2227 }
2228
2229 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2230 {
2231     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2232 }
2233
2234 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2235 {
2236     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2237 }
2238
2239 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2240 {
2241     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2242 }
2243
2244 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2245 {
2246     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2247 }
2248
2249 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2250 {
2251     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2252 }
2253
2254 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2255 {
2256     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2257 }
2258
2259 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2260 {
2261     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2262 }
2263
2264 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2265 {
2266     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2267 }
2268
2269 /*
2270  *** SVE Permute - Interleaving Group
2271  */
2272
2273 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2274 {
2275     static gen_helper_gvec_3 * const fns[4] = {
2276         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2277         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2278     };
2279
2280     if (sve_access_check(s)) {
2281         unsigned vsz = vec_full_reg_size(s);
2282         unsigned high_ofs = high ? vsz / 2 : 0;
2283         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2284                            vec_full_reg_offset(s, a->rn) + high_ofs,
2285                            vec_full_reg_offset(s, a->rm) + high_ofs,
2286                            vsz, vsz, 0, fns[a->esz]);
2287     }
2288     return true;
2289 }
2290
2291 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2292                             gen_helper_gvec_3 *fn)
2293 {
2294     if (sve_access_check(s)) {
2295         unsigned vsz = vec_full_reg_size(s);
2296         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2297                            vec_full_reg_offset(s, a->rn),
2298                            vec_full_reg_offset(s, a->rm),
2299                            vsz, vsz, data, fn);
2300     }
2301     return true;
2302 }
2303
2304 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2305 {
2306     return do_zip(s, a, false);
2307 }
2308
2309 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2310 {
2311     return do_zip(s, a, true);
2312 }
2313
2314 static gen_helper_gvec_3 * const uzp_fns[4] = {
2315     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2316     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2317 };
2318
2319 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2320 {
2321     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2322 }
2323
2324 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2325 {
2326     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2327 }
2328
2329 static gen_helper_gvec_3 * const trn_fns[4] = {
2330     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2331     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2332 };
2333
2334 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2335 {
2336     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2337 }
2338
2339 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2340 {
2341     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2342 }
2343
2344 /*
2345  *** SVE Permute Vector - Predicated Group
2346  */
2347
2348 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2349 {
2350     static gen_helper_gvec_3 * const fns[4] = {
2351         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2352     };
2353     return do_zpz_ool(s, a, fns[a->esz]);
2354 }
2355
2356 /* Call the helper that computes the ARM LastActiveElement pseudocode
2357  * function, scaled by the element size.  This includes the not found
2358  * indication; e.g. not found for esz=3 is -8.
2359  */
2360 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2361 {
2362     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2363      * round up, as we do elsewhere, because we need the exact size.
2364      */
2365     TCGv_ptr t_p = tcg_temp_new_ptr();
2366     TCGv_i32 t_desc;
2367     unsigned vsz = pred_full_reg_size(s);
2368     unsigned desc;
2369
2370     desc = vsz - 2;
2371     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2372
2373     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2374     t_desc = tcg_const_i32(desc);
2375
2376     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2377
2378     tcg_temp_free_i32(t_desc);
2379     tcg_temp_free_ptr(t_p);
2380 }
2381
2382 /* Increment LAST to the offset of the next element in the vector,
2383  * wrapping around to 0.
2384  */
2385 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2386 {
2387     unsigned vsz = vec_full_reg_size(s);
2388
2389     tcg_gen_addi_i32(last, last, 1 << esz);
2390     if (is_power_of_2(vsz)) {
2391         tcg_gen_andi_i32(last, last, vsz - 1);
2392     } else {
2393         TCGv_i32 max = tcg_const_i32(vsz);
2394         TCGv_i32 zero = tcg_const_i32(0);
2395         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2396         tcg_temp_free_i32(max);
2397         tcg_temp_free_i32(zero);
2398     }
2399 }
2400
2401 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2402 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2403 {
2404     unsigned vsz = vec_full_reg_size(s);
2405
2406     if (is_power_of_2(vsz)) {
2407         tcg_gen_andi_i32(last, last, vsz - 1);
2408     } else {
2409         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2410         TCGv_i32 zero = tcg_const_i32(0);
2411         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2412         tcg_temp_free_i32(max);
2413         tcg_temp_free_i32(zero);
2414     }
2415 }
2416
2417 /* Load an unsigned element of ESZ from BASE+OFS.  */
2418 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2419 {
2420     TCGv_i64 r = tcg_temp_new_i64();
2421
2422     switch (esz) {
2423     case 0:
2424         tcg_gen_ld8u_i64(r, base, ofs);
2425         break;
2426     case 1:
2427         tcg_gen_ld16u_i64(r, base, ofs);
2428         break;
2429     case 2:
2430         tcg_gen_ld32u_i64(r, base, ofs);
2431         break;
2432     case 3:
2433         tcg_gen_ld_i64(r, base, ofs);
2434         break;
2435     default:
2436         g_assert_not_reached();
2437     }
2438     return r;
2439 }
2440
2441 /* Load an unsigned element of ESZ from RM[LAST].  */
2442 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2443                                  int rm, int esz)
2444 {
2445     TCGv_ptr p = tcg_temp_new_ptr();
2446     TCGv_i64 r;
2447
2448     /* Convert offset into vector into offset into ENV.
2449      * The final adjustment for the vector register base
2450      * is added via constant offset to the load.
2451      */
2452 #ifdef HOST_WORDS_BIGENDIAN
2453     /* Adjust for element ordering.  See vec_reg_offset.  */
2454     if (esz < 3) {
2455         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2456     }
2457 #endif
2458     tcg_gen_ext_i32_ptr(p, last);
2459     tcg_gen_add_ptr(p, p, cpu_env);
2460
2461     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2462     tcg_temp_free_ptr(p);
2463
2464     return r;
2465 }
2466
2467 /* Compute CLAST for a Zreg.  */
2468 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2469 {
2470     TCGv_i32 last;
2471     TCGLabel *over;
2472     TCGv_i64 ele;
2473     unsigned vsz, esz = a->esz;
2474
2475     if (!sve_access_check(s)) {
2476         return true;
2477     }
2478
2479     last = tcg_temp_local_new_i32();
2480     over = gen_new_label();
2481
2482     find_last_active(s, last, esz, a->pg);
2483
2484     /* There is of course no movcond for a 2048-bit vector,
2485      * so we must branch over the actual store.
2486      */
2487     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2488
2489     if (!before) {
2490         incr_last_active(s, last, esz);
2491     }
2492
2493     ele = load_last_active(s, last, a->rm, esz);
2494     tcg_temp_free_i32(last);
2495
2496     vsz = vec_full_reg_size(s);
2497     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2498     tcg_temp_free_i64(ele);
2499
2500     /* If this insn used MOVPRFX, we may need a second move.  */
2501     if (a->rd != a->rn) {
2502         TCGLabel *done = gen_new_label();
2503         tcg_gen_br(done);
2504
2505         gen_set_label(over);
2506         do_mov_z(s, a->rd, a->rn);
2507
2508         gen_set_label(done);
2509     } else {
2510         gen_set_label(over);
2511     }
2512     return true;
2513 }
2514
2515 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2516 {
2517     return do_clast_vector(s, a, false);
2518 }
2519
2520 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2521 {
2522     return do_clast_vector(s, a, true);
2523 }
2524
2525 /* Compute CLAST for a scalar.  */
2526 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2527                             bool before, TCGv_i64 reg_val)
2528 {
2529     TCGv_i32 last = tcg_temp_new_i32();
2530     TCGv_i64 ele, cmp, zero;
2531
2532     find_last_active(s, last, esz, pg);
2533
2534     /* Extend the original value of last prior to incrementing.  */
2535     cmp = tcg_temp_new_i64();
2536     tcg_gen_ext_i32_i64(cmp, last);
2537
2538     if (!before) {
2539         incr_last_active(s, last, esz);
2540     }
2541
2542     /* The conceit here is that while last < 0 indicates not found, after
2543      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2544      * from which we can load garbage.  We then discard the garbage with
2545      * a conditional move.
2546      */
2547     ele = load_last_active(s, last, rm, esz);
2548     tcg_temp_free_i32(last);
2549
2550     zero = tcg_const_i64(0);
2551     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2552
2553     tcg_temp_free_i64(zero);
2554     tcg_temp_free_i64(cmp);
2555     tcg_temp_free_i64(ele);
2556 }
2557
2558 /* Compute CLAST for a Vreg.  */
2559 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2560 {
2561     if (sve_access_check(s)) {
2562         int esz = a->esz;
2563         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2564         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2565
2566         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2567         write_fp_dreg(s, a->rd, reg);
2568         tcg_temp_free_i64(reg);
2569     }
2570     return true;
2571 }
2572
2573 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2574 {
2575     return do_clast_fp(s, a, false);
2576 }
2577
2578 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2579 {
2580     return do_clast_fp(s, a, true);
2581 }
2582
2583 /* Compute CLAST for a Xreg.  */
2584 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2585 {
2586     TCGv_i64 reg;
2587
2588     if (!sve_access_check(s)) {
2589         return true;
2590     }
2591
2592     reg = cpu_reg(s, a->rd);
2593     switch (a->esz) {
2594     case 0:
2595         tcg_gen_ext8u_i64(reg, reg);
2596         break;
2597     case 1:
2598         tcg_gen_ext16u_i64(reg, reg);
2599         break;
2600     case 2:
2601         tcg_gen_ext32u_i64(reg, reg);
2602         break;
2603     case 3:
2604         break;
2605     default:
2606         g_assert_not_reached();
2607     }
2608
2609     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2610     return true;
2611 }
2612
2613 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2614 {
2615     return do_clast_general(s, a, false);
2616 }
2617
2618 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2619 {
2620     return do_clast_general(s, a, true);
2621 }
2622
2623 /* Compute LAST for a scalar.  */
2624 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2625                                int pg, int rm, bool before)
2626 {
2627     TCGv_i32 last = tcg_temp_new_i32();
2628     TCGv_i64 ret;
2629
2630     find_last_active(s, last, esz, pg);
2631     if (before) {
2632         wrap_last_active(s, last, esz);
2633     } else {
2634         incr_last_active(s, last, esz);
2635     }
2636
2637     ret = load_last_active(s, last, rm, esz);
2638     tcg_temp_free_i32(last);
2639     return ret;
2640 }
2641
2642 /* Compute LAST for a Vreg.  */
2643 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2644 {
2645     if (sve_access_check(s)) {
2646         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2647         write_fp_dreg(s, a->rd, val);
2648         tcg_temp_free_i64(val);
2649     }
2650     return true;
2651 }
2652
2653 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2654 {
2655     return do_last_fp(s, a, false);
2656 }
2657
2658 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2659 {
2660     return do_last_fp(s, a, true);
2661 }
2662
2663 /* Compute LAST for a Xreg.  */
2664 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2665 {
2666     if (sve_access_check(s)) {
2667         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2668         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2669         tcg_temp_free_i64(val);
2670     }
2671     return true;
2672 }
2673
2674 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2675 {
2676     return do_last_general(s, a, false);
2677 }
2678
2679 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2680 {
2681     return do_last_general(s, a, true);
2682 }
2683
2684 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2685 {
2686     if (sve_access_check(s)) {
2687         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2688     }
2689     return true;
2690 }
2691
2692 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2693 {
2694     if (sve_access_check(s)) {
2695         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2696         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2697         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2698         tcg_temp_free_i64(t);
2699     }
2700     return true;
2701 }
2702
2703 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2704 {
2705     static gen_helper_gvec_3 * const fns[4] = {
2706         NULL,
2707         gen_helper_sve_revb_h,
2708         gen_helper_sve_revb_s,
2709         gen_helper_sve_revb_d,
2710     };
2711     return do_zpz_ool(s, a, fns[a->esz]);
2712 }
2713
2714 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2715 {
2716     static gen_helper_gvec_3 * const fns[4] = {
2717         NULL,
2718         NULL,
2719         gen_helper_sve_revh_s,
2720         gen_helper_sve_revh_d,
2721     };
2722     return do_zpz_ool(s, a, fns[a->esz]);
2723 }
2724
2725 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2726 {
2727     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2728 }
2729
2730 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2731 {
2732     static gen_helper_gvec_3 * const fns[4] = {
2733         gen_helper_sve_rbit_b,
2734         gen_helper_sve_rbit_h,
2735         gen_helper_sve_rbit_s,
2736         gen_helper_sve_rbit_d,
2737     };
2738     return do_zpz_ool(s, a, fns[a->esz]);
2739 }
2740
2741 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2742 {
2743     if (sve_access_check(s)) {
2744         unsigned vsz = vec_full_reg_size(s);
2745         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2746                            vec_full_reg_offset(s, a->rn),
2747                            vec_full_reg_offset(s, a->rm),
2748                            pred_full_reg_offset(s, a->pg),
2749                            vsz, vsz, a->esz, gen_helper_sve_splice);
2750     }
2751     return true;
2752 }
2753
2754 /*
2755  *** SVE Integer Compare - Vectors Group
2756  */
2757
2758 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2759                           gen_helper_gvec_flags_4 *gen_fn)
2760 {
2761     TCGv_ptr pd, zn, zm, pg;
2762     unsigned vsz;
2763     TCGv_i32 t;
2764
2765     if (gen_fn == NULL) {
2766         return false;
2767     }
2768     if (!sve_access_check(s)) {
2769         return true;
2770     }
2771
2772     vsz = vec_full_reg_size(s);
2773     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2774     pd = tcg_temp_new_ptr();
2775     zn = tcg_temp_new_ptr();
2776     zm = tcg_temp_new_ptr();
2777     pg = tcg_temp_new_ptr();
2778
2779     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2780     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2781     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2782     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2783
2784     gen_fn(t, pd, zn, zm, pg, t);
2785
2786     tcg_temp_free_ptr(pd);
2787     tcg_temp_free_ptr(zn);
2788     tcg_temp_free_ptr(zm);
2789     tcg_temp_free_ptr(pg);
2790
2791     do_pred_flags(t);
2792
2793     tcg_temp_free_i32(t);
2794     return true;
2795 }
2796
2797 #define DO_PPZZ(NAME, name) \
2798 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
2799 {                                                                         \
2800     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2801         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2802         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2803     };                                                                    \
2804     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2805 }
2806
2807 DO_PPZZ(CMPEQ, cmpeq)
2808 DO_PPZZ(CMPNE, cmpne)
2809 DO_PPZZ(CMPGT, cmpgt)
2810 DO_PPZZ(CMPGE, cmpge)
2811 DO_PPZZ(CMPHI, cmphi)
2812 DO_PPZZ(CMPHS, cmphs)
2813
2814 #undef DO_PPZZ
2815
2816 #define DO_PPZW(NAME, name) \
2817 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
2818 {                                                                         \
2819     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2820         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2821         gen_helper_sve_##name##_ppzw_s, NULL                              \
2822     };                                                                    \
2823     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2824 }
2825
2826 DO_PPZW(CMPEQ, cmpeq)
2827 DO_PPZW(CMPNE, cmpne)
2828 DO_PPZW(CMPGT, cmpgt)
2829 DO_PPZW(CMPGE, cmpge)
2830 DO_PPZW(CMPHI, cmphi)
2831 DO_PPZW(CMPHS, cmphs)
2832 DO_PPZW(CMPLT, cmplt)
2833 DO_PPZW(CMPLE, cmple)
2834 DO_PPZW(CMPLO, cmplo)
2835 DO_PPZW(CMPLS, cmpls)
2836
2837 #undef DO_PPZW
2838
2839 /*
2840  *** SVE Integer Compare - Immediate Groups
2841  */
2842
2843 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2844                           gen_helper_gvec_flags_3 *gen_fn)
2845 {
2846     TCGv_ptr pd, zn, pg;
2847     unsigned vsz;
2848     TCGv_i32 t;
2849
2850     if (gen_fn == NULL) {
2851         return false;
2852     }
2853     if (!sve_access_check(s)) {
2854         return true;
2855     }
2856
2857     vsz = vec_full_reg_size(s);
2858     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2859     pd = tcg_temp_new_ptr();
2860     zn = tcg_temp_new_ptr();
2861     pg = tcg_temp_new_ptr();
2862
2863     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2864     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2865     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2866
2867     gen_fn(t, pd, zn, pg, t);
2868
2869     tcg_temp_free_ptr(pd);
2870     tcg_temp_free_ptr(zn);
2871     tcg_temp_free_ptr(pg);
2872
2873     do_pred_flags(t);
2874
2875     tcg_temp_free_i32(t);
2876     return true;
2877 }
2878
2879 #define DO_PPZI(NAME, name) \
2880 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
2881 {                                                                         \
2882     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2883         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2884         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2885     };                                                                    \
2886     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2887 }
2888
2889 DO_PPZI(CMPEQ, cmpeq)
2890 DO_PPZI(CMPNE, cmpne)
2891 DO_PPZI(CMPGT, cmpgt)
2892 DO_PPZI(CMPGE, cmpge)
2893 DO_PPZI(CMPHI, cmphi)
2894 DO_PPZI(CMPHS, cmphs)
2895 DO_PPZI(CMPLT, cmplt)
2896 DO_PPZI(CMPLE, cmple)
2897 DO_PPZI(CMPLO, cmplo)
2898 DO_PPZI(CMPLS, cmpls)
2899
2900 #undef DO_PPZI
2901
2902 /*
2903  *** SVE Partition Break Group
2904  */
2905
2906 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2907                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2908 {
2909     if (!sve_access_check(s)) {
2910         return true;
2911     }
2912
2913     unsigned vsz = pred_full_reg_size(s);
2914
2915     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2916     TCGv_ptr d = tcg_temp_new_ptr();
2917     TCGv_ptr n = tcg_temp_new_ptr();
2918     TCGv_ptr m = tcg_temp_new_ptr();
2919     TCGv_ptr g = tcg_temp_new_ptr();
2920     TCGv_i32 t = tcg_const_i32(vsz - 2);
2921
2922     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2923     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2924     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2925     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2926
2927     if (a->s) {
2928         fn_s(t, d, n, m, g, t);
2929         do_pred_flags(t);
2930     } else {
2931         fn(d, n, m, g, t);
2932     }
2933     tcg_temp_free_ptr(d);
2934     tcg_temp_free_ptr(n);
2935     tcg_temp_free_ptr(m);
2936     tcg_temp_free_ptr(g);
2937     tcg_temp_free_i32(t);
2938     return true;
2939 }
2940
2941 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2942                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2943 {
2944     if (!sve_access_check(s)) {
2945         return true;
2946     }
2947
2948     unsigned vsz = pred_full_reg_size(s);
2949
2950     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2951     TCGv_ptr d = tcg_temp_new_ptr();
2952     TCGv_ptr n = tcg_temp_new_ptr();
2953     TCGv_ptr g = tcg_temp_new_ptr();
2954     TCGv_i32 t = tcg_const_i32(vsz - 2);
2955
2956     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2957     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2958     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2959
2960     if (a->s) {
2961         fn_s(t, d, n, g, t);
2962         do_pred_flags(t);
2963     } else {
2964         fn(d, n, g, t);
2965     }
2966     tcg_temp_free_ptr(d);
2967     tcg_temp_free_ptr(n);
2968     tcg_temp_free_ptr(g);
2969     tcg_temp_free_i32(t);
2970     return true;
2971 }
2972
2973 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2974 {
2975     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2976 }
2977
2978 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2979 {
2980     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2981 }
2982
2983 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2984 {
2985     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2986 }
2987
2988 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2989 {
2990     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2991 }
2992
2993 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2994 {
2995     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2996 }
2997
2998 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
2999 {
3000     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3001 }
3002
3003 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
3004 {
3005     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3006 }
3007
3008 /*
3009  *** SVE Predicate Count Group
3010  */
3011
3012 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3013 {
3014     unsigned psz = pred_full_reg_size(s);
3015
3016     if (psz <= 8) {
3017         uint64_t psz_mask;
3018
3019         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3020         if (pn != pg) {
3021             TCGv_i64 g = tcg_temp_new_i64();
3022             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3023             tcg_gen_and_i64(val, val, g);
3024             tcg_temp_free_i64(g);
3025         }
3026
3027         /* Reduce the pred_esz_masks value simply to reduce the
3028          * size of the code generated here.
3029          */
3030         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3031         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3032
3033         tcg_gen_ctpop_i64(val, val);
3034     } else {
3035         TCGv_ptr t_pn = tcg_temp_new_ptr();
3036         TCGv_ptr t_pg = tcg_temp_new_ptr();
3037         unsigned desc;
3038         TCGv_i32 t_desc;
3039
3040         desc = psz - 2;
3041         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3042
3043         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3044         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3045         t_desc = tcg_const_i32(desc);
3046
3047         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3048         tcg_temp_free_ptr(t_pn);
3049         tcg_temp_free_ptr(t_pg);
3050         tcg_temp_free_i32(t_desc);
3051     }
3052 }
3053
3054 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3055 {
3056     if (sve_access_check(s)) {
3057         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3058     }
3059     return true;
3060 }
3061
3062 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3063 {
3064     if (sve_access_check(s)) {
3065         TCGv_i64 reg = cpu_reg(s, a->rd);
3066         TCGv_i64 val = tcg_temp_new_i64();
3067
3068         do_cntp(s, val, a->esz, a->pg, a->pg);
3069         if (a->d) {
3070             tcg_gen_sub_i64(reg, reg, val);
3071         } else {
3072             tcg_gen_add_i64(reg, reg, val);
3073         }
3074         tcg_temp_free_i64(val);
3075     }
3076     return true;
3077 }
3078
3079 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3080 {
3081     if (a->esz == 0) {
3082         return false;
3083     }
3084     if (sve_access_check(s)) {
3085         unsigned vsz = vec_full_reg_size(s);
3086         TCGv_i64 val = tcg_temp_new_i64();
3087         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3088
3089         do_cntp(s, val, a->esz, a->pg, a->pg);
3090         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3091                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3092     }
3093     return true;
3094 }
3095
3096 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3097 {
3098     if (sve_access_check(s)) {
3099         TCGv_i64 reg = cpu_reg(s, a->rd);
3100         TCGv_i64 val = tcg_temp_new_i64();
3101
3102         do_cntp(s, val, a->esz, a->pg, a->pg);
3103         do_sat_addsub_32(reg, val, a->u, a->d);
3104     }
3105     return true;
3106 }
3107
3108 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3109 {
3110     if (sve_access_check(s)) {
3111         TCGv_i64 reg = cpu_reg(s, a->rd);
3112         TCGv_i64 val = tcg_temp_new_i64();
3113
3114         do_cntp(s, val, a->esz, a->pg, a->pg);
3115         do_sat_addsub_64(reg, val, a->u, a->d);
3116     }
3117     return true;
3118 }
3119
3120 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3121 {
3122     if (a->esz == 0) {
3123         return false;
3124     }
3125     if (sve_access_check(s)) {
3126         TCGv_i64 val = tcg_temp_new_i64();
3127         do_cntp(s, val, a->esz, a->pg, a->pg);
3128         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3129     }
3130     return true;
3131 }
3132
3133 /*
3134  *** SVE Integer Compare Scalars Group
3135  */
3136
3137 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3138 {
3139     if (!sve_access_check(s)) {
3140         return true;
3141     }
3142
3143     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3144     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3145     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3146     TCGv_i64 cmp = tcg_temp_new_i64();
3147
3148     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3149     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3150     tcg_temp_free_i64(cmp);
3151
3152     /* VF = !NF & !CF.  */
3153     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3154     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3155
3156     /* Both NF and VF actually look at bit 31.  */
3157     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3158     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3159     return true;
3160 }
3161
3162 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3163 {
3164     TCGv_i64 op0, op1, t0, t1, tmax;
3165     TCGv_i32 t2, t3;
3166     TCGv_ptr ptr;
3167     unsigned desc, vsz = vec_full_reg_size(s);
3168     TCGCond cond;
3169
3170     if (!sve_access_check(s)) {
3171         return true;
3172     }
3173
3174     op0 = read_cpu_reg(s, a->rn, 1);
3175     op1 = read_cpu_reg(s, a->rm, 1);
3176
3177     if (!a->sf) {
3178         if (a->u) {
3179             tcg_gen_ext32u_i64(op0, op0);
3180             tcg_gen_ext32u_i64(op1, op1);
3181         } else {
3182             tcg_gen_ext32s_i64(op0, op0);
3183             tcg_gen_ext32s_i64(op1, op1);
3184         }
3185     }
3186
3187     /* For the helper, compress the different conditions into a computation
3188      * of how many iterations for which the condition is true.
3189      */
3190     t0 = tcg_temp_new_i64();
3191     t1 = tcg_temp_new_i64();
3192     tcg_gen_sub_i64(t0, op1, op0);
3193
3194     tmax = tcg_const_i64(vsz >> a->esz);
3195     if (a->eq) {
3196         /* Equality means one more iteration.  */
3197         tcg_gen_addi_i64(t0, t0, 1);
3198
3199         /* If op1 is max (un)signed integer (and the only time the addition
3200          * above could overflow), then we produce an all-true predicate by
3201          * setting the count to the vector length.  This is because the
3202          * pseudocode is described as an increment + compare loop, and the
3203          * max integer would always compare true.
3204          */
3205         tcg_gen_movi_i64(t1, (a->sf
3206                               ? (a->u ? UINT64_MAX : INT64_MAX)
3207                               : (a->u ? UINT32_MAX : INT32_MAX)));
3208         tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3209     }
3210
3211     /* Bound to the maximum.  */
3212     tcg_gen_umin_i64(t0, t0, tmax);
3213     tcg_temp_free_i64(tmax);
3214
3215     /* Set the count to zero if the condition is false.  */
3216     cond = (a->u
3217             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3218             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3219     tcg_gen_movi_i64(t1, 0);
3220     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3221     tcg_temp_free_i64(t1);
3222
3223     /* Since we're bounded, pass as a 32-bit type.  */
3224     t2 = tcg_temp_new_i32();
3225     tcg_gen_extrl_i64_i32(t2, t0);
3226     tcg_temp_free_i64(t0);
3227
3228     /* Scale elements to bits.  */
3229     tcg_gen_shli_i32(t2, t2, a->esz);
3230
3231     desc = (vsz / 8) - 2;
3232     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3233     t3 = tcg_const_i32(desc);
3234
3235     ptr = tcg_temp_new_ptr();
3236     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3237
3238     gen_helper_sve_while(t2, ptr, t2, t3);
3239     do_pred_flags(t2);
3240
3241     tcg_temp_free_ptr(ptr);
3242     tcg_temp_free_i32(t2);
3243     tcg_temp_free_i32(t3);
3244     return true;
3245 }
3246
3247 /*
3248  *** SVE Integer Wide Immediate - Unpredicated Group
3249  */
3250
3251 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3252 {
3253     if (a->esz == 0) {
3254         return false;
3255     }
3256     if (sve_access_check(s)) {
3257         unsigned vsz = vec_full_reg_size(s);
3258         int dofs = vec_full_reg_offset(s, a->rd);
3259         uint64_t imm;
3260
3261         /* Decode the VFP immediate.  */
3262         imm = vfp_expand_imm(a->esz, a->imm);
3263         imm = dup_const(a->esz, imm);
3264
3265         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3266     }
3267     return true;
3268 }
3269
3270 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3271 {
3272     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3273         return false;
3274     }
3275     if (sve_access_check(s)) {
3276         unsigned vsz = vec_full_reg_size(s);
3277         int dofs = vec_full_reg_offset(s, a->rd);
3278
3279         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3280     }
3281     return true;
3282 }
3283
3284 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3285 {
3286     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3287         return false;
3288     }
3289     if (sve_access_check(s)) {
3290         unsigned vsz = vec_full_reg_size(s);
3291         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3292                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3293     }
3294     return true;
3295 }
3296
3297 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3298 {
3299     a->imm = -a->imm;
3300     return trans_ADD_zzi(s, a);
3301 }
3302
3303 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3304 {
3305     static const GVecGen2s op[4] = {
3306         { .fni8 = tcg_gen_vec_sub8_i64,
3307           .fniv = tcg_gen_sub_vec,
3308           .fno = gen_helper_sve_subri_b,
3309           .opc = INDEX_op_sub_vec,
3310           .vece = MO_8,
3311           .scalar_first = true },
3312         { .fni8 = tcg_gen_vec_sub16_i64,
3313           .fniv = tcg_gen_sub_vec,
3314           .fno = gen_helper_sve_subri_h,
3315           .opc = INDEX_op_sub_vec,
3316           .vece = MO_16,
3317           .scalar_first = true },
3318         { .fni4 = tcg_gen_sub_i32,
3319           .fniv = tcg_gen_sub_vec,
3320           .fno = gen_helper_sve_subri_s,
3321           .opc = INDEX_op_sub_vec,
3322           .vece = MO_32,
3323           .scalar_first = true },
3324         { .fni8 = tcg_gen_sub_i64,
3325           .fniv = tcg_gen_sub_vec,
3326           .fno = gen_helper_sve_subri_d,
3327           .opc = INDEX_op_sub_vec,
3328           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3329           .vece = MO_64,
3330           .scalar_first = true }
3331     };
3332
3333     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3334         return false;
3335     }
3336     if (sve_access_check(s)) {
3337         unsigned vsz = vec_full_reg_size(s);
3338         TCGv_i64 c = tcg_const_i64(a->imm);
3339         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3340                         vec_full_reg_offset(s, a->rn),
3341                         vsz, vsz, c, &op[a->esz]);
3342         tcg_temp_free_i64(c);
3343     }
3344     return true;
3345 }
3346
3347 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3348 {
3349     if (sve_access_check(s)) {
3350         unsigned vsz = vec_full_reg_size(s);
3351         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3352                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3353     }
3354     return true;
3355 }
3356
3357 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3358 {
3359     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3360         return false;
3361     }
3362     if (sve_access_check(s)) {
3363         TCGv_i64 val = tcg_const_i64(a->imm);
3364         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3365         tcg_temp_free_i64(val);
3366     }
3367     return true;
3368 }
3369
3370 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3371 {
3372     return do_zzi_sat(s, a, false, false);
3373 }
3374
3375 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3376 {
3377     return do_zzi_sat(s, a, true, false);
3378 }
3379
3380 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3381 {
3382     return do_zzi_sat(s, a, false, true);
3383 }
3384
3385 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3386 {
3387     return do_zzi_sat(s, a, true, true);
3388 }
3389
3390 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3391 {
3392     if (sve_access_check(s)) {
3393         unsigned vsz = vec_full_reg_size(s);
3394         TCGv_i64 c = tcg_const_i64(a->imm);
3395
3396         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3397                             vec_full_reg_offset(s, a->rn),
3398                             c, vsz, vsz, 0, fn);
3399         tcg_temp_free_i64(c);
3400     }
3401     return true;
3402 }
3403
3404 #define DO_ZZI(NAME, name) \
3405 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
3406 {                                                                       \
3407     static gen_helper_gvec_2i * const fns[4] = {                        \
3408         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3409         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3410     };                                                                  \
3411     return do_zzi_ool(s, a, fns[a->esz]);                               \
3412 }
3413
3414 DO_ZZI(SMAX, smax)
3415 DO_ZZI(UMAX, umax)
3416 DO_ZZI(SMIN, smin)
3417 DO_ZZI(UMIN, umin)
3418
3419 #undef DO_ZZI
3420
3421 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3422 {
3423     static gen_helper_gvec_3 * const fns[2][2] = {
3424         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3425         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3426     };
3427
3428     if (sve_access_check(s)) {
3429         unsigned vsz = vec_full_reg_size(s);
3430         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3431                            vec_full_reg_offset(s, a->rn),
3432                            vec_full_reg_offset(s, a->rm),
3433                            vsz, vsz, 0, fns[a->u][a->sz]);
3434     }
3435     return true;
3436 }
3437
3438 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3439 {
3440     static gen_helper_gvec_3 * const fns[2][2] = {
3441         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3442         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3443     };
3444
3445     if (sve_access_check(s)) {
3446         unsigned vsz = vec_full_reg_size(s);
3447         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3448                            vec_full_reg_offset(s, a->rn),
3449                            vec_full_reg_offset(s, a->rm),
3450                            vsz, vsz, a->index, fns[a->u][a->sz]);
3451     }
3452     return true;
3453 }
3454
3455
3456 /*
3457  *** SVE Floating Point Multiply-Add Indexed Group
3458  */
3459
3460 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3461 {
3462     static gen_helper_gvec_4_ptr * const fns[3] = {
3463         gen_helper_gvec_fmla_idx_h,
3464         gen_helper_gvec_fmla_idx_s,
3465         gen_helper_gvec_fmla_idx_d,
3466     };
3467
3468     if (sve_access_check(s)) {
3469         unsigned vsz = vec_full_reg_size(s);
3470         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3471         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3472                            vec_full_reg_offset(s, a->rn),
3473                            vec_full_reg_offset(s, a->rm),
3474                            vec_full_reg_offset(s, a->ra),
3475                            status, vsz, vsz, (a->index << 1) | a->sub,
3476                            fns[a->esz - 1]);
3477         tcg_temp_free_ptr(status);
3478     }
3479     return true;
3480 }
3481
3482 /*
3483  *** SVE Floating Point Multiply Indexed Group
3484  */
3485
3486 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3487 {
3488     static gen_helper_gvec_3_ptr * const fns[3] = {
3489         gen_helper_gvec_fmul_idx_h,
3490         gen_helper_gvec_fmul_idx_s,
3491         gen_helper_gvec_fmul_idx_d,
3492     };
3493
3494     if (sve_access_check(s)) {
3495         unsigned vsz = vec_full_reg_size(s);
3496         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3497         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3498                            vec_full_reg_offset(s, a->rn),
3499                            vec_full_reg_offset(s, a->rm),
3500                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3501         tcg_temp_free_ptr(status);
3502     }
3503     return true;
3504 }
3505
3506 /*
3507  *** SVE Floating Point Fast Reduction Group
3508  */
3509
3510 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3511                                   TCGv_ptr, TCGv_i32);
3512
3513 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3514                       gen_helper_fp_reduce *fn)
3515 {
3516     unsigned vsz = vec_full_reg_size(s);
3517     unsigned p2vsz = pow2ceil(vsz);
3518     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3519     TCGv_ptr t_zn, t_pg, status;
3520     TCGv_i64 temp;
3521
3522     temp = tcg_temp_new_i64();
3523     t_zn = tcg_temp_new_ptr();
3524     t_pg = tcg_temp_new_ptr();
3525
3526     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3527     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3528     status = get_fpstatus_ptr(a->esz == MO_16);
3529
3530     fn(temp, t_zn, t_pg, status, t_desc);
3531     tcg_temp_free_ptr(t_zn);
3532     tcg_temp_free_ptr(t_pg);
3533     tcg_temp_free_ptr(status);
3534     tcg_temp_free_i32(t_desc);
3535
3536     write_fp_dreg(s, a->rd, temp);
3537     tcg_temp_free_i64(temp);
3538 }
3539
3540 #define DO_VPZ(NAME, name) \
3541 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
3542 {                                                                        \
3543     static gen_helper_fp_reduce * const fns[3] = {                       \
3544         gen_helper_sve_##name##_h,                                       \
3545         gen_helper_sve_##name##_s,                                       \
3546         gen_helper_sve_##name##_d,                                       \
3547     };                                                                   \
3548     if (a->esz == 0) {                                                   \
3549         return false;                                                    \
3550     }                                                                    \
3551     if (sve_access_check(s)) {                                           \
3552         do_reduce(s, a, fns[a->esz - 1]);                                \
3553     }                                                                    \
3554     return true;                                                         \
3555 }
3556
3557 DO_VPZ(FADDV, faddv)
3558 DO_VPZ(FMINNMV, fminnmv)
3559 DO_VPZ(FMAXNMV, fmaxnmv)
3560 DO_VPZ(FMINV, fminv)
3561 DO_VPZ(FMAXV, fmaxv)
3562
3563 /*
3564  *** SVE Floating Point Unary Operations - Unpredicated Group
3565  */
3566
3567 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3568 {
3569     unsigned vsz = vec_full_reg_size(s);
3570     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3571
3572     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3573                        vec_full_reg_offset(s, a->rn),
3574                        status, vsz, vsz, 0, fn);
3575     tcg_temp_free_ptr(status);
3576 }
3577
3578 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3579 {
3580     static gen_helper_gvec_2_ptr * const fns[3] = {
3581         gen_helper_gvec_frecpe_h,
3582         gen_helper_gvec_frecpe_s,
3583         gen_helper_gvec_frecpe_d,
3584     };
3585     if (a->esz == 0) {
3586         return false;
3587     }
3588     if (sve_access_check(s)) {
3589         do_zz_fp(s, a, fns[a->esz - 1]);
3590     }
3591     return true;
3592 }
3593
3594 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3595 {
3596     static gen_helper_gvec_2_ptr * const fns[3] = {
3597         gen_helper_gvec_frsqrte_h,
3598         gen_helper_gvec_frsqrte_s,
3599         gen_helper_gvec_frsqrte_d,
3600     };
3601     if (a->esz == 0) {
3602         return false;
3603     }
3604     if (sve_access_check(s)) {
3605         do_zz_fp(s, a, fns[a->esz - 1]);
3606     }
3607     return true;
3608 }
3609
3610 /*
3611  *** SVE Floating Point Compare with Zero Group
3612  */
3613
3614 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3615                       gen_helper_gvec_3_ptr *fn)
3616 {
3617     unsigned vsz = vec_full_reg_size(s);
3618     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3619
3620     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3621                        vec_full_reg_offset(s, a->rn),
3622                        pred_full_reg_offset(s, a->pg),
3623                        status, vsz, vsz, 0, fn);
3624     tcg_temp_free_ptr(status);
3625 }
3626
3627 #define DO_PPZ(NAME, name) \
3628 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
3629 {                                                                 \
3630     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3631         gen_helper_sve_##name##_h,                                \
3632         gen_helper_sve_##name##_s,                                \
3633         gen_helper_sve_##name##_d,                                \
3634     };                                                            \
3635     if (a->esz == 0) {                                            \
3636         return false;                                             \
3637     }                                                             \
3638     if (sve_access_check(s)) {                                    \
3639         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3640     }                                                             \
3641     return true;                                                  \
3642 }
3643
3644 DO_PPZ(FCMGE_ppz0, fcmge0)
3645 DO_PPZ(FCMGT_ppz0, fcmgt0)
3646 DO_PPZ(FCMLE_ppz0, fcmle0)
3647 DO_PPZ(FCMLT_ppz0, fcmlt0)
3648 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3649 DO_PPZ(FCMNE_ppz0, fcmne0)
3650
3651 #undef DO_PPZ
3652
3653 /*
3654  *** SVE floating-point trig multiply-add coefficient
3655  */
3656
3657 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3658 {
3659     static gen_helper_gvec_3_ptr * const fns[3] = {
3660         gen_helper_sve_ftmad_h,
3661         gen_helper_sve_ftmad_s,
3662         gen_helper_sve_ftmad_d,
3663     };
3664
3665     if (a->esz == 0) {
3666         return false;
3667     }
3668     if (sve_access_check(s)) {
3669         unsigned vsz = vec_full_reg_size(s);
3670         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3671         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3672                            vec_full_reg_offset(s, a->rn),
3673                            vec_full_reg_offset(s, a->rm),
3674                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3675         tcg_temp_free_ptr(status);
3676     }
3677     return true;
3678 }
3679
3680 /*
3681  *** SVE Floating Point Accumulating Reduction Group
3682  */
3683
3684 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3685 {
3686     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3687                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3688     static fadda_fn * const fns[3] = {
3689         gen_helper_sve_fadda_h,
3690         gen_helper_sve_fadda_s,
3691         gen_helper_sve_fadda_d,
3692     };
3693     unsigned vsz = vec_full_reg_size(s);
3694     TCGv_ptr t_rm, t_pg, t_fpst;
3695     TCGv_i64 t_val;
3696     TCGv_i32 t_desc;
3697
3698     if (a->esz == 0) {
3699         return false;
3700     }
3701     if (!sve_access_check(s)) {
3702         return true;
3703     }
3704
3705     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3706     t_rm = tcg_temp_new_ptr();
3707     t_pg = tcg_temp_new_ptr();
3708     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3709     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3710     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3711     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3712
3713     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3714
3715     tcg_temp_free_i32(t_desc);
3716     tcg_temp_free_ptr(t_fpst);
3717     tcg_temp_free_ptr(t_pg);
3718     tcg_temp_free_ptr(t_rm);
3719
3720     write_fp_dreg(s, a->rd, t_val);
3721     tcg_temp_free_i64(t_val);
3722     return true;
3723 }
3724
3725 /*
3726  *** SVE Floating Point Arithmetic - Unpredicated Group
3727  */
3728
3729 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3730                       gen_helper_gvec_3_ptr *fn)
3731 {
3732     if (fn == NULL) {
3733         return false;
3734     }
3735     if (sve_access_check(s)) {
3736         unsigned vsz = vec_full_reg_size(s);
3737         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3738         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3739                            vec_full_reg_offset(s, a->rn),
3740                            vec_full_reg_offset(s, a->rm),
3741                            status, vsz, vsz, 0, fn);
3742         tcg_temp_free_ptr(status);
3743     }
3744     return true;
3745 }
3746
3747
3748 #define DO_FP3(NAME, name) \
3749 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
3750 {                                                                   \
3751     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3752         NULL, gen_helper_gvec_##name##_h,                           \
3753         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3754     };                                                              \
3755     return do_zzz_fp(s, a, fns[a->esz]);                            \
3756 }
3757
3758 DO_FP3(FADD_zzz, fadd)
3759 DO_FP3(FSUB_zzz, fsub)
3760 DO_FP3(FMUL_zzz, fmul)
3761 DO_FP3(FTSMUL, ftsmul)
3762 DO_FP3(FRECPS, recps)
3763 DO_FP3(FRSQRTS, rsqrts)
3764
3765 #undef DO_FP3
3766
3767 /*
3768  *** SVE Floating Point Arithmetic - Predicated Group
3769  */
3770
3771 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3772                        gen_helper_gvec_4_ptr *fn)
3773 {
3774     if (fn == NULL) {
3775         return false;
3776     }
3777     if (sve_access_check(s)) {
3778         unsigned vsz = vec_full_reg_size(s);
3779         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3780         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3781                            vec_full_reg_offset(s, a->rn),
3782                            vec_full_reg_offset(s, a->rm),
3783                            pred_full_reg_offset(s, a->pg),
3784                            status, vsz, vsz, 0, fn);
3785         tcg_temp_free_ptr(status);
3786     }
3787     return true;
3788 }
3789
3790 #define DO_FP3(NAME, name) \
3791 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
3792 {                                                                   \
3793     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3794         NULL, gen_helper_sve_##name##_h,                            \
3795         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3796     };                                                              \
3797     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3798 }
3799
3800 DO_FP3(FADD_zpzz, fadd)
3801 DO_FP3(FSUB_zpzz, fsub)
3802 DO_FP3(FMUL_zpzz, fmul)
3803 DO_FP3(FMIN_zpzz, fmin)
3804 DO_FP3(FMAX_zpzz, fmax)
3805 DO_FP3(FMINNM_zpzz, fminnum)
3806 DO_FP3(FMAXNM_zpzz, fmaxnum)
3807 DO_FP3(FABD, fabd)
3808 DO_FP3(FSCALE, fscalbn)
3809 DO_FP3(FDIV, fdiv)
3810 DO_FP3(FMULX, fmulx)
3811
3812 #undef DO_FP3
3813
3814 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3815                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3816
3817 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3818                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3819 {
3820     unsigned vsz = vec_full_reg_size(s);
3821     TCGv_ptr t_zd, t_zn, t_pg, status;
3822     TCGv_i32 desc;
3823
3824     t_zd = tcg_temp_new_ptr();
3825     t_zn = tcg_temp_new_ptr();
3826     t_pg = tcg_temp_new_ptr();
3827     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3828     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3829     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3830
3831     status = get_fpstatus_ptr(is_fp16);
3832     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3833     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3834
3835     tcg_temp_free_i32(desc);
3836     tcg_temp_free_ptr(status);
3837     tcg_temp_free_ptr(t_pg);
3838     tcg_temp_free_ptr(t_zn);
3839     tcg_temp_free_ptr(t_zd);
3840 }
3841
3842 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3843                       gen_helper_sve_fp2scalar *fn)
3844 {
3845     TCGv_i64 temp = tcg_const_i64(imm);
3846     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3847     tcg_temp_free_i64(temp);
3848 }
3849
3850 #define DO_FP_IMM(NAME, name, const0, const1) \
3851 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
3852 {                                                                         \
3853     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3854         gen_helper_sve_##name##_h,                                        \
3855         gen_helper_sve_##name##_s,                                        \
3856         gen_helper_sve_##name##_d                                         \
3857     };                                                                    \
3858     static uint64_t const val[3][2] = {                                   \
3859         { float16_##const0, float16_##const1 },                           \
3860         { float32_##const0, float32_##const1 },                           \
3861         { float64_##const0, float64_##const1 },                           \
3862     };                                                                    \
3863     if (a->esz == 0) {                                                    \
3864         return false;                                                     \
3865     }                                                                     \
3866     if (sve_access_check(s)) {                                            \
3867         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3868     }                                                                     \
3869     return true;                                                          \
3870 }
3871
3872 #define float16_two  make_float16(0x4000)
3873 #define float32_two  make_float32(0x40000000)
3874 #define float64_two  make_float64(0x4000000000000000ULL)
3875
3876 DO_FP_IMM(FADD, fadds, half, one)
3877 DO_FP_IMM(FSUB, fsubs, half, one)
3878 DO_FP_IMM(FMUL, fmuls, half, two)
3879 DO_FP_IMM(FSUBR, fsubrs, half, one)
3880 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3881 DO_FP_IMM(FMINNM, fminnms, zero, one)
3882 DO_FP_IMM(FMAX, fmaxs, zero, one)
3883 DO_FP_IMM(FMIN, fmins, zero, one)
3884
3885 #undef DO_FP_IMM
3886
3887 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3888                       gen_helper_gvec_4_ptr *fn)
3889 {
3890     if (fn == NULL) {
3891         return false;
3892     }
3893     if (sve_access_check(s)) {
3894         unsigned vsz = vec_full_reg_size(s);
3895         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3896         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3897                            vec_full_reg_offset(s, a->rn),
3898                            vec_full_reg_offset(s, a->rm),
3899                            pred_full_reg_offset(s, a->pg),
3900                            status, vsz, vsz, 0, fn);
3901         tcg_temp_free_ptr(status);
3902     }
3903     return true;
3904 }
3905
3906 #define DO_FPCMP(NAME, name) \
3907 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
3908 {                                                                     \
3909     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3910         NULL, gen_helper_sve_##name##_h,                              \
3911         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3912     };                                                                \
3913     return do_fp_cmp(s, a, fns[a->esz]);                              \
3914 }
3915
3916 DO_FPCMP(FCMGE, fcmge)
3917 DO_FPCMP(FCMGT, fcmgt)
3918 DO_FPCMP(FCMEQ, fcmeq)
3919 DO_FPCMP(FCMNE, fcmne)
3920 DO_FPCMP(FCMUO, fcmuo)
3921 DO_FPCMP(FACGE, facge)
3922 DO_FPCMP(FACGT, facgt)
3923
3924 #undef DO_FPCMP
3925
3926 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3927 {
3928     static gen_helper_gvec_4_ptr * const fns[3] = {
3929         gen_helper_sve_fcadd_h,
3930         gen_helper_sve_fcadd_s,
3931         gen_helper_sve_fcadd_d
3932     };
3933
3934     if (a->esz == 0) {
3935         return false;
3936     }
3937     if (sve_access_check(s)) {
3938         unsigned vsz = vec_full_reg_size(s);
3939         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3940         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3941                            vec_full_reg_offset(s, a->rn),
3942                            vec_full_reg_offset(s, a->rm),
3943                            pred_full_reg_offset(s, a->pg),
3944                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3945         tcg_temp_free_ptr(status);
3946     }
3947     return true;
3948 }
3949
3950 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3951
3952 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3953 {
3954     if (fn == NULL) {
3955         return false;
3956     }
3957     if (!sve_access_check(s)) {
3958         return true;
3959     }
3960
3961     unsigned vsz = vec_full_reg_size(s);
3962     unsigned desc;
3963     TCGv_i32 t_desc;
3964     TCGv_ptr pg = tcg_temp_new_ptr();
3965
3966     /* We would need 7 operands to pass these arguments "properly".
3967      * So we encode all the register numbers into the descriptor.
3968      */
3969     desc = deposit32(a->rd, 5, 5, a->rn);
3970     desc = deposit32(desc, 10, 5, a->rm);
3971     desc = deposit32(desc, 15, 5, a->ra);
3972     desc = simd_desc(vsz, vsz, desc);
3973
3974     t_desc = tcg_const_i32(desc);
3975     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3976     fn(cpu_env, pg, t_desc);
3977     tcg_temp_free_i32(t_desc);
3978     tcg_temp_free_ptr(pg);
3979     return true;
3980 }
3981
3982 #define DO_FMLA(NAME, name) \
3983 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
3984 {                                                                    \
3985     static gen_helper_sve_fmla * const fns[4] = {                    \
3986         NULL, gen_helper_sve_##name##_h,                             \
3987         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3988     };                                                               \
3989     return do_fmla(s, a, fns[a->esz]);                               \
3990 }
3991
3992 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3993 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3994 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3995 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3996
3997 #undef DO_FMLA
3998
3999 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
4000 {
4001     static gen_helper_sve_fmla * const fns[3] = {
4002         gen_helper_sve_fcmla_zpzzz_h,
4003         gen_helper_sve_fcmla_zpzzz_s,
4004         gen_helper_sve_fcmla_zpzzz_d,
4005     };
4006
4007     if (a->esz == 0) {
4008         return false;
4009     }
4010     if (sve_access_check(s)) {
4011         unsigned vsz = vec_full_reg_size(s);
4012         unsigned desc;
4013         TCGv_i32 t_desc;
4014         TCGv_ptr pg = tcg_temp_new_ptr();
4015
4016         /* We would need 7 operands to pass these arguments "properly".
4017          * So we encode all the register numbers into the descriptor.
4018          */
4019         desc = deposit32(a->rd, 5, 5, a->rn);
4020         desc = deposit32(desc, 10, 5, a->rm);
4021         desc = deposit32(desc, 15, 5, a->ra);
4022         desc = deposit32(desc, 20, 2, a->rot);
4023         desc = sextract32(desc, 0, 22);
4024         desc = simd_desc(vsz, vsz, desc);
4025
4026         t_desc = tcg_const_i32(desc);
4027         tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4028         fns[a->esz - 1](cpu_env, pg, t_desc);
4029         tcg_temp_free_i32(t_desc);
4030         tcg_temp_free_ptr(pg);
4031     }
4032     return true;
4033 }
4034
4035 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4036 {
4037     static gen_helper_gvec_3_ptr * const fns[2] = {
4038         gen_helper_gvec_fcmlah_idx,
4039         gen_helper_gvec_fcmlas_idx,
4040     };
4041
4042     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4043     tcg_debug_assert(a->rd == a->ra);
4044     if (sve_access_check(s)) {
4045         unsigned vsz = vec_full_reg_size(s);
4046         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4047         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4048                            vec_full_reg_offset(s, a->rn),
4049                            vec_full_reg_offset(s, a->rm),
4050                            status, vsz, vsz,
4051                            a->index * 4 + a->rot,
4052                            fns[a->esz - 1]);
4053         tcg_temp_free_ptr(status);
4054     }
4055     return true;
4056 }
4057
4058 /*
4059  *** SVE Floating Point Unary Operations Predicated Group
4060  */
4061
4062 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4063                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4064 {
4065     if (sve_access_check(s)) {
4066         unsigned vsz = vec_full_reg_size(s);
4067         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4068         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4069                            vec_full_reg_offset(s, rn),
4070                            pred_full_reg_offset(s, pg),
4071                            status, vsz, vsz, 0, fn);
4072         tcg_temp_free_ptr(status);
4073     }
4074     return true;
4075 }
4076
4077 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4078 {
4079     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4080 }
4081
4082 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4083 {
4084     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4085 }
4086
4087 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4088 {
4089     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4090 }
4091
4092 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4093 {
4094     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4095 }
4096
4097 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4098 {
4099     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4100 }
4101
4102 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4103 {
4104     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4105 }
4106
4107 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4108 {
4109     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4110 }
4111
4112 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4113 {
4114     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4115 }
4116
4117 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4118 {
4119     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4120 }
4121
4122 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4123 {
4124     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4125 }
4126
4127 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4128 {
4129     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4130 }
4131
4132 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4133 {
4134     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4135 }
4136
4137 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4138 {
4139     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4140 }
4141
4142 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4143 {
4144     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4145 }
4146
4147 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4148 {
4149     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4150 }
4151
4152 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4153 {
4154     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4155 }
4156
4157 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4158 {
4159     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4160 }
4161
4162 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4163 {
4164     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4165 }
4166
4167 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4168 {
4169     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4170 }
4171
4172 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4173 {
4174     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4175 }
4176
4177 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4178     gen_helper_sve_frint_h,
4179     gen_helper_sve_frint_s,
4180     gen_helper_sve_frint_d
4181 };
4182
4183 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4184 {
4185     if (a->esz == 0) {
4186         return false;
4187     }
4188     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4189                       frint_fns[a->esz - 1]);
4190 }
4191
4192 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4193 {
4194     static gen_helper_gvec_3_ptr * const fns[3] = {
4195         gen_helper_sve_frintx_h,
4196         gen_helper_sve_frintx_s,
4197         gen_helper_sve_frintx_d
4198     };
4199     if (a->esz == 0) {
4200         return false;
4201     }
4202     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4203 }
4204
4205 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4206 {
4207     if (a->esz == 0) {
4208         return false;
4209     }
4210     if (sve_access_check(s)) {
4211         unsigned vsz = vec_full_reg_size(s);
4212         TCGv_i32 tmode = tcg_const_i32(mode);
4213         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4214
4215         gen_helper_set_rmode(tmode, tmode, status);
4216
4217         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4218                            vec_full_reg_offset(s, a->rn),
4219                            pred_full_reg_offset(s, a->pg),
4220                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4221
4222         gen_helper_set_rmode(tmode, tmode, status);
4223         tcg_temp_free_i32(tmode);
4224         tcg_temp_free_ptr(status);
4225     }
4226     return true;
4227 }
4228
4229 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4230 {
4231     return do_frint_mode(s, a, float_round_nearest_even);
4232 }
4233
4234 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4235 {
4236     return do_frint_mode(s, a, float_round_up);
4237 }
4238
4239 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4240 {
4241     return do_frint_mode(s, a, float_round_down);
4242 }
4243
4244 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4245 {
4246     return do_frint_mode(s, a, float_round_to_zero);
4247 }
4248
4249 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4250 {
4251     return do_frint_mode(s, a, float_round_ties_away);
4252 }
4253
4254 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4255 {
4256     static gen_helper_gvec_3_ptr * const fns[3] = {
4257         gen_helper_sve_frecpx_h,
4258         gen_helper_sve_frecpx_s,
4259         gen_helper_sve_frecpx_d
4260     };
4261     if (a->esz == 0) {
4262         return false;
4263     }
4264     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4265 }
4266
4267 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4268 {
4269     static gen_helper_gvec_3_ptr * const fns[3] = {
4270         gen_helper_sve_fsqrt_h,
4271         gen_helper_sve_fsqrt_s,
4272         gen_helper_sve_fsqrt_d
4273     };
4274     if (a->esz == 0) {
4275         return false;
4276     }
4277     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4278 }
4279
4280 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4281 {
4282     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4283 }
4284
4285 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4286 {
4287     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4288 }
4289
4290 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4291 {
4292     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4293 }
4294
4295 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4296 {
4297     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4298 }
4299
4300 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4301 {
4302     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4303 }
4304
4305 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4306 {
4307     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4308 }
4309
4310 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4311 {
4312     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4313 }
4314
4315 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4316 {
4317     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4318 }
4319
4320 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4321 {
4322     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4323 }
4324
4325 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4326 {
4327     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4328 }
4329
4330 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4331 {
4332     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4333 }
4334
4335 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4336 {
4337     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4338 }
4339
4340 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4341 {
4342     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4343 }
4344
4345 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4346 {
4347     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4348 }
4349
4350 /*
4351  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4352  */
4353
4354 /* Subroutine loading a vector register at VOFS of LEN bytes.
4355  * The load should begin at the address Rn + IMM.
4356  */
4357
4358 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4359 {
4360     int len_align = QEMU_ALIGN_DOWN(len, 8);
4361     int len_remain = len % 8;
4362     int nparts = len / 8 + ctpop8(len_remain);
4363     int midx = get_mem_index(s);
4364     TCGv_i64 addr, t0, t1;
4365
4366     addr = tcg_temp_new_i64();
4367     t0 = tcg_temp_new_i64();
4368
4369     /* Note that unpredicated load/store of vector/predicate registers
4370      * are defined as a stream of bytes, which equates to little-endian
4371      * operations on larger quantities.  There is no nice way to force
4372      * a little-endian load for aarch64_be-linux-user out of line.
4373      *
4374      * Attempt to keep code expansion to a minimum by limiting the
4375      * amount of unrolling done.
4376      */
4377     if (nparts <= 4) {
4378         int i;
4379
4380         for (i = 0; i < len_align; i += 8) {
4381             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4382             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4383             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4384         }
4385     } else {
4386         TCGLabel *loop = gen_new_label();
4387         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4388
4389         gen_set_label(loop);
4390
4391         /* Minimize the number of local temps that must be re-read from
4392          * the stack each iteration.  Instead, re-compute values other
4393          * than the loop counter.
4394          */
4395         tp = tcg_temp_new_ptr();
4396         tcg_gen_addi_ptr(tp, i, imm);
4397         tcg_gen_extu_ptr_i64(addr, tp);
4398         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4399
4400         tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4401
4402         tcg_gen_add_ptr(tp, cpu_env, i);
4403         tcg_gen_addi_ptr(i, i, 8);
4404         tcg_gen_st_i64(t0, tp, vofs);
4405         tcg_temp_free_ptr(tp);
4406
4407         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4408         tcg_temp_free_ptr(i);
4409     }
4410
4411     /* Predicate register loads can be any multiple of 2.
4412      * Note that we still store the entire 64-bit unit into cpu_env.
4413      */
4414     if (len_remain) {
4415         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4416
4417         switch (len_remain) {
4418         case 2:
4419         case 4:
4420         case 8:
4421             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4422             break;
4423
4424         case 6:
4425             t1 = tcg_temp_new_i64();
4426             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4427             tcg_gen_addi_i64(addr, addr, 4);
4428             tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4429             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4430             tcg_temp_free_i64(t1);
4431             break;
4432
4433         default:
4434             g_assert_not_reached();
4435         }
4436         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4437     }
4438     tcg_temp_free_i64(addr);
4439     tcg_temp_free_i64(t0);
4440 }
4441
4442 /* Similarly for stores.  */
4443 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4444 {
4445     int len_align = QEMU_ALIGN_DOWN(len, 8);
4446     int len_remain = len % 8;
4447     int nparts = len / 8 + ctpop8(len_remain);
4448     int midx = get_mem_index(s);
4449     TCGv_i64 addr, t0;
4450
4451     addr = tcg_temp_new_i64();
4452     t0 = tcg_temp_new_i64();
4453
4454     /* Note that unpredicated load/store of vector/predicate registers
4455      * are defined as a stream of bytes, which equates to little-endian
4456      * operations on larger quantities.  There is no nice way to force
4457      * a little-endian store for aarch64_be-linux-user out of line.
4458      *
4459      * Attempt to keep code expansion to a minimum by limiting the
4460      * amount of unrolling done.
4461      */
4462     if (nparts <= 4) {
4463         int i;
4464
4465         for (i = 0; i < len_align; i += 8) {
4466             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4467             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4468             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4469         }
4470     } else {
4471         TCGLabel *loop = gen_new_label();
4472         TCGv_ptr t2, i = tcg_const_local_ptr(0);
4473
4474         gen_set_label(loop);
4475
4476         t2 = tcg_temp_new_ptr();
4477         tcg_gen_add_ptr(t2, cpu_env, i);
4478         tcg_gen_ld_i64(t0, t2, vofs);
4479
4480         /* Minimize the number of local temps that must be re-read from
4481          * the stack each iteration.  Instead, re-compute values other
4482          * than the loop counter.
4483          */
4484         tcg_gen_addi_ptr(t2, i, imm);
4485         tcg_gen_extu_ptr_i64(addr, t2);
4486         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4487         tcg_temp_free_ptr(t2);
4488
4489         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4490
4491         tcg_gen_addi_ptr(i, i, 8);
4492
4493         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4494         tcg_temp_free_ptr(i);
4495     }
4496
4497     /* Predicate register stores can be any multiple of 2.  */
4498     if (len_remain) {
4499         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4500         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4501
4502         switch (len_remain) {
4503         case 2:
4504         case 4:
4505         case 8:
4506             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4507             break;
4508
4509         case 6:
4510             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4511             tcg_gen_addi_i64(addr, addr, 4);
4512             tcg_gen_shri_i64(t0, t0, 32);
4513             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4514             break;
4515
4516         default:
4517             g_assert_not_reached();
4518         }
4519     }
4520     tcg_temp_free_i64(addr);
4521     tcg_temp_free_i64(t0);
4522 }
4523
4524 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4525 {
4526     if (sve_access_check(s)) {
4527         int size = vec_full_reg_size(s);
4528         int off = vec_full_reg_offset(s, a->rd);
4529         do_ldr(s, off, size, a->rn, a->imm * size);
4530     }
4531     return true;
4532 }
4533
4534 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4535 {
4536     if (sve_access_check(s)) {
4537         int size = pred_full_reg_size(s);
4538         int off = pred_full_reg_offset(s, a->rd);
4539         do_ldr(s, off, size, a->rn, a->imm * size);
4540     }
4541     return true;
4542 }
4543
4544 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4545 {
4546     if (sve_access_check(s)) {
4547         int size = vec_full_reg_size(s);
4548         int off = vec_full_reg_offset(s, a->rd);
4549         do_str(s, off, size, a->rn, a->imm * size);
4550     }
4551     return true;
4552 }
4553
4554 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4555 {
4556     if (sve_access_check(s)) {
4557         int size = pred_full_reg_size(s);
4558         int off = pred_full_reg_offset(s, a->rd);
4559         do_str(s, off, size, a->rn, a->imm * size);
4560     }
4561     return true;
4562 }
4563
4564 /*
4565  *** SVE Memory - Contiguous Load Group
4566  */
4567
4568 /* The memory mode of the dtype.  */
4569 static const TCGMemOp dtype_mop[16] = {
4570     MO_UB, MO_UB, MO_UB, MO_UB,
4571     MO_SL, MO_UW, MO_UW, MO_UW,
4572     MO_SW, MO_SW, MO_UL, MO_UL,
4573     MO_SB, MO_SB, MO_SB, MO_Q
4574 };
4575
4576 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4577
4578 /* The vector element size of dtype.  */
4579 static const uint8_t dtype_esz[16] = {
4580     0, 1, 2, 3,
4581     3, 1, 2, 3,
4582     3, 2, 2, 3,
4583     3, 2, 1, 3
4584 };
4585
4586 static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
4587 {
4588     return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
4589 }
4590
4591 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4592                        int dtype, gen_helper_gvec_mem *fn)
4593 {
4594     unsigned vsz = vec_full_reg_size(s);
4595     TCGv_ptr t_pg;
4596     TCGv_i32 t_desc;
4597     int desc;
4598
4599     /* For e.g. LD4, there are not enough arguments to pass all 4
4600      * registers as pointers, so encode the regno into the data field.
4601      * For consistency, do this even for LD1.
4602      */
4603     desc = sve_memopidx(s, dtype);
4604     desc |= zt << MEMOPIDX_SHIFT;
4605     desc = simd_desc(vsz, vsz, desc);
4606     t_desc = tcg_const_i32(desc);
4607     t_pg = tcg_temp_new_ptr();
4608
4609     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4610     fn(cpu_env, t_pg, addr, t_desc);
4611
4612     tcg_temp_free_ptr(t_pg);
4613     tcg_temp_free_i32(t_desc);
4614 }
4615
4616 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4617                       TCGv_i64 addr, int dtype, int nreg)
4618 {
4619     static gen_helper_gvec_mem * const fns[2][16][4] = {
4620         /* Little-endian */
4621         { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4622             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4623           { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4624           { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4625           { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4626
4627           { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4628           { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4629             gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4630           { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4631           { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4632
4633           { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4634           { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4635           { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4636             gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4637           { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4638
4639           { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4640           { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4641           { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4642           { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4643             gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4644
4645         /* Big-endian */
4646         { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4647             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4648           { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4649           { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4650           { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4651
4652           { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4653           { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4654             gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4655           { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4656           { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4657
4658           { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4659           { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4660           { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4661             gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4662           { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4663
4664           { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4665           { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4666           { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4667           { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4668             gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
4669     };
4670     gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
4671
4672     /* While there are holes in the table, they are not
4673      * accessible via the instruction encoding.
4674      */
4675     assert(fn != NULL);
4676     do_mem_zpa(s, zt, pg, addr, dtype, fn);
4677 }
4678
4679 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4680 {
4681     if (a->rm == 31) {
4682         return false;
4683     }
4684     if (sve_access_check(s)) {
4685         TCGv_i64 addr = new_tmp_a64(s);
4686         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4687         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4688         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4689     }
4690     return true;
4691 }
4692
4693 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4694 {
4695     if (sve_access_check(s)) {
4696         int vsz = vec_full_reg_size(s);
4697         int elements = vsz >> dtype_esz[a->dtype];
4698         TCGv_i64 addr = new_tmp_a64(s);
4699
4700         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4701                          (a->imm * elements * (a->nreg + 1))
4702                          << dtype_msz(a->dtype));
4703         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4704     }
4705     return true;
4706 }
4707
4708 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4709 {
4710     static gen_helper_gvec_mem * const fns[2][16] = {
4711         /* Little-endian */
4712         { gen_helper_sve_ldff1bb_r,
4713           gen_helper_sve_ldff1bhu_r,
4714           gen_helper_sve_ldff1bsu_r,
4715           gen_helper_sve_ldff1bdu_r,
4716
4717           gen_helper_sve_ldff1sds_le_r,
4718           gen_helper_sve_ldff1hh_le_r,
4719           gen_helper_sve_ldff1hsu_le_r,
4720           gen_helper_sve_ldff1hdu_le_r,
4721
4722           gen_helper_sve_ldff1hds_le_r,
4723           gen_helper_sve_ldff1hss_le_r,
4724           gen_helper_sve_ldff1ss_le_r,
4725           gen_helper_sve_ldff1sdu_le_r,
4726
4727           gen_helper_sve_ldff1bds_r,
4728           gen_helper_sve_ldff1bss_r,
4729           gen_helper_sve_ldff1bhs_r,
4730           gen_helper_sve_ldff1dd_le_r },
4731
4732         /* Big-endian */
4733         { gen_helper_sve_ldff1bb_r,
4734           gen_helper_sve_ldff1bhu_r,
4735           gen_helper_sve_ldff1bsu_r,
4736           gen_helper_sve_ldff1bdu_r,
4737
4738           gen_helper_sve_ldff1sds_be_r,
4739           gen_helper_sve_ldff1hh_be_r,
4740           gen_helper_sve_ldff1hsu_be_r,
4741           gen_helper_sve_ldff1hdu_be_r,
4742
4743           gen_helper_sve_ldff1hds_be_r,
4744           gen_helper_sve_ldff1hss_be_r,
4745           gen_helper_sve_ldff1ss_be_r,
4746           gen_helper_sve_ldff1sdu_be_r,
4747
4748           gen_helper_sve_ldff1bds_r,
4749           gen_helper_sve_ldff1bss_r,
4750           gen_helper_sve_ldff1bhs_r,
4751           gen_helper_sve_ldff1dd_be_r },
4752     };
4753
4754     if (sve_access_check(s)) {
4755         TCGv_i64 addr = new_tmp_a64(s);
4756         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4757         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4758         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4759                    fns[s->be_data == MO_BE][a->dtype]);
4760     }
4761     return true;
4762 }
4763
4764 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4765 {
4766     static gen_helper_gvec_mem * const fns[2][16] = {
4767         /* Little-endian */
4768         { gen_helper_sve_ldnf1bb_r,
4769           gen_helper_sve_ldnf1bhu_r,
4770           gen_helper_sve_ldnf1bsu_r,
4771           gen_helper_sve_ldnf1bdu_r,
4772
4773           gen_helper_sve_ldnf1sds_le_r,
4774           gen_helper_sve_ldnf1hh_le_r,
4775           gen_helper_sve_ldnf1hsu_le_r,
4776           gen_helper_sve_ldnf1hdu_le_r,
4777
4778           gen_helper_sve_ldnf1hds_le_r,
4779           gen_helper_sve_ldnf1hss_le_r,
4780           gen_helper_sve_ldnf1ss_le_r,
4781           gen_helper_sve_ldnf1sdu_le_r,
4782
4783           gen_helper_sve_ldnf1bds_r,
4784           gen_helper_sve_ldnf1bss_r,
4785           gen_helper_sve_ldnf1bhs_r,
4786           gen_helper_sve_ldnf1dd_le_r },
4787
4788         /* Big-endian */
4789         { gen_helper_sve_ldnf1bb_r,
4790           gen_helper_sve_ldnf1bhu_r,
4791           gen_helper_sve_ldnf1bsu_r,
4792           gen_helper_sve_ldnf1bdu_r,
4793
4794           gen_helper_sve_ldnf1sds_be_r,
4795           gen_helper_sve_ldnf1hh_be_r,
4796           gen_helper_sve_ldnf1hsu_be_r,
4797           gen_helper_sve_ldnf1hdu_be_r,
4798
4799           gen_helper_sve_ldnf1hds_be_r,
4800           gen_helper_sve_ldnf1hss_be_r,
4801           gen_helper_sve_ldnf1ss_be_r,
4802           gen_helper_sve_ldnf1sdu_be_r,
4803
4804           gen_helper_sve_ldnf1bds_r,
4805           gen_helper_sve_ldnf1bss_r,
4806           gen_helper_sve_ldnf1bhs_r,
4807           gen_helper_sve_ldnf1dd_be_r },
4808     };
4809
4810     if (sve_access_check(s)) {
4811         int vsz = vec_full_reg_size(s);
4812         int elements = vsz >> dtype_esz[a->dtype];
4813         int off = (a->imm * elements) << dtype_msz(a->dtype);
4814         TCGv_i64 addr = new_tmp_a64(s);
4815
4816         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4817         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4818                    fns[s->be_data == MO_BE][a->dtype]);
4819     }
4820     return true;
4821 }
4822
4823 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4824 {
4825     static gen_helper_gvec_mem * const fns[2][4] = {
4826         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
4827           gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4828         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
4829           gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4830     };
4831     unsigned vsz = vec_full_reg_size(s);
4832     TCGv_ptr t_pg;
4833     TCGv_i32 t_desc;
4834     int desc, poff;
4835
4836     /* Load the first quadword using the normal predicated load helpers.  */
4837     desc = sve_memopidx(s, msz_dtype(msz));
4838     desc |= zt << MEMOPIDX_SHIFT;
4839     desc = simd_desc(16, 16, desc);
4840     t_desc = tcg_const_i32(desc);
4841
4842     poff = pred_full_reg_offset(s, pg);
4843     if (vsz > 16) {
4844         /*
4845          * Zero-extend the first 16 bits of the predicate into a temporary.
4846          * This avoids triggering an assert making sure we don't have bits
4847          * set within a predicate beyond VQ, but we have lowered VQ to 1
4848          * for this load operation.
4849          */
4850         TCGv_i64 tmp = tcg_temp_new_i64();
4851 #ifdef HOST_WORDS_BIGENDIAN
4852         poff += 6;
4853 #endif
4854         tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4855
4856         poff = offsetof(CPUARMState, vfp.preg_tmp);
4857         tcg_gen_st_i64(tmp, cpu_env, poff);
4858         tcg_temp_free_i64(tmp);
4859     }
4860
4861     t_pg = tcg_temp_new_ptr();
4862     tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4863
4864     fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4865
4866     tcg_temp_free_ptr(t_pg);
4867     tcg_temp_free_i32(t_desc);
4868
4869     /* Replicate that first quadword.  */
4870     if (vsz > 16) {
4871         unsigned dofs = vec_full_reg_offset(s, zt);
4872         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4873     }
4874 }
4875
4876 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4877 {
4878     if (a->rm == 31) {
4879         return false;
4880     }
4881     if (sve_access_check(s)) {
4882         int msz = dtype_msz(a->dtype);
4883         TCGv_i64 addr = new_tmp_a64(s);
4884         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4885         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4886         do_ldrq(s, a->rd, a->pg, addr, msz);
4887     }
4888     return true;
4889 }
4890
4891 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4892 {
4893     if (sve_access_check(s)) {
4894         TCGv_i64 addr = new_tmp_a64(s);
4895         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4896         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4897     }
4898     return true;
4899 }
4900
4901 /* Load and broadcast element.  */
4902 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4903 {
4904     if (!sve_access_check(s)) {
4905         return true;
4906     }
4907
4908     unsigned vsz = vec_full_reg_size(s);
4909     unsigned psz = pred_full_reg_size(s);
4910     unsigned esz = dtype_esz[a->dtype];
4911     unsigned msz = dtype_msz(a->dtype);
4912     TCGLabel *over = gen_new_label();
4913     TCGv_i64 temp;
4914
4915     /* If the guarding predicate has no bits set, no load occurs.  */
4916     if (psz <= 8) {
4917         /* Reduce the pred_esz_masks value simply to reduce the
4918          * size of the code generated here.
4919          */
4920         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4921         temp = tcg_temp_new_i64();
4922         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4923         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4924         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4925         tcg_temp_free_i64(temp);
4926     } else {
4927         TCGv_i32 t32 = tcg_temp_new_i32();
4928         find_last_active(s, t32, esz, a->pg);
4929         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4930         tcg_temp_free_i32(t32);
4931     }
4932
4933     /* Load the data.  */
4934     temp = tcg_temp_new_i64();
4935     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4936     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4937                         s->be_data | dtype_mop[a->dtype]);
4938
4939     /* Broadcast to *all* elements.  */
4940     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4941                          vsz, vsz, temp);
4942     tcg_temp_free_i64(temp);
4943
4944     /* Zero the inactive elements.  */
4945     gen_set_label(over);
4946     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4947     return true;
4948 }
4949
4950 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4951                       int msz, int esz, int nreg)
4952 {
4953     static gen_helper_gvec_mem * const fn_single[2][4][4] = {
4954         { { gen_helper_sve_st1bb_r,
4955             gen_helper_sve_st1bh_r,
4956             gen_helper_sve_st1bs_r,
4957             gen_helper_sve_st1bd_r },
4958           { NULL,
4959             gen_helper_sve_st1hh_le_r,
4960             gen_helper_sve_st1hs_le_r,
4961             gen_helper_sve_st1hd_le_r },
4962           { NULL, NULL,
4963             gen_helper_sve_st1ss_le_r,
4964             gen_helper_sve_st1sd_le_r },
4965           { NULL, NULL, NULL,
4966             gen_helper_sve_st1dd_le_r } },
4967         { { gen_helper_sve_st1bb_r,
4968             gen_helper_sve_st1bh_r,
4969             gen_helper_sve_st1bs_r,
4970             gen_helper_sve_st1bd_r },
4971           { NULL,
4972             gen_helper_sve_st1hh_be_r,
4973             gen_helper_sve_st1hs_be_r,
4974             gen_helper_sve_st1hd_be_r },
4975           { NULL, NULL,
4976             gen_helper_sve_st1ss_be_r,
4977             gen_helper_sve_st1sd_be_r },
4978           { NULL, NULL, NULL,
4979             gen_helper_sve_st1dd_be_r } },
4980     };
4981     static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
4982         { { gen_helper_sve_st2bb_r,
4983             gen_helper_sve_st2hh_le_r,
4984             gen_helper_sve_st2ss_le_r,
4985             gen_helper_sve_st2dd_le_r },
4986           { gen_helper_sve_st3bb_r,
4987             gen_helper_sve_st3hh_le_r,
4988             gen_helper_sve_st3ss_le_r,
4989             gen_helper_sve_st3dd_le_r },
4990           { gen_helper_sve_st4bb_r,
4991             gen_helper_sve_st4hh_le_r,
4992             gen_helper_sve_st4ss_le_r,
4993             gen_helper_sve_st4dd_le_r } },
4994         { { gen_helper_sve_st2bb_r,
4995             gen_helper_sve_st2hh_be_r,
4996             gen_helper_sve_st2ss_be_r,
4997             gen_helper_sve_st2dd_be_r },
4998           { gen_helper_sve_st3bb_r,
4999             gen_helper_sve_st3hh_be_r,
5000             gen_helper_sve_st3ss_be_r,
5001             gen_helper_sve_st3dd_be_r },
5002           { gen_helper_sve_st4bb_r,
5003             gen_helper_sve_st4hh_be_r,
5004             gen_helper_sve_st4ss_be_r,
5005             gen_helper_sve_st4dd_be_r } },
5006     };
5007     gen_helper_gvec_mem *fn;
5008     int be = s->be_data == MO_BE;
5009
5010     if (nreg == 0) {
5011         /* ST1 */
5012         fn = fn_single[be][msz][esz];
5013     } else {
5014         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5015         assert(msz == esz);
5016         fn = fn_multiple[be][nreg - 1][msz];
5017     }
5018     assert(fn != NULL);
5019     do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
5020 }
5021
5022 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5023 {
5024     if (a->rm == 31 || a->msz > a->esz) {
5025         return false;
5026     }
5027     if (sve_access_check(s)) {
5028         TCGv_i64 addr = new_tmp_a64(s);
5029         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5030         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5031         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5032     }
5033     return true;
5034 }
5035
5036 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5037 {
5038     if (a->msz > a->esz) {
5039         return false;
5040     }
5041     if (sve_access_check(s)) {
5042         int vsz = vec_full_reg_size(s);
5043         int elements = vsz >> a->esz;
5044         TCGv_i64 addr = new_tmp_a64(s);
5045
5046         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5047                          (a->imm * elements * (a->nreg + 1)) << a->msz);
5048         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5049     }
5050     return true;
5051 }
5052
5053 /*
5054  *** SVE gather loads / scatter stores
5055  */
5056
5057 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5058                        int scale, TCGv_i64 scalar, int msz,
5059                        gen_helper_gvec_mem_scatter *fn)
5060 {
5061     unsigned vsz = vec_full_reg_size(s);
5062     TCGv_ptr t_zm = tcg_temp_new_ptr();
5063     TCGv_ptr t_pg = tcg_temp_new_ptr();
5064     TCGv_ptr t_zt = tcg_temp_new_ptr();
5065     TCGv_i32 t_desc;
5066     int desc;
5067
5068     desc = sve_memopidx(s, msz_dtype(msz));
5069     desc |= scale << MEMOPIDX_SHIFT;
5070     desc = simd_desc(vsz, vsz, desc);
5071     t_desc = tcg_const_i32(desc);
5072
5073     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5074     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5075     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5076     fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5077
5078     tcg_temp_free_ptr(t_zt);
5079     tcg_temp_free_ptr(t_zm);
5080     tcg_temp_free_ptr(t_pg);
5081     tcg_temp_free_i32(t_desc);
5082 }
5083
/*
 * Gather-load helpers used when esz == MO_32.
 *
 * Indexed by [be][ff][xs][u][msz]:
 *   be  - 1 when s->be_data == MO_BE, else 0
 *   ff  - 0 for the first group of each endianness, 1 for the group
 *         marked "First-fault"
 *   xs  - selects the _zsu (0) or _zss (1) helper variants
 *   u   - selects the "ss" (0) or "su" (1) helper variants
 *   msz - 0, 1 or 2
 *
 * The [u = 0][msz = 2] slots are NULL; the callers assert that the
 * selected entry is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
5142
/*
 * Gather-load helpers used when esz == MO_64.
 *
 * Indexed by [be][ff][xs][u][msz], laid out like gather_load_fn32 but
 * with a third xs slot and a fourth msz slot.
 *
 * Note that we overload xs=2 to indicate 64-bit offset (the _zd
 * helpers); trans_LD1_zpiz always uses that slot for MO_64.
 *
 * The [u = 0][msz = 3] slots are NULL; the callers assert that the
 * selected entry is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
5249
5250 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5251 {
5252     gen_helper_gvec_mem_scatter *fn = NULL;
5253     int be = s->be_data == MO_BE;
5254
5255     if (!sve_access_check(s)) {
5256         return true;
5257     }
5258
5259     switch (a->esz) {
5260     case MO_32:
5261         fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5262         break;
5263     case MO_64:
5264         fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5265         break;
5266     }
5267     assert(fn != NULL);
5268
5269     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5270                cpu_reg_sp(s, a->rn), a->msz, fn);
5271     return true;
5272 }
5273
5274 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5275 {
5276     gen_helper_gvec_mem_scatter *fn = NULL;
5277     int be = s->be_data == MO_BE;
5278     TCGv_i64 imm;
5279
5280     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5281         return false;
5282     }
5283     if (!sve_access_check(s)) {
5284         return true;
5285     }
5286
5287     switch (a->esz) {
5288     case MO_32:
5289         fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
5290         break;
5291     case MO_64:
5292         fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
5293         break;
5294     }
5295     assert(fn != NULL);
5296
5297     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5298      * by loading the immediate into the scalar parameter.
5299      */
5300     imm = tcg_const_i64(a->imm << a->msz);
5301     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5302     tcg_temp_free_i64(imm);
5303     return true;
5304 }
5305
/*
 * Scatter-store helpers used when esz == MO_32.
 *
 * Indexed by [be][xs][msz]:
 *   be  - 1 when s->be_data == MO_BE, else 0
 *   xs  - selects the _zsu (0) or _zss (1) helper variants
 *   msz - 0, 1 or 2
 *
 * Every slot is populated.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
5323
/*
 * Scatter-store helpers used when esz == MO_64.
 *
 * Indexed by [be][xs][msz], with msz running from 0 to 3.
 *
 * Note that we overload xs=2 to indicate 64-bit offset (the _zd
 * helpers); trans_ST1_zpiz always uses that slot for MO_64.
 *
 * Every slot is populated.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
5353
5354 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5355 {
5356     gen_helper_gvec_mem_scatter *fn;
5357     int be = s->be_data == MO_BE;
5358
5359     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5360         return false;
5361     }
5362     if (!sve_access_check(s)) {
5363         return true;
5364     }
5365     switch (a->esz) {
5366     case MO_32:
5367         fn = scatter_store_fn32[be][a->xs][a->msz];
5368         break;
5369     case MO_64:
5370         fn = scatter_store_fn64[be][a->xs][a->msz];
5371         break;
5372     default:
5373         g_assert_not_reached();
5374     }
5375     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5376                cpu_reg_sp(s, a->rn), a->msz, fn);
5377     return true;
5378 }
5379
5380 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5381 {
5382     gen_helper_gvec_mem_scatter *fn = NULL;
5383     int be = s->be_data == MO_BE;
5384     TCGv_i64 imm;
5385
5386     if (a->esz < a->msz) {
5387         return false;
5388     }
5389     if (!sve_access_check(s)) {
5390         return true;
5391     }
5392
5393     switch (a->esz) {
5394     case MO_32:
5395         fn = scatter_store_fn32[be][0][a->msz];
5396         break;
5397     case MO_64:
5398         fn = scatter_store_fn64[be][2][a->msz];
5399         break;
5400     }
5401     assert(fn != NULL);
5402
5403     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5404      * by loading the immediate into the scalar parameter.
5405      */
5406     imm = tcg_const_i64(a->imm << a->msz);
5407     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5408     tcg_temp_free_i64(imm);
5409     return true;
5410 }
5411
5412 /*
5413  * Prefetches
5414  */
5415
5416 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5417 {
5418     /* Prefetch is a nop within QEMU.  */
5419     (void)sve_access_check(s);
5420     return true;
5421 }
5422
5423 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5424 {
5425     if (a->rm == 31) {
5426         return false;
5427     }
5428     /* Prefetch is a nop within QEMU.  */
5429     (void)sve_access_check(s);
5430     return true;
5431 }
5432
5433 /*
5434  * Move Prefix
5435  *
5436  * TODO: The implementation so far could handle predicated merging movprfx.
5437  * The helper functions as written take an extra source register to
5438  * use in the operation, but the result is only written when predication
5439  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5440  * to allow the final write back to the destination to be unconditional.
5441  * For predicated zeroing movprfx, we need to rearrange the helpers to
5442  * allow the final write back to zero inactives.
5443  *
5444  * In the meantime, just emit the moves.
5445  */
5446
5447 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
5448 {
5449     return do_mov_z(s, a->rd, a->rn);
5450 }
5451
5452 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5453 {
5454     if (sve_access_check(s)) {
5455         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5456     }
5457     return true;
5458 }
5459
5460 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5461 {
5462     if (sve_access_check(s)) {
5463         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5464     }
5465     return true;
5466 }