target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg/tcg-op.h"
  24 #include "tcg/tcg-op-gvec.h"
  25 #include "tcg/tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
  38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                          TCGv_i64, uint32_t, uint32_t);
  40
  41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                      TCGv_ptr, TCGv_i32);
  43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                      TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                          TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(DisasContext *s, int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(DisasContext *s, int x)
  64 {
  65     return (16 << tszimm_esz(s, x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(DisasContext *s, int x)
  70 {
  71     return x - (8 << tszimm_esz(s, x));
  72 }
  73
  74 static inline int plus1(DisasContext *s, int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(DisasContext *s, int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(DisasContext *s, int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(DisasContext *s, int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
/* Subroutines computing the ARM PredTest pseudofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
 282 {
 283     return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 284 }
 285
 286 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
 287 {
 288     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 289 }
 290
 291 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
 292 {
 293     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 294 }
 295
 296 /*
 297  *** SVE Integer Arithmetic - Unpredicated Group
 298  */
 299
 300 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
 301 {
 302     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 303 }
 304
 305 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
 306 {
 307     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 308 }
 309
 310 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 311 {
 312     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 313 }
 314
 315 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 316 {
 317     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 318 }
 319
 320 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 321 {
 322     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 323 }
 324
 325 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 326 {
 327     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 328 }
 329
 330 /*
 331  *** SVE Integer Arithmetic - Binary Predicated Group
 332  */
 333
 334 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 335 {
 336     unsigned vsz = vec_full_reg_size(s);
 337     if (fn == NULL) {
 338         return false;
 339     }
 340     if (sve_access_check(s)) {
 341         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 342                            vec_full_reg_offset(s, a->rn),
 343                            vec_full_reg_offset(s, a->rm),
 344                            pred_full_reg_offset(s, a->pg),
 345                            vsz, vsz, 0, fn);
 346     }
 347     return true;
 348 }
 349
 350 /* Select active elememnts from Zn and inactive elements from Zm,
 351  * storing the result in Zd.
 352  */
 353 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 354 {
 355     static gen_helper_gvec_4 * const fns[4] = {
 356         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 357         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 358     };
 359     unsigned vsz = vec_full_reg_size(s);
 360     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 361                        vec_full_reg_offset(s, rn),
 362                        vec_full_reg_offset(s, rm),
 363                        pred_full_reg_offset(s, pg),
 364                        vsz, vsz, 0, fns[esz]);
 365 }
 366
 367 #define DO_ZPZZ(NAME, name) \
 368 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
 369 {                                                                         \
 370     static gen_helper_gvec_4 * const fns[4] = {                           \
 371         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 372         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 373     };                                                                    \
 374     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 375 }
 376
 377 DO_ZPZZ(AND, and)
 378 DO_ZPZZ(EOR, eor)
 379 DO_ZPZZ(ORR, orr)
 380 DO_ZPZZ(BIC, bic)
 381
 382 DO_ZPZZ(ADD, add)
 383 DO_ZPZZ(SUB, sub)
 384
 385 DO_ZPZZ(SMAX, smax)
 386 DO_ZPZZ(UMAX, umax)
 387 DO_ZPZZ(SMIN, smin)
 388 DO_ZPZZ(UMIN, umin)
 389 DO_ZPZZ(SABD, sabd)
 390 DO_ZPZZ(UABD, uabd)
 391
 392 DO_ZPZZ(MUL, mul)
 393 DO_ZPZZ(SMULH, smulh)
 394 DO_ZPZZ(UMULH, umulh)
 395
 396 DO_ZPZZ(ASR, asr)
 397 DO_ZPZZ(LSR, lsr)
 398 DO_ZPZZ(LSL, lsl)
 399
 400 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 401 {
 402     static gen_helper_gvec_4 * const fns[4] = {
 403         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 404     };
 405     return do_zpzz_ool(s, a, fns[a->esz]);
 406 }
 407
 408 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 409 {
 410     static gen_helper_gvec_4 * const fns[4] = {
 411         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 412     };
 413     return do_zpzz_ool(s, a, fns[a->esz]);
 414 }
 415
 416 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 417 {
 418     if (sve_access_check(s)) {
 419         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 420     }
 421     return true;
 422 }
 423
 424 #undef DO_ZPZZ
 425
 426 /*
 427  *** SVE Integer Arithmetic - Unary Predicated Group
 428  */
 429
 430 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 431 {
 432     if (fn == NULL) {
 433         return false;
 434     }
 435     if (sve_access_check(s)) {
 436         unsigned vsz = vec_full_reg_size(s);
 437         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 438                            vec_full_reg_offset(s, a->rn),
 439                            pred_full_reg_offset(s, a->pg),
 440                            vsz, vsz, 0, fn);
 441     }
 442     return true;
 443 }
 444
 445 #define DO_ZPZ(NAME, name) \
 446 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
 447 {                                                                   \
 448     static gen_helper_gvec_3 * const fns[4] = {                     \
 449         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 450         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 451     };                                                              \
 452     return do_zpz_ool(s, a, fns[a->esz]);                           \
 453 }
 454
 455 DO_ZPZ(CLS, cls)
 456 DO_ZPZ(CLZ, clz)
 457 DO_ZPZ(CNT_zpz, cnt_zpz)
 458 DO_ZPZ(CNOT, cnot)
 459 DO_ZPZ(NOT_zpz, not_zpz)
 460 DO_ZPZ(ABS, abs)
 461 DO_ZPZ(NEG, neg)
 462
 463 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
 464 {
 465     static gen_helper_gvec_3 * const fns[4] = {
 466         NULL,
 467         gen_helper_sve_fabs_h,
 468         gen_helper_sve_fabs_s,
 469         gen_helper_sve_fabs_d
 470     };
 471     return do_zpz_ool(s, a, fns[a->esz]);
 472 }
 473
 474 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
 475 {
 476     static gen_helper_gvec_3 * const fns[4] = {
 477         NULL,
 478         gen_helper_sve_fneg_h,
 479         gen_helper_sve_fneg_s,
 480         gen_helper_sve_fneg_d
 481     };
 482     return do_zpz_ool(s, a, fns[a->esz]);
 483 }
 484
 485 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
 486 {
 487     static gen_helper_gvec_3 * const fns[4] = {
 488         NULL,
 489         gen_helper_sve_sxtb_h,
 490         gen_helper_sve_sxtb_s,
 491         gen_helper_sve_sxtb_d
 492     };
 493     return do_zpz_ool(s, a, fns[a->esz]);
 494 }
 495
 496 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
 497 {
 498     static gen_helper_gvec_3 * const fns[4] = {
 499         NULL,
 500         gen_helper_sve_uxtb_h,
 501         gen_helper_sve_uxtb_s,
 502         gen_helper_sve_uxtb_d
 503     };
 504     return do_zpz_ool(s, a, fns[a->esz]);
 505 }
 506
 507 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
 508 {
 509     static gen_helper_gvec_3 * const fns[4] = {
 510         NULL, NULL,
 511         gen_helper_sve_sxth_s,
 512         gen_helper_sve_sxth_d
 513     };
 514     return do_zpz_ool(s, a, fns[a->esz]);
 515 }
 516
 517 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
 518 {
 519     static gen_helper_gvec_3 * const fns[4] = {
 520         NULL, NULL,
 521         gen_helper_sve_uxth_s,
 522         gen_helper_sve_uxth_d
 523     };
 524     return do_zpz_ool(s, a, fns[a->esz]);
 525 }
 526
 527 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
 528 {
 529     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 530 }
 531
 532 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 535 }
 536
 537 #undef DO_ZPZ
 538
 539 /*
 540  *** SVE Integer Reduction Group
 541  */
 542
 543 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 544 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 545                        gen_helper_gvec_reduc *fn)
 546 {
 547     unsigned vsz = vec_full_reg_size(s);
 548     TCGv_ptr t_zn, t_pg;
 549     TCGv_i32 desc;
 550     TCGv_i64 temp;
 551
 552     if (fn == NULL) {
 553         return false;
 554     }
 555     if (!sve_access_check(s)) {
 556         return true;
 557     }
 558
 559     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 560     temp = tcg_temp_new_i64();
 561     t_zn = tcg_temp_new_ptr();
 562     t_pg = tcg_temp_new_ptr();
 563
 564     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 565     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 566     fn(temp, t_zn, t_pg, desc);
 567     tcg_temp_free_ptr(t_zn);
 568     tcg_temp_free_ptr(t_pg);
 569     tcg_temp_free_i32(desc);
 570
 571     write_fp_dreg(s, a->rd, temp);
 572     tcg_temp_free_i64(temp);
 573     return true;
 574 }
 575
 576 #define DO_VPZ(NAME, name) \
 577 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
 578 {                                                                        \
 579     static gen_helper_gvec_reduc * const fns[4] = {                      \
 580         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 581         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 582     };                                                                   \
 583     return do_vpz_ool(s, a, fns[a->esz]);                                \
 584 }
 585
 586 DO_VPZ(ORV, orv)
 587 DO_VPZ(ANDV, andv)
 588 DO_VPZ(EORV, eorv)
 589
 590 DO_VPZ(UADDV, uaddv)
 591 DO_VPZ(SMAXV, smaxv)
 592 DO_VPZ(UMAXV, umaxv)
 593 DO_VPZ(SMINV, sminv)
 594 DO_VPZ(UMINV, uminv)
 595
 596 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
 597 {
 598     static gen_helper_gvec_reduc * const fns[4] = {
 599         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 600         gen_helper_sve_saddv_s, NULL
 601     };
 602     return do_vpz_ool(s, a, fns[a->esz]);
 603 }
 604
 605 #undef DO_VPZ
 606
 607 /*
 608  *** SVE Shift by Immediate - Predicated Group
 609  */
 610
 611 /* Store zero into every active element of Zd.  We will use this for two
 612  * and three-operand predicated instructions for which logic dictates a
 613  * zero result.
 614  */
 615 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 616 {
 617     static gen_helper_gvec_2 * const fns[4] = {
 618         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 619         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 620     };
 621     if (sve_access_check(s)) {
 622         unsigned vsz = vec_full_reg_size(s);
 623         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 624                            pred_full_reg_offset(s, pg),
 625                            vsz, vsz, 0, fns[esz]);
 626     }
 627     return true;
 628 }
 629
 630 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 631 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 632 {
 633     static gen_helper_gvec_3 * const fns[4] = {
 634         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 635         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 636     };
 637     unsigned vsz = vec_full_reg_size(s);
 638     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 639                        vec_full_reg_offset(s, rn),
 640                        pred_full_reg_offset(s, pg),
 641                        vsz, vsz, 0, fns[esz]);
 642 }
 643
 644 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 645                         gen_helper_gvec_3 *fn)
 646 {
 647     if (sve_access_check(s)) {
 648         unsigned vsz = vec_full_reg_size(s);
 649         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 650                            vec_full_reg_offset(s, a->rn),
 651                            pred_full_reg_offset(s, a->pg),
 652                            vsz, vsz, a->imm, fn);
 653     }
 654     return true;
 655 }
 656
 657 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
 658 {
 659     static gen_helper_gvec_3 * const fns[4] = {
 660         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 661         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 662     };
 663     if (a->esz < 0) {
 664         /* Invalid tsz encoding -- see tszimm_esz. */
 665         return false;
 666     }
 667     /* Shift by element size is architecturally valid.  For
 668        arithmetic right-shift, it's the same as by one less. */
 669     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 670     return do_zpzi_ool(s, a, fns[a->esz]);
 671 }
 672
 673 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
 674 {
 675     static gen_helper_gvec_3 * const fns[4] = {
 676         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 677         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 678     };
 679     if (a->esz < 0) {
 680         return false;
 681     }
 682     /* Shift by element size is architecturally valid.
 683        For logical shifts, it is a zeroing operation.  */
 684     if (a->imm >= (8 << a->esz)) {
 685         return do_clr_zp(s, a->rd, a->pg, a->esz);
 686     } else {
 687         return do_zpzi_ool(s, a, fns[a->esz]);
 688     }
 689 }
 690
 691 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
 692 {
 693     static gen_helper_gvec_3 * const fns[4] = {
 694         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 695         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 696     };
 697     if (a->esz < 0) {
 698         return false;
 699     }
 700     /* Shift by element size is architecturally valid.
 701        For logical shifts, it is a zeroing operation.  */
 702     if (a->imm >= (8 << a->esz)) {
 703         return do_clr_zp(s, a->rd, a->pg, a->esz);
 704     } else {
 705         return do_zpzi_ool(s, a, fns[a->esz]);
 706     }
 707 }
 708
 709 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
 710 {
 711     static gen_helper_gvec_3 * const fns[4] = {
 712         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 713         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 714     };
 715     if (a->esz < 0) {
 716         return false;
 717     }
 718     /* Shift by element size is architecturally valid.  For arithmetic
 719        right shift for division, it is a zeroing operation.  */
 720     if (a->imm >= (8 << a->esz)) {
 721         return do_clr_zp(s, a->rd, a->pg, a->esz);
 722     } else {
 723         return do_zpzi_ool(s, a, fns[a->esz]);
 724     }
 725 }
 726
 727 /*
 728  *** SVE Bitwise Shift - Predicated Group
 729  */
 730
 731 #define DO_ZPZW(NAME, name) \
 732 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
 733 {                                                                         \
 734     static gen_helper_gvec_4 * const fns[3] = {                           \
 735         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 736         gen_helper_sve_##name##_zpzw_s,                                   \
 737     };                                                                    \
 738     if (a->esz < 0 || a->esz >= 3) {                                      \
 739         return false;                                                     \
 740     }                                                                     \
 741     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 742 }
 743
 744 DO_ZPZW(ASR, asr)
 745 DO_ZPZW(LSR, lsr)
 746 DO_ZPZW(LSL, lsl)
 747
 748 #undef DO_ZPZW
 749
 750 /*
 751  *** SVE Bitwise Shift - Unpredicated Group
 752  */
 753
 754 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 755                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 756                                          int64_t, uint32_t, uint32_t))
 757 {
 758     if (a->esz < 0) {
 759         /* Invalid tsz encoding -- see tszimm_esz. */
 760         return false;
 761     }
 762     if (sve_access_check(s)) {
 763         unsigned vsz = vec_full_reg_size(s);
 764         /* Shift by element size is architecturally valid.  For
 765            arithmetic right-shift, it's the same as by one less.
 766            Otherwise it is a zeroing operation.  */
 767         if (a->imm >= 8 << a->esz) {
 768             if (asr) {
 769                 a->imm = (8 << a->esz) - 1;
 770             } else {
 771                 do_dupi_z(s, a->rd, 0);
 772                 return true;
 773             }
 774         }
 775         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 776                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 777     }
 778     return true;
 779 }
 780
 781 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
 782 {
 783     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 784 }
 785
 786 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
 787 {
 788     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 789 }
 790
 791 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
 792 {
 793     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 794 }
 795
 796 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 797 {
 798     if (fn == NULL) {
 799         return false;
 800     }
 801     if (sve_access_check(s)) {
 802         unsigned vsz = vec_full_reg_size(s);
 803         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 804                            vec_full_reg_offset(s, a->rn),
 805                            vec_full_reg_offset(s, a->rm),
 806                            vsz, vsz, 0, fn);
 807     }
 808     return true;
 809 }
 810
 811 #define DO_ZZW(NAME, name) \
 812 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
 813 {                                                                         \
 814     static gen_helper_gvec_3 * const fns[4] = {                           \
 815         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 816         gen_helper_sve_##name##_zzw_s, NULL                               \
 817     };                                                                    \
 818     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 819 }
 820
 821 DO_ZZW(ASR, asr)
 822 DO_ZZW(LSR, lsr)
 823 DO_ZZW(LSL, lsl)
 824
 825 #undef DO_ZZW
 826
 827 /*
 828  *** SVE Integer Multiply-Add Group
 829  */
 830
 831 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 832                          gen_helper_gvec_5 *fn)
 833 {
 834     if (sve_access_check(s)) {
 835         unsigned vsz = vec_full_reg_size(s);
 836         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 837                            vec_full_reg_offset(s, a->ra),
 838                            vec_full_reg_offset(s, a->rn),
 839                            vec_full_reg_offset(s, a->rm),
 840                            pred_full_reg_offset(s, a->pg),
 841                            vsz, vsz, 0, fn);
 842     }
 843     return true;
 844 }
 845
 846 #define DO_ZPZZZ(NAME, name) \
 847 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
 848 {                                                                    \
 849     static gen_helper_gvec_5 * const fns[4] = {                      \
 850         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 851         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 852     };                                                               \
 853     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 854 }
 855
 856 DO_ZPZZZ(MLA, mla)
 857 DO_ZPZZZ(MLS, mls)
 858
 859 #undef DO_ZPZZZ
 860
 861 /*
 862  *** SVE Index Generation Group
 863  */
 864
 865 static void do_index(DisasContext *s, int esz, int rd,
 866                      TCGv_i64 start, TCGv_i64 incr)
 867 {
 868     unsigned vsz = vec_full_reg_size(s);
 869     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 870     TCGv_ptr t_zd = tcg_temp_new_ptr();
 871
 872     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 873     if (esz == 3) {
 874         gen_helper_sve_index_d(t_zd, start, incr, desc);
 875     } else {
 876         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 877         static index_fn * const fns[3] = {
 878             gen_helper_sve_index_b,
 879             gen_helper_sve_index_h,
 880             gen_helper_sve_index_s,
 881         };
 882         TCGv_i32 s32 = tcg_temp_new_i32();
 883         TCGv_i32 i32 = tcg_temp_new_i32();
 884
 885         tcg_gen_extrl_i64_i32(s32, start);
 886         tcg_gen_extrl_i64_i32(i32, incr);
 887         fns[esz](t_zd, s32, i32, desc);
 888
 889         tcg_temp_free_i32(s32);
 890         tcg_temp_free_i32(i32);
 891     }
 892     tcg_temp_free_ptr(t_zd);
 893     tcg_temp_free_i32(desc);
 894 }
 895
 896 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
 897 {
 898     if (sve_access_check(s)) {
 899         TCGv_i64 start = tcg_const_i64(a->imm1);
 900         TCGv_i64 incr = tcg_const_i64(a->imm2);
 901         do_index(s, a->esz, a->rd, start, incr);
 902         tcg_temp_free_i64(start);
 903         tcg_temp_free_i64(incr);
 904     }
 905     return true;
 906 }
 907
 908 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
 909 {
 910     if (sve_access_check(s)) {
 911         TCGv_i64 start = tcg_const_i64(a->imm);
 912         TCGv_i64 incr = cpu_reg(s, a->rm);
 913         do_index(s, a->esz, a->rd, start, incr);
 914         tcg_temp_free_i64(start);
 915     }
 916     return true;
 917 }
 918
 919 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
 920 {
 921     if (sve_access_check(s)) {
 922         TCGv_i64 start = cpu_reg(s, a->rn);
 923         TCGv_i64 incr = tcg_const_i64(a->imm);
 924         do_index(s, a->esz, a->rd, start, incr);
 925         tcg_temp_free_i64(incr);
 926     }
 927     return true;
 928 }
 929
 930 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
 931 {
 932     if (sve_access_check(s)) {
 933         TCGv_i64 start = cpu_reg(s, a->rn);
 934         TCGv_i64 incr = cpu_reg(s, a->rm);
 935         do_index(s, a->esz, a->rd, start, incr);
 936     }
 937     return true;
 938 }
 939
 940 /*
 941  *** SVE Stack Allocation Group
 942  */
 943
 944 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 945 {
 946     if (sve_access_check(s)) {
 947         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 948         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 949         tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 950     }
 951     return true;
 952 }
 953
 954 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 955 {
 956     if (sve_access_check(s)) {
 957         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 958         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 959         tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 960     }
 961     return true;
 962 }
 963
 964 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 965 {
 966     if (sve_access_check(s)) {
 967         TCGv_i64 reg = cpu_reg(s, a->rd);
 968         tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 969     }
 970     return true;
 971 }
 972
 973 /*
 974  *** SVE Compute Vector Address Group
 975  */
 976
 977 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 978 {
 979     if (sve_access_check(s)) {
 980         unsigned vsz = vec_full_reg_size(s);
 981         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 982                            vec_full_reg_offset(s, a->rn),
 983                            vec_full_reg_offset(s, a->rm),
 984                            vsz, vsz, a->imm, fn);
 985     }
 986     return true;
 987 }
 988
 989 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
 990 {
 991     return do_adr(s, a, gen_helper_sve_adr_p32);
 992 }
 993
 994 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
 995 {
 996     return do_adr(s, a, gen_helper_sve_adr_p64);
 997 }
 998
 999 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
1000 {
1001     return do_adr(s, a, gen_helper_sve_adr_s32);
1002 }
1003
1004 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
1005 {
1006     return do_adr(s, a, gen_helper_sve_adr_u32);
1007 }
1008
1009 /*
1010  *** SVE Integer Misc - Unpredicated Group
1011  */
1012
1013 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1014 {
1015     static gen_helper_gvec_2 * const fns[4] = {
1016         NULL,
1017         gen_helper_sve_fexpa_h,
1018         gen_helper_sve_fexpa_s,
1019         gen_helper_sve_fexpa_d,
1020     };
1021     if (a->esz == 0) {
1022         return false;
1023     }
1024     if (sve_access_check(s)) {
1025         unsigned vsz = vec_full_reg_size(s);
1026         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1027                            vec_full_reg_offset(s, a->rn),
1028                            vsz, vsz, 0, fns[a->esz]);
1029     }
1030     return true;
1031 }
1032
1033 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1034 {
1035     static gen_helper_gvec_3 * const fns[4] = {
1036         NULL,
1037         gen_helper_sve_ftssel_h,
1038         gen_helper_sve_ftssel_s,
1039         gen_helper_sve_ftssel_d,
1040     };
1041     if (a->esz == 0) {
1042         return false;
1043     }
1044     if (sve_access_check(s)) {
1045         unsigned vsz = vec_full_reg_size(s);
1046         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1047                            vec_full_reg_offset(s, a->rn),
1048                            vec_full_reg_offset(s, a->rm),
1049                            vsz, vsz, 0, fns[a->esz]);
1050     }
1051     return true;
1052 }
1053
1054 /*
1055  *** SVE Predicate Logical Operations Group
1056  */
1057
1058 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1059                           const GVecGen4 *gvec_op)
1060 {
1061     if (!sve_access_check(s)) {
1062         return true;
1063     }
1064
1065     unsigned psz = pred_gvec_reg_size(s);
1066     int dofs = pred_full_reg_offset(s, a->rd);
1067     int nofs = pred_full_reg_offset(s, a->rn);
1068     int mofs = pred_full_reg_offset(s, a->rm);
1069     int gofs = pred_full_reg_offset(s, a->pg);
1070
1071     if (psz == 8) {
1072         /* Do the operation and the flags generation in temps.  */
1073         TCGv_i64 pd = tcg_temp_new_i64();
1074         TCGv_i64 pn = tcg_temp_new_i64();
1075         TCGv_i64 pm = tcg_temp_new_i64();
1076         TCGv_i64 pg = tcg_temp_new_i64();
1077
1078         tcg_gen_ld_i64(pn, cpu_env, nofs);
1079         tcg_gen_ld_i64(pm, cpu_env, mofs);
1080         tcg_gen_ld_i64(pg, cpu_env, gofs);
1081
1082         gvec_op->fni8(pd, pn, pm, pg);
1083         tcg_gen_st_i64(pd, cpu_env, dofs);
1084
1085         do_predtest1(pd, pg);
1086
1087         tcg_temp_free_i64(pd);
1088         tcg_temp_free_i64(pn);
1089         tcg_temp_free_i64(pm);
1090         tcg_temp_free_i64(pg);
1091     } else {
1092         /* The operation and flags generation is large.  The computation
1093          * of the flags depends on the original contents of the guarding
1094          * predicate.  If the destination overwrites the guarding predicate,
1095          * then the easiest way to get this right is to save a copy.
1096           */
1097         int tofs = gofs;
1098         if (a->rd == a->pg) {
1099             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1100             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1101         }
1102
1103         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1104         do_predtest(s, dofs, tofs, psz / 8);
1105     }
1106     return true;
1107 }
1108
1109 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1110 {
1111     tcg_gen_and_i64(pd, pn, pm);
1112     tcg_gen_and_i64(pd, pd, pg);
1113 }
1114
1115 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1116                            TCGv_vec pm, TCGv_vec pg)
1117 {
1118     tcg_gen_and_vec(vece, pd, pn, pm);
1119     tcg_gen_and_vec(vece, pd, pd, pg);
1120 }
1121
1122 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1123 {
1124     static const GVecGen4 op = {
1125         .fni8 = gen_and_pg_i64,
1126         .fniv = gen_and_pg_vec,
1127         .fno = gen_helper_sve_and_pppp,
1128         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1129     };
1130     if (a->s) {
1131         return do_pppp_flags(s, a, &op);
1132     } else if (a->rn == a->rm) {
1133         if (a->pg == a->rn) {
1134             return do_mov_p(s, a->rd, a->rn);
1135         } else {
1136             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1137         }
1138     } else if (a->pg == a->rn || a->pg == a->rm) {
1139         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1140     } else {
1141         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1142     }
1143 }
1144
1145 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1146 {
1147     tcg_gen_andc_i64(pd, pn, pm);
1148     tcg_gen_and_i64(pd, pd, pg);
1149 }
1150
1151 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1152                            TCGv_vec pm, TCGv_vec pg)
1153 {
1154     tcg_gen_andc_vec(vece, pd, pn, pm);
1155     tcg_gen_and_vec(vece, pd, pd, pg);
1156 }
1157
1158 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1159 {
1160     static const GVecGen4 op = {
1161         .fni8 = gen_bic_pg_i64,
1162         .fniv = gen_bic_pg_vec,
1163         .fno = gen_helper_sve_bic_pppp,
1164         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1165     };
1166     if (a->s) {
1167         return do_pppp_flags(s, a, &op);
1168     } else if (a->pg == a->rn) {
1169         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1170     } else {
1171         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1172     }
1173 }
1174
1175 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1176 {
1177     tcg_gen_xor_i64(pd, pn, pm);
1178     tcg_gen_and_i64(pd, pd, pg);
1179 }
1180
1181 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1182                            TCGv_vec pm, TCGv_vec pg)
1183 {
1184     tcg_gen_xor_vec(vece, pd, pn, pm);
1185     tcg_gen_and_vec(vece, pd, pd, pg);
1186 }
1187
1188 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1189 {
1190     static const GVecGen4 op = {
1191         .fni8 = gen_eor_pg_i64,
1192         .fniv = gen_eor_pg_vec,
1193         .fno = gen_helper_sve_eor_pppp,
1194         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1195     };
1196     if (a->s) {
1197         return do_pppp_flags(s, a, &op);
1198     } else {
1199         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1200     }
1201 }
1202
1203 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1204 {
1205     tcg_gen_and_i64(pn, pn, pg);
1206     tcg_gen_andc_i64(pm, pm, pg);
1207     tcg_gen_or_i64(pd, pn, pm);
1208 }
1209
1210 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1211                            TCGv_vec pm, TCGv_vec pg)
1212 {
1213     tcg_gen_and_vec(vece, pn, pn, pg);
1214     tcg_gen_andc_vec(vece, pm, pm, pg);
1215     tcg_gen_or_vec(vece, pd, pn, pm);
1216 }
1217
1218 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1219 {
1220     static const GVecGen4 op = {
1221         .fni8 = gen_sel_pg_i64,
1222         .fniv = gen_sel_pg_vec,
1223         .fno = gen_helper_sve_sel_pppp,
1224         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1225     };
1226     if (a->s) {
1227         return false;
1228     } else {
1229         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1230     }
1231 }
1232
1233 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1234 {
1235     tcg_gen_or_i64(pd, pn, pm);
1236     tcg_gen_and_i64(pd, pd, pg);
1237 }
1238
1239 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1240                            TCGv_vec pm, TCGv_vec pg)
1241 {
1242     tcg_gen_or_vec(vece, pd, pn, pm);
1243     tcg_gen_and_vec(vece, pd, pd, pg);
1244 }
1245
1246 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1247 {
1248     static const GVecGen4 op = {
1249         .fni8 = gen_orr_pg_i64,
1250         .fniv = gen_orr_pg_vec,
1251         .fno = gen_helper_sve_orr_pppp,
1252         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1253     };
1254     if (a->s) {
1255         return do_pppp_flags(s, a, &op);
1256     } else if (a->pg == a->rn && a->rn == a->rm) {
1257         return do_mov_p(s, a->rd, a->rn);
1258     } else {
1259         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1260     }
1261 }
1262
1263 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1264 {
1265     tcg_gen_orc_i64(pd, pn, pm);
1266     tcg_gen_and_i64(pd, pd, pg);
1267 }
1268
1269 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1270                            TCGv_vec pm, TCGv_vec pg)
1271 {
1272     tcg_gen_orc_vec(vece, pd, pn, pm);
1273     tcg_gen_and_vec(vece, pd, pd, pg);
1274 }
1275
1276 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1277 {
1278     static const GVecGen4 op = {
1279         .fni8 = gen_orn_pg_i64,
1280         .fniv = gen_orn_pg_vec,
1281         .fno = gen_helper_sve_orn_pppp,
1282         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1283     };
1284     if (a->s) {
1285         return do_pppp_flags(s, a, &op);
1286     } else {
1287         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1288     }
1289 }
1290
1291 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1292 {
1293     tcg_gen_or_i64(pd, pn, pm);
1294     tcg_gen_andc_i64(pd, pg, pd);
1295 }
1296
1297 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1298                            TCGv_vec pm, TCGv_vec pg)
1299 {
1300     tcg_gen_or_vec(vece, pd, pn, pm);
1301     tcg_gen_andc_vec(vece, pd, pg, pd);
1302 }
1303
1304 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1305 {
1306     static const GVecGen4 op = {
1307         .fni8 = gen_nor_pg_i64,
1308         .fniv = gen_nor_pg_vec,
1309         .fno = gen_helper_sve_nor_pppp,
1310         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1311     };
1312     if (a->s) {
1313         return do_pppp_flags(s, a, &op);
1314     } else {
1315         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1316     }
1317 }
1318
1319 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1320 {
1321     tcg_gen_and_i64(pd, pn, pm);
1322     tcg_gen_andc_i64(pd, pg, pd);
1323 }
1324
1325 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1326                            TCGv_vec pm, TCGv_vec pg)
1327 {
1328     tcg_gen_and_vec(vece, pd, pn, pm);
1329     tcg_gen_andc_vec(vece, pd, pg, pd);
1330 }
1331
1332 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1333 {
1334     static const GVecGen4 op = {
1335         .fni8 = gen_nand_pg_i64,
1336         .fniv = gen_nand_pg_vec,
1337         .fno = gen_helper_sve_nand_pppp,
1338         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1339     };
1340     if (a->s) {
1341         return do_pppp_flags(s, a, &op);
1342     } else {
1343         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1344     }
1345 }
1346
1347 /*
1348  *** SVE Predicate Misc Group
1349  */
1350
1351 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1352 {
1353     if (sve_access_check(s)) {
1354         int nofs = pred_full_reg_offset(s, a->rn);
1355         int gofs = pred_full_reg_offset(s, a->pg);
1356         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1357
1358         if (words == 1) {
1359             TCGv_i64 pn = tcg_temp_new_i64();
1360             TCGv_i64 pg = tcg_temp_new_i64();
1361
1362             tcg_gen_ld_i64(pn, cpu_env, nofs);
1363             tcg_gen_ld_i64(pg, cpu_env, gofs);
1364             do_predtest1(pn, pg);
1365
1366             tcg_temp_free_i64(pn);
1367             tcg_temp_free_i64(pg);
1368         } else {
1369             do_predtest(s, nofs, gofs, words);
1370         }
1371     }
1372     return true;
1373 }
1374
/*
 * Number of elements selected by a predicate-constraint PATTERN; see the
 * ARM pseudocode DecodePredCount.
 *
 * FULLSZ >> ESZ gives the number of elements available.  Fixed-count
 * patterns (VL1..VL8, VL16..VL256) yield their count only if that many
 * elements exist, else 0.  Unrecognized patterns yield 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    const unsigned avail = fullsz >> esz;
    unsigned want;

    if (pattern == 0x0) {
        /* POW2: largest power of two not exceeding the element count. */
        return pow2floor(avail);
    }
    if (pattern == 0x1d) {
        /* MUL4: largest multiple of four. */
        return avail - avail % 4;
    }
    if (pattern == 0x1e) {
        /* MUL3: largest multiple of three. */
        return avail - avail % 3;
    }
    if (pattern == 0x1f) {
        /* ALL */
        return avail;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value is the count itself. */
        want = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256. */
        want = 16 << (pattern - 9);
    } else {
        /* #uimm5: every remaining value selects no elements. */
        return 0;
    }

    return avail >= want ? want : 0;
}
1412
1413 /* This handles all of the predicate initialization instructions,
1414  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1415  * so that decode_pred_count returns 0.  For SETFFR, we will have
1416  * set RD == 16 == FFR.
1417  */
1418 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1419 {
1420     if (!sve_access_check(s)) {
1421         return true;
1422     }
1423
1424     unsigned fullsz = vec_full_reg_size(s);
1425     unsigned ofs = pred_full_reg_offset(s, rd);
1426     unsigned numelem, setsz, i;
1427     uint64_t word, lastword;
1428     TCGv_i64 t;
1429
1430     numelem = decode_pred_count(fullsz, pat, esz);
1431
1432     /* Determine what we must store into each bit, and how many.  */
1433     if (numelem == 0) {
1434         lastword = word = 0;
1435         setsz = fullsz;
1436     } else {
1437         setsz = numelem << esz;
1438         lastword = word = pred_esz_masks[esz];
1439         if (setsz % 64) {
1440             lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1441         }
1442     }
1443
1444     t = tcg_temp_new_i64();
1445     if (fullsz <= 64) {
1446         tcg_gen_movi_i64(t, lastword);
1447         tcg_gen_st_i64(t, cpu_env, ofs);
1448         goto done;
1449     }
1450
1451     if (word == lastword) {
1452         unsigned maxsz = size_for_gvec(fullsz / 8);
1453         unsigned oprsz = size_for_gvec(setsz / 8);
1454
1455         if (oprsz * 8 == setsz) {
1456             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1457             goto done;
1458         }
1459     }
1460
1461     setsz /= 8;
1462     fullsz /= 8;
1463
1464     tcg_gen_movi_i64(t, word);
1465     for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1466         tcg_gen_st_i64(t, cpu_env, ofs + i);
1467     }
1468     if (lastword != word) {
1469         tcg_gen_movi_i64(t, lastword);
1470         tcg_gen_st_i64(t, cpu_env, ofs + i);
1471         i += 8;
1472     }
1473     if (i < fullsz) {
1474         tcg_gen_movi_i64(t, 0);
1475         for (; i < fullsz; i += 8) {
1476             tcg_gen_st_i64(t, cpu_env, ofs + i);
1477         }
1478     }
1479
1480  done:
1481     tcg_temp_free_i64(t);
1482
1483     /* PTRUES */
1484     if (setflag) {
1485         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1486         tcg_gen_movi_i32(cpu_CF, word == 0);
1487         tcg_gen_movi_i32(cpu_VF, 0);
1488         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1489     }
1490     return true;
1491 }
1492
1493 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1494 {
1495     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1496 }
1497
1498 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1499 {
1500     /* Note pat == 31 is #all, to set all elements.  */
1501     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1502 }
1503
1504 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1505 {
1506     /* Note pat == 32 is #unimp, to set no elements.  */
1507     return do_predset(s, 0, a->rd, 32, false);
1508 }
1509
1510 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1511 {
1512     /* The path through do_pppp_flags is complicated enough to want to avoid
1513      * duplication.  Frob the arguments into the form of a predicated AND.
1514      */
1515     arg_rprr_s alt_a = {
1516         .rd = a->rd, .pg = a->pg, .s = a->s,
1517         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1518     };
1519     return trans_AND_pppp(s, &alt_a);
1520 }
1521
1522 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1523 {
1524     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1525 }
1526
1527 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1528 {
1529     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1530 }
1531
1532 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1533                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1534                                            TCGv_ptr, TCGv_i32))
1535 {
1536     if (!sve_access_check(s)) {
1537         return true;
1538     }
1539
1540     TCGv_ptr t_pd = tcg_temp_new_ptr();
1541     TCGv_ptr t_pg = tcg_temp_new_ptr();
1542     TCGv_i32 t;
1543     unsigned desc;
1544
1545     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1546     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1547
1548     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1549     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1550     t = tcg_const_i32(desc);
1551
1552     gen_fn(t, t_pd, t_pg, t);
1553     tcg_temp_free_ptr(t_pd);
1554     tcg_temp_free_ptr(t_pg);
1555
1556     do_pred_flags(t);
1557     tcg_temp_free_i32(t);
1558     return true;
1559 }
1560
1561 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1562 {
1563     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1564 }
1565
1566 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1567 {
1568     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1569 }
1570
1571 /*
1572  *** SVE Element Count Group
1573  */
1574
1575 /* Perform an inline saturating addition of a 32-bit value within
1576  * a 64-bit register.  The second operand is known to be positive,
1577  * which halves the comparisions we must perform to bound the result.
1578  */
1579 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1580 {
1581     int64_t ibound;
1582     TCGv_i64 bound;
1583     TCGCond cond;
1584
1585     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1586     if (u) {
1587         tcg_gen_ext32u_i64(reg, reg);
1588     } else {
1589         tcg_gen_ext32s_i64(reg, reg);
1590     }
1591     if (d) {
1592         tcg_gen_sub_i64(reg, reg, val);
1593         ibound = (u ? 0 : INT32_MIN);
1594         cond = TCG_COND_LT;
1595     } else {
1596         tcg_gen_add_i64(reg, reg, val);
1597         ibound = (u ? UINT32_MAX : INT32_MAX);
1598         cond = TCG_COND_GT;
1599     }
1600     bound = tcg_const_i64(ibound);
1601     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1602     tcg_temp_free_i64(bound);
1603 }
1604
1605 /* Similarly with 64-bit values.  */
1606 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1607 {
1608     TCGv_i64 t0 = tcg_temp_new_i64();
1609     TCGv_i64 t1 = tcg_temp_new_i64();
1610     TCGv_i64 t2;
1611
1612     if (u) {
1613         if (d) {
1614             tcg_gen_sub_i64(t0, reg, val);
1615             tcg_gen_movi_i64(t1, 0);
1616             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1617         } else {
1618             tcg_gen_add_i64(t0, reg, val);
1619             tcg_gen_movi_i64(t1, -1);
1620             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1621         }
1622     } else {
1623         if (d) {
1624             /* Detect signed overflow for subtraction.  */
1625             tcg_gen_xor_i64(t0, reg, val);
1626             tcg_gen_sub_i64(t1, reg, val);
1627             tcg_gen_xor_i64(reg, reg, t1);
1628             tcg_gen_and_i64(t0, t0, reg);
1629
1630             /* Bound the result.  */
1631             tcg_gen_movi_i64(reg, INT64_MIN);
1632             t2 = tcg_const_i64(0);
1633             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1634         } else {
1635             /* Detect signed overflow for addition.  */
1636             tcg_gen_xor_i64(t0, reg, val);
1637             tcg_gen_add_i64(reg, reg, val);
1638             tcg_gen_xor_i64(t1, reg, val);
1639             tcg_gen_andc_i64(t0, t1, t0);
1640
1641             /* Bound the result.  */
1642             tcg_gen_movi_i64(t1, INT64_MAX);
1643             t2 = tcg_const_i64(0);
1644             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1645         }
1646         tcg_temp_free_i64(t2);
1647     }
1648     tcg_temp_free_i64(t0);
1649     tcg_temp_free_i64(t1);
1650 }
1651
1652 /* Similarly with a vector and a scalar operand.  */
1653 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1654                               TCGv_i64 val, bool u, bool d)
1655 {
1656     unsigned vsz = vec_full_reg_size(s);
1657     TCGv_ptr dptr, nptr;
1658     TCGv_i32 t32, desc;
1659     TCGv_i64 t64;
1660
1661     dptr = tcg_temp_new_ptr();
1662     nptr = tcg_temp_new_ptr();
1663     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1664     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1665     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1666
1667     switch (esz) {
1668     case MO_8:
1669         t32 = tcg_temp_new_i32();
1670         tcg_gen_extrl_i64_i32(t32, val);
1671         if (d) {
1672             tcg_gen_neg_i32(t32, t32);
1673         }
1674         if (u) {
1675             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1676         } else {
1677             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1678         }
1679         tcg_temp_free_i32(t32);
1680         break;
1681
1682     case MO_16:
1683         t32 = tcg_temp_new_i32();
1684         tcg_gen_extrl_i64_i32(t32, val);
1685         if (d) {
1686             tcg_gen_neg_i32(t32, t32);
1687         }
1688         if (u) {
1689             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1690         } else {
1691             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1692         }
1693         tcg_temp_free_i32(t32);
1694         break;
1695
1696     case MO_32:
1697         t64 = tcg_temp_new_i64();
1698         if (d) {
1699             tcg_gen_neg_i64(t64, val);
1700         } else {
1701             tcg_gen_mov_i64(t64, val);
1702         }
1703         if (u) {
1704             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1705         } else {
1706             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1707         }
1708         tcg_temp_free_i64(t64);
1709         break;
1710
1711     case MO_64:
1712         if (u) {
1713             if (d) {
1714                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1715             } else {
1716                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1717             }
1718         } else if (d) {
1719             t64 = tcg_temp_new_i64();
1720             tcg_gen_neg_i64(t64, val);
1721             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1722             tcg_temp_free_i64(t64);
1723         } else {
1724             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1725         }
1726         break;
1727
1728     default:
1729         g_assert_not_reached();
1730     }
1731
1732     tcg_temp_free_ptr(dptr);
1733     tcg_temp_free_ptr(nptr);
1734     tcg_temp_free_i32(desc);
1735 }
1736
1737 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1738 {
1739     if (sve_access_check(s)) {
1740         unsigned fullsz = vec_full_reg_size(s);
1741         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1742         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1743     }
1744     return true;
1745 }
1746
1747 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1748 {
1749     if (sve_access_check(s)) {
1750         unsigned fullsz = vec_full_reg_size(s);
1751         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752         int inc = numelem * a->imm * (a->d ? -1 : 1);
1753         TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755         tcg_gen_addi_i64(reg, reg, inc);
1756     }
1757     return true;
1758 }
1759
1760 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1761 {
1762     if (!sve_access_check(s)) {
1763         return true;
1764     }
1765
1766     unsigned fullsz = vec_full_reg_size(s);
1767     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1768     int inc = numelem * a->imm;
1769     TCGv_i64 reg = cpu_reg(s, a->rd);
1770
1771     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1772     if (inc == 0) {
1773         if (a->u) {
1774             tcg_gen_ext32u_i64(reg, reg);
1775         } else {
1776             tcg_gen_ext32s_i64(reg, reg);
1777         }
1778     } else {
1779         TCGv_i64 t = tcg_const_i64(inc);
1780         do_sat_addsub_32(reg, t, a->u, a->d);
1781         tcg_temp_free_i64(t);
1782     }
1783     return true;
1784 }
1785
1786 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1787 {
1788     if (!sve_access_check(s)) {
1789         return true;
1790     }
1791
1792     unsigned fullsz = vec_full_reg_size(s);
1793     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1794     int inc = numelem * a->imm;
1795     TCGv_i64 reg = cpu_reg(s, a->rd);
1796
1797     if (inc != 0) {
1798         TCGv_i64 t = tcg_const_i64(inc);
1799         do_sat_addsub_64(reg, t, a->u, a->d);
1800         tcg_temp_free_i64(t);
1801     }
1802     return true;
1803 }
1804
1805 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1806 {
1807     if (a->esz == 0) {
1808         return false;
1809     }
1810
1811     unsigned fullsz = vec_full_reg_size(s);
1812     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1813     int inc = numelem * a->imm;
1814
1815     if (inc != 0) {
1816         if (sve_access_check(s)) {
1817             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1818             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1819                               vec_full_reg_offset(s, a->rn),
1820                               t, fullsz, fullsz);
1821             tcg_temp_free_i64(t);
1822         }
1823     } else {
1824         do_mov_z(s, a->rd, a->rn);
1825     }
1826     return true;
1827 }
1828
1829 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1830 {
1831     if (a->esz == 0) {
1832         return false;
1833     }
1834
1835     unsigned fullsz = vec_full_reg_size(s);
1836     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1837     int inc = numelem * a->imm;
1838
1839     if (inc != 0) {
1840         if (sve_access_check(s)) {
1841             TCGv_i64 t = tcg_const_i64(inc);
1842             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1843             tcg_temp_free_i64(t);
1844         }
1845     } else {
1846         do_mov_z(s, a->rd, a->rn);
1847     }
1848     return true;
1849 }
1850
1851 /*
1852  *** SVE Bitwise Immediate Group
1853  */
1854
1855 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1856 {
1857     uint64_t imm;
1858     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1859                                 extract32(a->dbm, 0, 6),
1860                                 extract32(a->dbm, 6, 6))) {
1861         return false;
1862     }
1863     if (sve_access_check(s)) {
1864         unsigned vsz = vec_full_reg_size(s);
1865         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1866                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1867     }
1868     return true;
1869 }
1870
1871 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1872 {
1873     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1874 }
1875
1876 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1877 {
1878     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1879 }
1880
1881 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1882 {
1883     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1884 }
1885
1886 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1887 {
1888     uint64_t imm;
1889     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1890                                 extract32(a->dbm, 0, 6),
1891                                 extract32(a->dbm, 6, 6))) {
1892         return false;
1893     }
1894     if (sve_access_check(s)) {
1895         do_dupi_z(s, a->rd, imm);
1896     }
1897     return true;
1898 }
1899
1900 /*
1901  *** SVE Integer Wide Immediate - Predicated Group
1902  */
1903
1904 /* Implement all merging copies.  This is used for CPY (immediate),
1905  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1906  */
1907 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1908                      TCGv_i64 val)
1909 {
1910     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1911     static gen_cpy * const fns[4] = {
1912         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1913         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1914     };
1915     unsigned vsz = vec_full_reg_size(s);
1916     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1917     TCGv_ptr t_zd = tcg_temp_new_ptr();
1918     TCGv_ptr t_zn = tcg_temp_new_ptr();
1919     TCGv_ptr t_pg = tcg_temp_new_ptr();
1920
1921     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1922     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1923     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1924
1925     fns[esz](t_zd, t_zn, t_pg, val, desc);
1926
1927     tcg_temp_free_ptr(t_zd);
1928     tcg_temp_free_ptr(t_zn);
1929     tcg_temp_free_ptr(t_pg);
1930     tcg_temp_free_i32(desc);
1931 }
1932
1933 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1934 {
1935     if (a->esz == 0) {
1936         return false;
1937     }
1938     if (sve_access_check(s)) {
1939         /* Decode the VFP immediate.  */
1940         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1941         TCGv_i64 t_imm = tcg_const_i64(imm);
1942         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943         tcg_temp_free_i64(t_imm);
1944     }
1945     return true;
1946 }
1947
1948 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1949 {
1950     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1951         return false;
1952     }
1953     if (sve_access_check(s)) {
1954         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1955         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1956         tcg_temp_free_i64(t_imm);
1957     }
1958     return true;
1959 }
1960
1961 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1962 {
1963     static gen_helper_gvec_2i * const fns[4] = {
1964         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1965         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1966     };
1967
1968     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1969         return false;
1970     }
1971     if (sve_access_check(s)) {
1972         unsigned vsz = vec_full_reg_size(s);
1973         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1974         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1975                             pred_full_reg_offset(s, a->pg),
1976                             t_imm, vsz, vsz, 0, fns[a->esz]);
1977         tcg_temp_free_i64(t_imm);
1978     }
1979     return true;
1980 }
1981
1982 /*
1983  *** SVE Permute Extract Group
1984  */
1985
1986 static bool trans_EXT(DisasContext *s, arg_EXT *a)
1987 {
1988     if (!sve_access_check(s)) {
1989         return true;
1990     }
1991
1992     unsigned vsz = vec_full_reg_size(s);
1993     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1994     unsigned n_siz = vsz - n_ofs;
1995     unsigned d = vec_full_reg_offset(s, a->rd);
1996     unsigned n = vec_full_reg_offset(s, a->rn);
1997     unsigned m = vec_full_reg_offset(s, a->rm);
1998
1999     /* Use host vector move insns if we have appropriate sizes
2000      * and no unfortunate overlap.
2001      */
2002     if (m != d
2003         && n_ofs == size_for_gvec(n_ofs)
2004         && n_siz == size_for_gvec(n_siz)
2005         && (d != n || n_siz <= n_ofs)) {
2006         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2007         if (n_ofs != 0) {
2008             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2009         }
2010     } else {
2011         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2012     }
2013     return true;
2014 }
2015
2016 /*
2017  *** SVE Permute - Unpredicated Group
2018  */
2019
2020 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2021 {
2022     if (sve_access_check(s)) {
2023         unsigned vsz = vec_full_reg_size(s);
2024         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2025                              vsz, vsz, cpu_reg_sp(s, a->rn));
2026     }
2027     return true;
2028 }
2029
2030 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2031 {
2032     if ((a->imm & 0x1f) == 0) {
2033         return false;
2034     }
2035     if (sve_access_check(s)) {
2036         unsigned vsz = vec_full_reg_size(s);
2037         unsigned dofs = vec_full_reg_offset(s, a->rd);
2038         unsigned esz, index;
2039
2040         esz = ctz32(a->imm);
2041         index = a->imm >> (esz + 1);
2042
2043         if ((index << esz) < vsz) {
2044             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2045             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2046         } else {
2047             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2048         }
2049     }
2050     return true;
2051 }
2052
2053 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2054 {
2055     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2056     static gen_insr * const fns[4] = {
2057         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2058         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2059     };
2060     unsigned vsz = vec_full_reg_size(s);
2061     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2062     TCGv_ptr t_zd = tcg_temp_new_ptr();
2063     TCGv_ptr t_zn = tcg_temp_new_ptr();
2064
2065     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2066     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2067
2068     fns[a->esz](t_zd, t_zn, val, desc);
2069
2070     tcg_temp_free_ptr(t_zd);
2071     tcg_temp_free_ptr(t_zn);
2072     tcg_temp_free_i32(desc);
2073 }
2074
2075 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2076 {
2077     if (sve_access_check(s)) {
2078         TCGv_i64 t = tcg_temp_new_i64();
2079         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2080         do_insr_i64(s, a, t);
2081         tcg_temp_free_i64(t);
2082     }
2083     return true;
2084 }
2085
2086 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2087 {
2088     if (sve_access_check(s)) {
2089         do_insr_i64(s, a, cpu_reg(s, a->rm));
2090     }
2091     return true;
2092 }
2093
2094 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2095 {
2096     static gen_helper_gvec_2 * const fns[4] = {
2097         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2098         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2099     };
2100
2101     if (sve_access_check(s)) {
2102         unsigned vsz = vec_full_reg_size(s);
2103         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2104                            vec_full_reg_offset(s, a->rn),
2105                            vsz, vsz, 0, fns[a->esz]);
2106     }
2107     return true;
2108 }
2109
2110 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2111 {
2112     static gen_helper_gvec_3 * const fns[4] = {
2113         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2114         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2115     };
2116
2117     if (sve_access_check(s)) {
2118         unsigned vsz = vec_full_reg_size(s);
2119         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2120                            vec_full_reg_offset(s, a->rn),
2121                            vec_full_reg_offset(s, a->rm),
2122                            vsz, vsz, 0, fns[a->esz]);
2123     }
2124     return true;
2125 }
2126
2127 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2128 {
2129     static gen_helper_gvec_2 * const fns[4][2] = {
2130         { NULL, NULL },
2131         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2132         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2133         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2134     };
2135
2136     if (a->esz == 0) {
2137         return false;
2138     }
2139     if (sve_access_check(s)) {
2140         unsigned vsz = vec_full_reg_size(s);
2141         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2142                            vec_full_reg_offset(s, a->rn)
2143                            + (a->h ? vsz / 2 : 0),
2144                            vsz, vsz, 0, fns[a->esz][a->u]);
2145     }
2146     return true;
2147 }
2148
2149 /*
2150  *** SVE Permute - Predicates Group
2151  */
2152
2153 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2154                           gen_helper_gvec_3 *fn)
2155 {
2156     if (!sve_access_check(s)) {
2157         return true;
2158     }
2159
2160     unsigned vsz = pred_full_reg_size(s);
2161
2162     /* Predicate sizes may be smaller and cannot use simd_desc.
2163        We cannot round up, as we do elsewhere, because we need
2164        the exact size for ZIP2 and REV.  We retain the style for
2165        the other helpers for consistency.  */
2166     TCGv_ptr t_d = tcg_temp_new_ptr();
2167     TCGv_ptr t_n = tcg_temp_new_ptr();
2168     TCGv_ptr t_m = tcg_temp_new_ptr();
2169     TCGv_i32 t_desc;
2170     int desc;
2171
2172     desc = vsz - 2;
2173     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2174     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2175
2176     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2177     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2178     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2179     t_desc = tcg_const_i32(desc);
2180
2181     fn(t_d, t_n, t_m, t_desc);
2182
2183     tcg_temp_free_ptr(t_d);
2184     tcg_temp_free_ptr(t_n);
2185     tcg_temp_free_ptr(t_m);
2186     tcg_temp_free_i32(t_desc);
2187     return true;
2188 }
2189
2190 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2191                           gen_helper_gvec_2 *fn)
2192 {
2193     if (!sve_access_check(s)) {
2194         return true;
2195     }
2196
2197     unsigned vsz = pred_full_reg_size(s);
2198     TCGv_ptr t_d = tcg_temp_new_ptr();
2199     TCGv_ptr t_n = tcg_temp_new_ptr();
2200     TCGv_i32 t_desc;
2201     int desc;
2202
2203     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2204     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2205
2206     /* Predicate sizes may be smaller and cannot use simd_desc.
2207        We cannot round up, as we do elsewhere, because we need
2208        the exact size for ZIP2 and REV.  We retain the style for
2209        the other helpers for consistency.  */
2210
2211     desc = vsz - 2;
2212     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2213     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2214     t_desc = tcg_const_i32(desc);
2215
2216     fn(t_d, t_n, t_desc);
2217
2218     tcg_temp_free_i32(t_desc);
2219     tcg_temp_free_ptr(t_d);
2220     tcg_temp_free_ptr(t_n);
2221     return true;
2222 }
2223
2224 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2225 {
2226     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2227 }
2228
2229 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2230 {
2231     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2232 }
2233
2234 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2235 {
2236     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2237 }
2238
2239 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2240 {
2241     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2242 }
2243
2244 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2245 {
2246     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2247 }
2248
2249 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2250 {
2251     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2252 }
2253
2254 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2255 {
2256     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2257 }
2258
2259 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2260 {
2261     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2262 }
2263
2264 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2265 {
2266     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2267 }
2268
2269 /*
2270  *** SVE Permute - Interleaving Group
2271  */
2272
2273 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2274 {
2275     static gen_helper_gvec_3 * const fns[4] = {
2276         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2277         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2278     };
2279
2280     if (sve_access_check(s)) {
2281         unsigned vsz = vec_full_reg_size(s);
2282         unsigned high_ofs = high ? vsz / 2 : 0;
2283         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2284                            vec_full_reg_offset(s, a->rn) + high_ofs,
2285                            vec_full_reg_offset(s, a->rm) + high_ofs,
2286                            vsz, vsz, 0, fns[a->esz]);
2287     }
2288     return true;
2289 }
2290
2291 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2292                             gen_helper_gvec_3 *fn)
2293 {
2294     if (sve_access_check(s)) {
2295         unsigned vsz = vec_full_reg_size(s);
2296         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2297                            vec_full_reg_offset(s, a->rn),
2298                            vec_full_reg_offset(s, a->rm),
2299                            vsz, vsz, data, fn);
2300     }
2301     return true;
2302 }
2303
2304 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2305 {
2306     return do_zip(s, a, false);
2307 }
2308
2309 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2310 {
2311     return do_zip(s, a, true);
2312 }
2313
2314 static gen_helper_gvec_3 * const uzp_fns[4] = {
2315     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2316     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2317 };
2318
2319 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2320 {
2321     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2322 }
2323
2324 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2325 {
2326     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2327 }
2328
2329 static gen_helper_gvec_3 * const trn_fns[4] = {
2330     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2331     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2332 };
2333
2334 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2335 {
2336     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2337 }
2338
2339 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2340 {
2341     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2342 }
2343
2344 /*
2345  *** SVE Permute Vector - Predicated Group
2346  */
2347
2348 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2349 {
2350     static gen_helper_gvec_3 * const fns[4] = {
2351         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2352     };
2353     return do_zpz_ool(s, a, fns[a->esz]);
2354 }
2355
2356 /* Call the helper that computes the ARM LastActiveElement pseudocode
2357  * function, scaled by the element size.  This includes the not found
2358  * indication; e.g. not found for esz=3 is -8.
2359  */
2360 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2361 {
2362     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2363      * round up, as we do elsewhere, because we need the exact size.
2364      */
2365     TCGv_ptr t_p = tcg_temp_new_ptr();
2366     TCGv_i32 t_desc;
2367     unsigned vsz = pred_full_reg_size(s);
2368     unsigned desc;
2369
2370     desc = vsz - 2;
2371     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2372
2373     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2374     t_desc = tcg_const_i32(desc);
2375
2376     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2377
2378     tcg_temp_free_i32(t_desc);
2379     tcg_temp_free_ptr(t_p);
2380 }
2381
2382 /* Increment LAST to the offset of the next element in the vector,
2383  * wrapping around to 0.
2384  */
2385 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2386 {
2387     unsigned vsz = vec_full_reg_size(s);
2388
2389     tcg_gen_addi_i32(last, last, 1 << esz);
2390     if (is_power_of_2(vsz)) {
2391         tcg_gen_andi_i32(last, last, vsz - 1);
2392     } else {
2393         TCGv_i32 max = tcg_const_i32(vsz);
2394         TCGv_i32 zero = tcg_const_i32(0);
2395         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2396         tcg_temp_free_i32(max);
2397         tcg_temp_free_i32(zero);
2398     }
2399 }
2400
2401 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2402 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2403 {
2404     unsigned vsz = vec_full_reg_size(s);
2405
2406     if (is_power_of_2(vsz)) {
2407         tcg_gen_andi_i32(last, last, vsz - 1);
2408     } else {
2409         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2410         TCGv_i32 zero = tcg_const_i32(0);
2411         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2412         tcg_temp_free_i32(max);
2413         tcg_temp_free_i32(zero);
2414     }
2415 }
2416
2417 /* Load an unsigned element of ESZ from BASE+OFS.  */
2418 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2419 {
2420     TCGv_i64 r = tcg_temp_new_i64();
2421
2422     switch (esz) {
2423     case 0:
2424         tcg_gen_ld8u_i64(r, base, ofs);
2425         break;
2426     case 1:
2427         tcg_gen_ld16u_i64(r, base, ofs);
2428         break;
2429     case 2:
2430         tcg_gen_ld32u_i64(r, base, ofs);
2431         break;
2432     case 3:
2433         tcg_gen_ld_i64(r, base, ofs);
2434         break;
2435     default:
2436         g_assert_not_reached();
2437     }
2438     return r;
2439 }
2440
2441 /* Load an unsigned element of ESZ from RM[LAST].  */
2442 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2443                                  int rm, int esz)
2444 {
2445     TCGv_ptr p = tcg_temp_new_ptr();
2446     TCGv_i64 r;
2447
2448     /* Convert offset into vector into offset into ENV.
2449      * The final adjustment for the vector register base
2450      * is added via constant offset to the load.
2451      */
2452 #ifdef HOST_WORDS_BIGENDIAN
2453     /* Adjust for element ordering.  See vec_reg_offset.  */
2454     if (esz < 3) {
2455         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2456     }
2457 #endif
2458     tcg_gen_ext_i32_ptr(p, last);
2459     tcg_gen_add_ptr(p, p, cpu_env);
2460
2461     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2462     tcg_temp_free_ptr(p);
2463
2464     return r;
2465 }
2466
2467 /* Compute CLAST for a Zreg.  */
2468 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2469 {
2470     TCGv_i32 last;
2471     TCGLabel *over;
2472     TCGv_i64 ele;
2473     unsigned vsz, esz = a->esz;
2474
2475     if (!sve_access_check(s)) {
2476         return true;
2477     }
2478
2479     last = tcg_temp_local_new_i32();
2480     over = gen_new_label();
2481
2482     find_last_active(s, last, esz, a->pg);
2483
2484     /* There is of course no movcond for a 2048-bit vector,
2485      * so we must branch over the actual store.
2486      */
2487     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2488
2489     if (!before) {
2490         incr_last_active(s, last, esz);
2491     }
2492
2493     ele = load_last_active(s, last, a->rm, esz);
2494     tcg_temp_free_i32(last);
2495
2496     vsz = vec_full_reg_size(s);
2497     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2498     tcg_temp_free_i64(ele);
2499
2500     /* If this insn used MOVPRFX, we may need a second move.  */
2501     if (a->rd != a->rn) {
2502         TCGLabel *done = gen_new_label();
2503         tcg_gen_br(done);
2504
2505         gen_set_label(over);
2506         do_mov_z(s, a->rd, a->rn);
2507
2508         gen_set_label(done);
2509     } else {
2510         gen_set_label(over);
2511     }
2512     return true;
2513 }
2514
2515 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2516 {
2517     return do_clast_vector(s, a, false);
2518 }
2519
2520 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2521 {
2522     return do_clast_vector(s, a, true);
2523 }
2524
2525 /* Compute CLAST for a scalar.  */
2526 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2527                             bool before, TCGv_i64 reg_val)
2528 {
2529     TCGv_i32 last = tcg_temp_new_i32();
2530     TCGv_i64 ele, cmp, zero;
2531
2532     find_last_active(s, last, esz, pg);
2533
2534     /* Extend the original value of last prior to incrementing.  */
2535     cmp = tcg_temp_new_i64();
2536     tcg_gen_ext_i32_i64(cmp, last);
2537
2538     if (!before) {
2539         incr_last_active(s, last, esz);
2540     }
2541
2542     /* The conceit here is that while last < 0 indicates not found, after
2543      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2544      * from which we can load garbage.  We then discard the garbage with
2545      * a conditional move.
2546      */
2547     ele = load_last_active(s, last, rm, esz);
2548     tcg_temp_free_i32(last);
2549
2550     zero = tcg_const_i64(0);
2551     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2552
2553     tcg_temp_free_i64(zero);
2554     tcg_temp_free_i64(cmp);
2555     tcg_temp_free_i64(ele);
2556 }
2557
2558 /* Compute CLAST for a Vreg.  */
2559 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2560 {
2561     if (sve_access_check(s)) {
2562         int esz = a->esz;
2563         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2564         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2565
2566         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2567         write_fp_dreg(s, a->rd, reg);
2568         tcg_temp_free_i64(reg);
2569     }
2570     return true;
2571 }
2572
2573 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2574 {
2575     return do_clast_fp(s, a, false);
2576 }
2577
2578 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2579 {
2580     return do_clast_fp(s, a, true);
2581 }
2582
2583 /* Compute CLAST for a Xreg.  */
2584 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2585 {
2586     TCGv_i64 reg;
2587
2588     if (!sve_access_check(s)) {
2589         return true;
2590     }
2591
2592     reg = cpu_reg(s, a->rd);
2593     switch (a->esz) {
2594     case 0:
2595         tcg_gen_ext8u_i64(reg, reg);
2596         break;
2597     case 1:
2598         tcg_gen_ext16u_i64(reg, reg);
2599         break;
2600     case 2:
2601         tcg_gen_ext32u_i64(reg, reg);
2602         break;
2603     case 3:
2604         break;
2605     default:
2606         g_assert_not_reached();
2607     }
2608
2609     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2610     return true;
2611 }
2612
2613 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2614 {
2615     return do_clast_general(s, a, false);
2616 }
2617
2618 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2619 {
2620     return do_clast_general(s, a, true);
2621 }
2622
2623 /* Compute LAST for a scalar.  */
2624 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2625                                int pg, int rm, bool before)
2626 {
2627     TCGv_i32 last = tcg_temp_new_i32();
2628     TCGv_i64 ret;
2629
2630     find_last_active(s, last, esz, pg);
2631     if (before) {
2632         wrap_last_active(s, last, esz);
2633     } else {
2634         incr_last_active(s, last, esz);
2635     }
2636
2637     ret = load_last_active(s, last, rm, esz);
2638     tcg_temp_free_i32(last);
2639     return ret;
2640 }
2641
2642 /* Compute LAST for a Vreg.  */
2643 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2644 {
2645     if (sve_access_check(s)) {
2646         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2647         write_fp_dreg(s, a->rd, val);
2648         tcg_temp_free_i64(val);
2649     }
2650     return true;
2651 }
2652
2653 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2654 {
2655     return do_last_fp(s, a, false);
2656 }
2657
2658 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2659 {
2660     return do_last_fp(s, a, true);
2661 }
2662
2663 /* Compute LAST for a Xreg.  */
2664 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2665 {
2666     if (sve_access_check(s)) {
2667         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2668         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2669         tcg_temp_free_i64(val);
2670     }
2671     return true;
2672 }
2673
2674 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2675 {
2676     return do_last_general(s, a, false);
2677 }
2678
2679 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2680 {
2681     return do_last_general(s, a, true);
2682 }
2683
2684 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2685 {
2686     if (sve_access_check(s)) {
2687         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2688     }
2689     return true;
2690 }
2691
2692 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2693 {
2694     if (sve_access_check(s)) {
2695         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2696         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2697         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2698         tcg_temp_free_i64(t);
2699     }
2700     return true;
2701 }
2702
2703 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2704 {
2705     static gen_helper_gvec_3 * const fns[4] = {
2706         NULL,
2707         gen_helper_sve_revb_h,
2708         gen_helper_sve_revb_s,
2709         gen_helper_sve_revb_d,
2710     };
2711     return do_zpz_ool(s, a, fns[a->esz]);
2712 }
2713
2714 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2715 {
2716     static gen_helper_gvec_3 * const fns[4] = {
2717         NULL,
2718         NULL,
2719         gen_helper_sve_revh_s,
2720         gen_helper_sve_revh_d,
2721     };
2722     return do_zpz_ool(s, a, fns[a->esz]);
2723 }
2724
2725 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2726 {
2727     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2728 }
2729
2730 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2731 {
2732     static gen_helper_gvec_3 * const fns[4] = {
2733         gen_helper_sve_rbit_b,
2734         gen_helper_sve_rbit_h,
2735         gen_helper_sve_rbit_s,
2736         gen_helper_sve_rbit_d,
2737     };
2738     return do_zpz_ool(s, a, fns[a->esz]);
2739 }
2740
2741 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2742 {
2743     if (sve_access_check(s)) {
2744         unsigned vsz = vec_full_reg_size(s);
2745         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2746                            vec_full_reg_offset(s, a->rn),
2747                            vec_full_reg_offset(s, a->rm),
2748                            pred_full_reg_offset(s, a->pg),
2749                            vsz, vsz, a->esz, gen_helper_sve_splice);
2750     }
2751     return true;
2752 }
2753
2754 /*
2755  *** SVE Integer Compare - Vectors Group
2756  */
2757
2758 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2759                           gen_helper_gvec_flags_4 *gen_fn)
2760 {
2761     TCGv_ptr pd, zn, zm, pg;
2762     unsigned vsz;
2763     TCGv_i32 t;
2764
2765     if (gen_fn == NULL) {
2766         return false;
2767     }
2768     if (!sve_access_check(s)) {
2769         return true;
2770     }
2771
2772     vsz = vec_full_reg_size(s);
2773     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2774     pd = tcg_temp_new_ptr();
2775     zn = tcg_temp_new_ptr();
2776     zm = tcg_temp_new_ptr();
2777     pg = tcg_temp_new_ptr();
2778
2779     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2780     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2781     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2782     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2783
2784     gen_fn(t, pd, zn, zm, pg, t);
2785
2786     tcg_temp_free_ptr(pd);
2787     tcg_temp_free_ptr(zn);
2788     tcg_temp_free_ptr(zm);
2789     tcg_temp_free_ptr(pg);
2790
2791     do_pred_flags(t);
2792
2793     tcg_temp_free_i32(t);
2794     return true;
2795 }
2796
2797 #define DO_PPZZ(NAME, name) \
2798 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
2799 {                                                                         \
2800     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2801         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2802         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2803     };                                                                    \
2804     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2805 }
2806
2807 DO_PPZZ(CMPEQ, cmpeq)
2808 DO_PPZZ(CMPNE, cmpne)
2809 DO_PPZZ(CMPGT, cmpgt)
2810 DO_PPZZ(CMPGE, cmpge)
2811 DO_PPZZ(CMPHI, cmphi)
2812 DO_PPZZ(CMPHS, cmphs)
2813
2814 #undef DO_PPZZ
2815
2816 #define DO_PPZW(NAME, name) \
2817 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
2818 {                                                                         \
2819     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2820         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2821         gen_helper_sve_##name##_ppzw_s, NULL                              \
2822     };                                                                    \
2823     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2824 }
2825
2826 DO_PPZW(CMPEQ, cmpeq)
2827 DO_PPZW(CMPNE, cmpne)
2828 DO_PPZW(CMPGT, cmpgt)
2829 DO_PPZW(CMPGE, cmpge)
2830 DO_PPZW(CMPHI, cmphi)
2831 DO_PPZW(CMPHS, cmphs)
2832 DO_PPZW(CMPLT, cmplt)
2833 DO_PPZW(CMPLE, cmple)
2834 DO_PPZW(CMPLO, cmplo)
2835 DO_PPZW(CMPLS, cmpls)
2836
2837 #undef DO_PPZW
2838
2839 /*
2840  *** SVE Integer Compare - Immediate Groups
2841  */
2842
2843 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2844                           gen_helper_gvec_flags_3 *gen_fn)
2845 {
2846     TCGv_ptr pd, zn, pg;
2847     unsigned vsz;
2848     TCGv_i32 t;
2849
2850     if (gen_fn == NULL) {
2851         return false;
2852     }
2853     if (!sve_access_check(s)) {
2854         return true;
2855     }
2856
2857     vsz = vec_full_reg_size(s);
2858     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2859     pd = tcg_temp_new_ptr();
2860     zn = tcg_temp_new_ptr();
2861     pg = tcg_temp_new_ptr();
2862
2863     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2864     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2865     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2866
2867     gen_fn(t, pd, zn, pg, t);
2868
2869     tcg_temp_free_ptr(pd);
2870     tcg_temp_free_ptr(zn);
2871     tcg_temp_free_ptr(pg);
2872
2873     do_pred_flags(t);
2874
2875     tcg_temp_free_i32(t);
2876     return true;
2877 }
2878
2879 #define DO_PPZI(NAME, name) \
2880 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
2881 {                                                                         \
2882     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2883         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2884         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2885     };                                                                    \
2886     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2887 }
2888
2889 DO_PPZI(CMPEQ, cmpeq)
2890 DO_PPZI(CMPNE, cmpne)
2891 DO_PPZI(CMPGT, cmpgt)
2892 DO_PPZI(CMPGE, cmpge)
2893 DO_PPZI(CMPHI, cmphi)
2894 DO_PPZI(CMPHS, cmphs)
2895 DO_PPZI(CMPLT, cmplt)
2896 DO_PPZI(CMPLE, cmple)
2897 DO_PPZI(CMPLO, cmplo)
2898 DO_PPZI(CMPLS, cmpls)
2899
2900 #undef DO_PPZI
2901
2902 /*
2903  *** SVE Partition Break Group
2904  */
2905
2906 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2907                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2908 {
2909     if (!sve_access_check(s)) {
2910         return true;
2911     }
2912
2913     unsigned vsz = pred_full_reg_size(s);
2914
2915     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2916     TCGv_ptr d = tcg_temp_new_ptr();
2917     TCGv_ptr n = tcg_temp_new_ptr();
2918     TCGv_ptr m = tcg_temp_new_ptr();
2919     TCGv_ptr g = tcg_temp_new_ptr();
2920     TCGv_i32 t = tcg_const_i32(vsz - 2);
2921
2922     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2923     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2924     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2925     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2926
2927     if (a->s) {
2928         fn_s(t, d, n, m, g, t);
2929         do_pred_flags(t);
2930     } else {
2931         fn(d, n, m, g, t);
2932     }
2933     tcg_temp_free_ptr(d);
2934     tcg_temp_free_ptr(n);
2935     tcg_temp_free_ptr(m);
2936     tcg_temp_free_ptr(g);
2937     tcg_temp_free_i32(t);
2938     return true;
2939 }
2940
2941 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2942                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2943 {
2944     if (!sve_access_check(s)) {
2945         return true;
2946     }
2947
2948     unsigned vsz = pred_full_reg_size(s);
2949
2950     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2951     TCGv_ptr d = tcg_temp_new_ptr();
2952     TCGv_ptr n = tcg_temp_new_ptr();
2953     TCGv_ptr g = tcg_temp_new_ptr();
2954     TCGv_i32 t = tcg_const_i32(vsz - 2);
2955
2956     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2957     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2958     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2959
2960     if (a->s) {
2961         fn_s(t, d, n, g, t);
2962         do_pred_flags(t);
2963     } else {
2964         fn(d, n, g, t);
2965     }
2966     tcg_temp_free_ptr(d);
2967     tcg_temp_free_ptr(n);
2968     tcg_temp_free_ptr(g);
2969     tcg_temp_free_i32(t);
2970     return true;
2971 }
2972
2973 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2974 {
2975     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2976 }
2977
2978 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2979 {
2980     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2981 }
2982
2983 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2984 {
2985     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2986 }
2987
2988 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2989 {
2990     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2991 }
2992
2993 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2994 {
2995     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2996 }
2997
2998 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
2999 {
3000     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3001 }
3002
3003 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
3004 {
3005     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3006 }
3007
3008 /*
3009  *** SVE Predicate Count Group
3010  */
3011
3012 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3013 {
3014     unsigned psz = pred_full_reg_size(s);
3015
3016     if (psz <= 8) {
3017         uint64_t psz_mask;
3018
3019         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3020         if (pn != pg) {
3021             TCGv_i64 g = tcg_temp_new_i64();
3022             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3023             tcg_gen_and_i64(val, val, g);
3024             tcg_temp_free_i64(g);
3025         }
3026
3027         /* Reduce the pred_esz_masks value simply to reduce the
3028          * size of the code generated here.
3029          */
3030         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3031         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3032
3033         tcg_gen_ctpop_i64(val, val);
3034     } else {
3035         TCGv_ptr t_pn = tcg_temp_new_ptr();
3036         TCGv_ptr t_pg = tcg_temp_new_ptr();
3037         unsigned desc;
3038         TCGv_i32 t_desc;
3039
3040         desc = psz - 2;
3041         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3042
3043         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3044         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3045         t_desc = tcg_const_i32(desc);
3046
3047         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3048         tcg_temp_free_ptr(t_pn);
3049         tcg_temp_free_ptr(t_pg);
3050         tcg_temp_free_i32(t_desc);
3051     }
3052 }
3053
3054 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3055 {
3056     if (sve_access_check(s)) {
3057         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3058     }
3059     return true;
3060 }
3061
3062 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3063 {
3064     if (sve_access_check(s)) {
3065         TCGv_i64 reg = cpu_reg(s, a->rd);
3066         TCGv_i64 val = tcg_temp_new_i64();
3067
3068         do_cntp(s, val, a->esz, a->pg, a->pg);
3069         if (a->d) {
3070             tcg_gen_sub_i64(reg, reg, val);
3071         } else {
3072             tcg_gen_add_i64(reg, reg, val);
3073         }
3074         tcg_temp_free_i64(val);
3075     }
3076     return true;
3077 }
3078
3079 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3080 {
3081     if (a->esz == 0) {
3082         return false;
3083     }
3084     if (sve_access_check(s)) {
3085         unsigned vsz = vec_full_reg_size(s);
3086         TCGv_i64 val = tcg_temp_new_i64();
3087         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3088
3089         do_cntp(s, val, a->esz, a->pg, a->pg);
3090         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3091                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3092     }
3093     return true;
3094 }
3095
3096 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3097 {
3098     if (sve_access_check(s)) {
3099         TCGv_i64 reg = cpu_reg(s, a->rd);
3100         TCGv_i64 val = tcg_temp_new_i64();
3101
3102         do_cntp(s, val, a->esz, a->pg, a->pg);
3103         do_sat_addsub_32(reg, val, a->u, a->d);
3104     }
3105     return true;
3106 }
3107
3108 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3109 {
3110     if (sve_access_check(s)) {
3111         TCGv_i64 reg = cpu_reg(s, a->rd);
3112         TCGv_i64 val = tcg_temp_new_i64();
3113
3114         do_cntp(s, val, a->esz, a->pg, a->pg);
3115         do_sat_addsub_64(reg, val, a->u, a->d);
3116     }
3117     return true;
3118 }
3119
3120 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3121 {
3122     if (a->esz == 0) {
3123         return false;
3124     }
3125     if (sve_access_check(s)) {
3126         TCGv_i64 val = tcg_temp_new_i64();
3127         do_cntp(s, val, a->esz, a->pg, a->pg);
3128         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3129     }
3130     return true;
3131 }
3132
3133 /*
3134  *** SVE Integer Compare Scalars Group
3135  */
3136
3137 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3138 {
3139     if (!sve_access_check(s)) {
3140         return true;
3141     }
3142
3143     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3144     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3145     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3146     TCGv_i64 cmp = tcg_temp_new_i64();
3147
3148     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3149     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3150     tcg_temp_free_i64(cmp);
3151
3152     /* VF = !NF & !CF.  */
3153     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3154     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3155
3156     /* Both NF and VF actually look at bit 31.  */
3157     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3158     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3159     return true;
3160 }
3161
3162 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3163 {
3164     TCGv_i64 op0, op1, t0, t1, tmax;
3165     TCGv_i32 t2, t3;
3166     TCGv_ptr ptr;
3167     unsigned desc, vsz = vec_full_reg_size(s);
3168     TCGCond cond;
3169
3170     if (!sve_access_check(s)) {
3171         return true;
3172     }
3173
3174     op0 = read_cpu_reg(s, a->rn, 1);
3175     op1 = read_cpu_reg(s, a->rm, 1);
3176
3177     if (!a->sf) {
3178         if (a->u) {
3179             tcg_gen_ext32u_i64(op0, op0);
3180             tcg_gen_ext32u_i64(op1, op1);
3181         } else {
3182             tcg_gen_ext32s_i64(op0, op0);
3183             tcg_gen_ext32s_i64(op1, op1);
3184         }
3185     }
3186
3187     /* For the helper, compress the different conditions into a computation
3188      * of how many iterations for which the condition is true.
3189      */
3190     t0 = tcg_temp_new_i64();
3191     t1 = tcg_temp_new_i64();
3192     tcg_gen_sub_i64(t0, op1, op0);
3193
3194     tmax = tcg_const_i64(vsz >> a->esz);
3195     if (a->eq) {
3196         /* Equality means one more iteration.  */
3197         tcg_gen_addi_i64(t0, t0, 1);
3198
3199         /* If op1 is max (un)signed integer (and the only time the addition
3200          * above could overflow), then we produce an all-true predicate by
3201          * setting the count to the vector length.  This is because the
3202          * pseudocode is described as an increment + compare loop, and the
3203          * max integer would always compare true.
3204          */
3205         tcg_gen_movi_i64(t1, (a->sf
3206                               ? (a->u ? UINT64_MAX : INT64_MAX)
3207                               : (a->u ? UINT32_MAX : INT32_MAX)));
3208         tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3209     }
3210
3211     /* Bound to the maximum.  */
3212     tcg_gen_umin_i64(t0, t0, tmax);
3213     tcg_temp_free_i64(tmax);
3214
3215     /* Set the count to zero if the condition is false.  */
3216     cond = (a->u
3217             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3218             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3219     tcg_gen_movi_i64(t1, 0);
3220     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3221     tcg_temp_free_i64(t1);
3222
3223     /* Since we're bounded, pass as a 32-bit type.  */
3224     t2 = tcg_temp_new_i32();
3225     tcg_gen_extrl_i64_i32(t2, t0);
3226     tcg_temp_free_i64(t0);
3227
3228     /* Scale elements to bits.  */
3229     tcg_gen_shli_i32(t2, t2, a->esz);
3230
3231     desc = (vsz / 8) - 2;
3232     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3233     t3 = tcg_const_i32(desc);
3234
3235     ptr = tcg_temp_new_ptr();
3236     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3237
3238     gen_helper_sve_while(t2, ptr, t2, t3);
3239     do_pred_flags(t2);
3240
3241     tcg_temp_free_ptr(ptr);
3242     tcg_temp_free_i32(t2);
3243     tcg_temp_free_i32(t3);
3244     return true;
3245 }
3246
3247 /*
3248  *** SVE Integer Wide Immediate - Unpredicated Group
3249  */
3250
3251 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3252 {
3253     if (a->esz == 0) {
3254         return false;
3255     }
3256     if (sve_access_check(s)) {
3257         unsigned vsz = vec_full_reg_size(s);
3258         int dofs = vec_full_reg_offset(s, a->rd);
3259         uint64_t imm;
3260
3261         /* Decode the VFP immediate.  */
3262         imm = vfp_expand_imm(a->esz, a->imm);
3263         imm = dup_const(a->esz, imm);
3264
3265         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3266     }
3267     return true;
3268 }
3269
3270 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3271 {
3272     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3273         return false;
3274     }
3275     if (sve_access_check(s)) {
3276         unsigned vsz = vec_full_reg_size(s);
3277         int dofs = vec_full_reg_offset(s, a->rd);
3278
3279         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3280     }
3281     return true;
3282 }
3283
3284 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3285 {
3286     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3287         return false;
3288     }
3289     if (sve_access_check(s)) {
3290         unsigned vsz = vec_full_reg_size(s);
3291         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3292                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3293     }
3294     return true;
3295 }
3296
3297 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3298 {
3299     a->imm = -a->imm;
3300     return trans_ADD_zzi(s, a);
3301 }
3302
3303 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3304 {
3305     static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3306     static const GVecGen2s op[4] = {
3307         { .fni8 = tcg_gen_vec_sub8_i64,
3308           .fniv = tcg_gen_sub_vec,
3309           .fno = gen_helper_sve_subri_b,
3310           .opt_opc = vecop_list,
3311           .vece = MO_8,
3312           .scalar_first = true },
3313         { .fni8 = tcg_gen_vec_sub16_i64,
3314           .fniv = tcg_gen_sub_vec,
3315           .fno = gen_helper_sve_subri_h,
3316           .opt_opc = vecop_list,
3317           .vece = MO_16,
3318           .scalar_first = true },
3319         { .fni4 = tcg_gen_sub_i32,
3320           .fniv = tcg_gen_sub_vec,
3321           .fno = gen_helper_sve_subri_s,
3322           .opt_opc = vecop_list,
3323           .vece = MO_32,
3324           .scalar_first = true },
3325         { .fni8 = tcg_gen_sub_i64,
3326           .fniv = tcg_gen_sub_vec,
3327           .fno = gen_helper_sve_subri_d,
3328           .opt_opc = vecop_list,
3329           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3330           .vece = MO_64,
3331           .scalar_first = true }
3332     };
3333
3334     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3335         return false;
3336     }
3337     if (sve_access_check(s)) {
3338         unsigned vsz = vec_full_reg_size(s);
3339         TCGv_i64 c = tcg_const_i64(a->imm);
3340         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3341                         vec_full_reg_offset(s, a->rn),
3342                         vsz, vsz, c, &op[a->esz]);
3343         tcg_temp_free_i64(c);
3344     }
3345     return true;
3346 }
3347
3348 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3349 {
3350     if (sve_access_check(s)) {
3351         unsigned vsz = vec_full_reg_size(s);
3352         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3353                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3354     }
3355     return true;
3356 }
3357
3358 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3359 {
3360     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3361         return false;
3362     }
3363     if (sve_access_check(s)) {
3364         TCGv_i64 val = tcg_const_i64(a->imm);
3365         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3366         tcg_temp_free_i64(val);
3367     }
3368     return true;
3369 }
3370
3371 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3372 {
3373     return do_zzi_sat(s, a, false, false);
3374 }
3375
3376 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3377 {
3378     return do_zzi_sat(s, a, true, false);
3379 }
3380
3381 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3382 {
3383     return do_zzi_sat(s, a, false, true);
3384 }
3385
3386 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3387 {
3388     return do_zzi_sat(s, a, true, true);
3389 }
3390
3391 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3392 {
3393     if (sve_access_check(s)) {
3394         unsigned vsz = vec_full_reg_size(s);
3395         TCGv_i64 c = tcg_const_i64(a->imm);
3396
3397         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3398                             vec_full_reg_offset(s, a->rn),
3399                             c, vsz, vsz, 0, fn);
3400         tcg_temp_free_i64(c);
3401     }
3402     return true;
3403 }
3404
3405 #define DO_ZZI(NAME, name) \
3406 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
3407 {                                                                       \
3408     static gen_helper_gvec_2i * const fns[4] = {                        \
3409         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3410         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3411     };                                                                  \
3412     return do_zzi_ool(s, a, fns[a->esz]);                               \
3413 }
3414
3415 DO_ZZI(SMAX, smax)
3416 DO_ZZI(UMAX, umax)
3417 DO_ZZI(SMIN, smin)
3418 DO_ZZI(UMIN, umin)
3419
3420 #undef DO_ZZI
3421
3422 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3423 {
3424     static gen_helper_gvec_3 * const fns[2][2] = {
3425         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3426         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3427     };
3428
3429     if (sve_access_check(s)) {
3430         unsigned vsz = vec_full_reg_size(s);
3431         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3432                            vec_full_reg_offset(s, a->rn),
3433                            vec_full_reg_offset(s, a->rm),
3434                            vsz, vsz, 0, fns[a->u][a->sz]);
3435     }
3436     return true;
3437 }
3438
3439 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3440 {
3441     static gen_helper_gvec_3 * const fns[2][2] = {
3442         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3443         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3444     };
3445
3446     if (sve_access_check(s)) {
3447         unsigned vsz = vec_full_reg_size(s);
3448         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3449                            vec_full_reg_offset(s, a->rn),
3450                            vec_full_reg_offset(s, a->rm),
3451                            vsz, vsz, a->index, fns[a->u][a->sz]);
3452     }
3453     return true;
3454 }
3455
3456
3457 /*
3458  *** SVE Floating Point Multiply-Add Indexed Group
3459  */
3460
3461 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3462 {
3463     static gen_helper_gvec_4_ptr * const fns[3] = {
3464         gen_helper_gvec_fmla_idx_h,
3465         gen_helper_gvec_fmla_idx_s,
3466         gen_helper_gvec_fmla_idx_d,
3467     };
3468
3469     if (sve_access_check(s)) {
3470         unsigned vsz = vec_full_reg_size(s);
3471         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3472         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3473                            vec_full_reg_offset(s, a->rn),
3474                            vec_full_reg_offset(s, a->rm),
3475                            vec_full_reg_offset(s, a->ra),
3476                            status, vsz, vsz, (a->index << 1) | a->sub,
3477                            fns[a->esz - 1]);
3478         tcg_temp_free_ptr(status);
3479     }
3480     return true;
3481 }
3482
3483 /*
3484  *** SVE Floating Point Multiply Indexed Group
3485  */
3486
3487 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3488 {
3489     static gen_helper_gvec_3_ptr * const fns[3] = {
3490         gen_helper_gvec_fmul_idx_h,
3491         gen_helper_gvec_fmul_idx_s,
3492         gen_helper_gvec_fmul_idx_d,
3493     };
3494
3495     if (sve_access_check(s)) {
3496         unsigned vsz = vec_full_reg_size(s);
3497         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3498         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3499                            vec_full_reg_offset(s, a->rn),
3500                            vec_full_reg_offset(s, a->rm),
3501                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3502         tcg_temp_free_ptr(status);
3503     }
3504     return true;
3505 }
3506
3507 /*
3508  *** SVE Floating Point Fast Reduction Group
3509  */
3510
3511 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3512                                   TCGv_ptr, TCGv_i32);
3513
3514 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3515                       gen_helper_fp_reduce *fn)
3516 {
3517     unsigned vsz = vec_full_reg_size(s);
3518     unsigned p2vsz = pow2ceil(vsz);
3519     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3520     TCGv_ptr t_zn, t_pg, status;
3521     TCGv_i64 temp;
3522
3523     temp = tcg_temp_new_i64();
3524     t_zn = tcg_temp_new_ptr();
3525     t_pg = tcg_temp_new_ptr();
3526
3527     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3528     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3529     status = get_fpstatus_ptr(a->esz == MO_16);
3530
3531     fn(temp, t_zn, t_pg, status, t_desc);
3532     tcg_temp_free_ptr(t_zn);
3533     tcg_temp_free_ptr(t_pg);
3534     tcg_temp_free_ptr(status);
3535     tcg_temp_free_i32(t_desc);
3536
3537     write_fp_dreg(s, a->rd, temp);
3538     tcg_temp_free_i64(temp);
3539 }
3540
3541 #define DO_VPZ(NAME, name) \
3542 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
3543 {                                                                        \
3544     static gen_helper_fp_reduce * const fns[3] = {                       \
3545         gen_helper_sve_##name##_h,                                       \
3546         gen_helper_sve_##name##_s,                                       \
3547         gen_helper_sve_##name##_d,                                       \
3548     };                                                                   \
3549     if (a->esz == 0) {                                                   \
3550         return false;                                                    \
3551     }                                                                    \
3552     if (sve_access_check(s)) {                                           \
3553         do_reduce(s, a, fns[a->esz - 1]);                                \
3554     }                                                                    \
3555     return true;                                                         \
3556 }
3557
3558 DO_VPZ(FADDV, faddv)
3559 DO_VPZ(FMINNMV, fminnmv)
3560 DO_VPZ(FMAXNMV, fmaxnmv)
3561 DO_VPZ(FMINV, fminv)
3562 DO_VPZ(FMAXV, fmaxv)
3563
3564 /*
3565  *** SVE Floating Point Unary Operations - Unpredicated Group
3566  */
3567
3568 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3569 {
3570     unsigned vsz = vec_full_reg_size(s);
3571     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3572
3573     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3574                        vec_full_reg_offset(s, a->rn),
3575                        status, vsz, vsz, 0, fn);
3576     tcg_temp_free_ptr(status);
3577 }
3578
3579 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3580 {
3581     static gen_helper_gvec_2_ptr * const fns[3] = {
3582         gen_helper_gvec_frecpe_h,
3583         gen_helper_gvec_frecpe_s,
3584         gen_helper_gvec_frecpe_d,
3585     };
3586     if (a->esz == 0) {
3587         return false;
3588     }
3589     if (sve_access_check(s)) {
3590         do_zz_fp(s, a, fns[a->esz - 1]);
3591     }
3592     return true;
3593 }
3594
3595 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3596 {
3597     static gen_helper_gvec_2_ptr * const fns[3] = {
3598         gen_helper_gvec_frsqrte_h,
3599         gen_helper_gvec_frsqrte_s,
3600         gen_helper_gvec_frsqrte_d,
3601     };
3602     if (a->esz == 0) {
3603         return false;
3604     }
3605     if (sve_access_check(s)) {
3606         do_zz_fp(s, a, fns[a->esz - 1]);
3607     }
3608     return true;
3609 }
3610
3611 /*
3612  *** SVE Floating Point Compare with Zero Group
3613  */
3614
3615 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3616                       gen_helper_gvec_3_ptr *fn)
3617 {
3618     unsigned vsz = vec_full_reg_size(s);
3619     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3620
3621     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3622                        vec_full_reg_offset(s, a->rn),
3623                        pred_full_reg_offset(s, a->pg),
3624                        status, vsz, vsz, 0, fn);
3625     tcg_temp_free_ptr(status);
3626 }
3627
3628 #define DO_PPZ(NAME, name) \
3629 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
3630 {                                                                 \
3631     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3632         gen_helper_sve_##name##_h,                                \
3633         gen_helper_sve_##name##_s,                                \
3634         gen_helper_sve_##name##_d,                                \
3635     };                                                            \
3636     if (a->esz == 0) {                                            \
3637         return false;                                             \
3638     }                                                             \
3639     if (sve_access_check(s)) {                                    \
3640         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3641     }                                                             \
3642     return true;                                                  \
3643 }
3644
3645 DO_PPZ(FCMGE_ppz0, fcmge0)
3646 DO_PPZ(FCMGT_ppz0, fcmgt0)
3647 DO_PPZ(FCMLE_ppz0, fcmle0)
3648 DO_PPZ(FCMLT_ppz0, fcmlt0)
3649 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3650 DO_PPZ(FCMNE_ppz0, fcmne0)
3651
3652 #undef DO_PPZ
3653
3654 /*
3655  *** SVE floating-point trig multiply-add coefficient
3656  */
3657
3658 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3659 {
3660     static gen_helper_gvec_3_ptr * const fns[3] = {
3661         gen_helper_sve_ftmad_h,
3662         gen_helper_sve_ftmad_s,
3663         gen_helper_sve_ftmad_d,
3664     };
3665
3666     if (a->esz == 0) {
3667         return false;
3668     }
3669     if (sve_access_check(s)) {
3670         unsigned vsz = vec_full_reg_size(s);
3671         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3672         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3673                            vec_full_reg_offset(s, a->rn),
3674                            vec_full_reg_offset(s, a->rm),
3675                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3676         tcg_temp_free_ptr(status);
3677     }
3678     return true;
3679 }
3680
3681 /*
3682  *** SVE Floating Point Accumulating Reduction Group
3683  */
3684
3685 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3686 {
3687     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3688                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3689     static fadda_fn * const fns[3] = {
3690         gen_helper_sve_fadda_h,
3691         gen_helper_sve_fadda_s,
3692         gen_helper_sve_fadda_d,
3693     };
3694     unsigned vsz = vec_full_reg_size(s);
3695     TCGv_ptr t_rm, t_pg, t_fpst;
3696     TCGv_i64 t_val;
3697     TCGv_i32 t_desc;
3698
3699     if (a->esz == 0) {
3700         return false;
3701     }
3702     if (!sve_access_check(s)) {
3703         return true;
3704     }
3705
3706     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3707     t_rm = tcg_temp_new_ptr();
3708     t_pg = tcg_temp_new_ptr();
3709     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3710     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3711     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3712     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3713
3714     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3715
3716     tcg_temp_free_i32(t_desc);
3717     tcg_temp_free_ptr(t_fpst);
3718     tcg_temp_free_ptr(t_pg);
3719     tcg_temp_free_ptr(t_rm);
3720
3721     write_fp_dreg(s, a->rd, t_val);
3722     tcg_temp_free_i64(t_val);
3723     return true;
3724 }
3725
3726 /*
3727  *** SVE Floating Point Arithmetic - Unpredicated Group
3728  */
3729
3730 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3731                       gen_helper_gvec_3_ptr *fn)
3732 {
3733     if (fn == NULL) {
3734         return false;
3735     }
3736     if (sve_access_check(s)) {
3737         unsigned vsz = vec_full_reg_size(s);
3738         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3739         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3740                            vec_full_reg_offset(s, a->rn),
3741                            vec_full_reg_offset(s, a->rm),
3742                            status, vsz, vsz, 0, fn);
3743         tcg_temp_free_ptr(status);
3744     }
3745     return true;
3746 }
3747
3748
3749 #define DO_FP3(NAME, name) \
3750 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
3751 {                                                                   \
3752     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3753         NULL, gen_helper_gvec_##name##_h,                           \
3754         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3755     };                                                              \
3756     return do_zzz_fp(s, a, fns[a->esz]);                            \
3757 }
3758
3759 DO_FP3(FADD_zzz, fadd)
3760 DO_FP3(FSUB_zzz, fsub)
3761 DO_FP3(FMUL_zzz, fmul)
3762 DO_FP3(FTSMUL, ftsmul)
3763 DO_FP3(FRECPS, recps)
3764 DO_FP3(FRSQRTS, rsqrts)
3765
3766 #undef DO_FP3
3767
3768 /*
3769  *** SVE Floating Point Arithmetic - Predicated Group
3770  */
3771
3772 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3773                        gen_helper_gvec_4_ptr *fn)
3774 {
3775     if (fn == NULL) {
3776         return false;
3777     }
3778     if (sve_access_check(s)) {
3779         unsigned vsz = vec_full_reg_size(s);
3780         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3781         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3782                            vec_full_reg_offset(s, a->rn),
3783                            vec_full_reg_offset(s, a->rm),
3784                            pred_full_reg_offset(s, a->pg),
3785                            status, vsz, vsz, 0, fn);
3786         tcg_temp_free_ptr(status);
3787     }
3788     return true;
3789 }
3790
3791 #define DO_FP3(NAME, name) \
3792 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
3793 {                                                                   \
3794     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3795         NULL, gen_helper_sve_##name##_h,                            \
3796         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3797     };                                                              \
3798     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3799 }
3800
3801 DO_FP3(FADD_zpzz, fadd)
3802 DO_FP3(FSUB_zpzz, fsub)
3803 DO_FP3(FMUL_zpzz, fmul)
3804 DO_FP3(FMIN_zpzz, fmin)
3805 DO_FP3(FMAX_zpzz, fmax)
3806 DO_FP3(FMINNM_zpzz, fminnum)
3807 DO_FP3(FMAXNM_zpzz, fmaxnum)
3808 DO_FP3(FABD, fabd)
3809 DO_FP3(FSCALE, fscalbn)
3810 DO_FP3(FDIV, fdiv)
3811 DO_FP3(FMULX, fmulx)
3812
3813 #undef DO_FP3
3814
3815 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3816                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3817
3818 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3819                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3820 {
3821     unsigned vsz = vec_full_reg_size(s);
3822     TCGv_ptr t_zd, t_zn, t_pg, status;
3823     TCGv_i32 desc;
3824
3825     t_zd = tcg_temp_new_ptr();
3826     t_zn = tcg_temp_new_ptr();
3827     t_pg = tcg_temp_new_ptr();
3828     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3829     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3830     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3831
3832     status = get_fpstatus_ptr(is_fp16);
3833     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3834     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3835
3836     tcg_temp_free_i32(desc);
3837     tcg_temp_free_ptr(status);
3838     tcg_temp_free_ptr(t_pg);
3839     tcg_temp_free_ptr(t_zn);
3840     tcg_temp_free_ptr(t_zd);
3841 }
3842
3843 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3844                       gen_helper_sve_fp2scalar *fn)
3845 {
3846     TCGv_i64 temp = tcg_const_i64(imm);
3847     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3848     tcg_temp_free_i64(temp);
3849 }
3850
3851 #define DO_FP_IMM(NAME, name, const0, const1) \
3852 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
3853 {                                                                         \
3854     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3855         gen_helper_sve_##name##_h,                                        \
3856         gen_helper_sve_##name##_s,                                        \
3857         gen_helper_sve_##name##_d                                         \
3858     };                                                                    \
3859     static uint64_t const val[3][2] = {                                   \
3860         { float16_##const0, float16_##const1 },                           \
3861         { float32_##const0, float32_##const1 },                           \
3862         { float64_##const0, float64_##const1 },                           \
3863     };                                                                    \
3864     if (a->esz == 0) {                                                    \
3865         return false;                                                     \
3866     }                                                                     \
3867     if (sve_access_check(s)) {                                            \
3868         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3869     }                                                                     \
3870     return true;                                                          \
3871 }
3872
3873 #define float16_two  make_float16(0x4000)
3874 #define float32_two  make_float32(0x40000000)
3875 #define float64_two  make_float64(0x4000000000000000ULL)
3876
3877 DO_FP_IMM(FADD, fadds, half, one)
3878 DO_FP_IMM(FSUB, fsubs, half, one)
3879 DO_FP_IMM(FMUL, fmuls, half, two)
3880 DO_FP_IMM(FSUBR, fsubrs, half, one)
3881 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3882 DO_FP_IMM(FMINNM, fminnms, zero, one)
3883 DO_FP_IMM(FMAX, fmaxs, zero, one)
3884 DO_FP_IMM(FMIN, fmins, zero, one)
3885
3886 #undef DO_FP_IMM
3887
3888 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3889                       gen_helper_gvec_4_ptr *fn)
3890 {
3891     if (fn == NULL) {
3892         return false;
3893     }
3894     if (sve_access_check(s)) {
3895         unsigned vsz = vec_full_reg_size(s);
3896         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3897         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3898                            vec_full_reg_offset(s, a->rn),
3899                            vec_full_reg_offset(s, a->rm),
3900                            pred_full_reg_offset(s, a->pg),
3901                            status, vsz, vsz, 0, fn);
3902         tcg_temp_free_ptr(status);
3903     }
3904     return true;
3905 }
3906
3907 #define DO_FPCMP(NAME, name) \
3908 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
3909 {                                                                     \
3910     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3911         NULL, gen_helper_sve_##name##_h,                              \
3912         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3913     };                                                                \
3914     return do_fp_cmp(s, a, fns[a->esz]);                              \
3915 }
3916
3917 DO_FPCMP(FCMGE, fcmge)
3918 DO_FPCMP(FCMGT, fcmgt)
3919 DO_FPCMP(FCMEQ, fcmeq)
3920 DO_FPCMP(FCMNE, fcmne)
3921 DO_FPCMP(FCMUO, fcmuo)
3922 DO_FPCMP(FACGE, facge)
3923 DO_FPCMP(FACGT, facgt)
3924
3925 #undef DO_FPCMP
3926
3927 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3928 {
3929     static gen_helper_gvec_4_ptr * const fns[3] = {
3930         gen_helper_sve_fcadd_h,
3931         gen_helper_sve_fcadd_s,
3932         gen_helper_sve_fcadd_d
3933     };
3934
3935     if (a->esz == 0) {
3936         return false;
3937     }
3938     if (sve_access_check(s)) {
3939         unsigned vsz = vec_full_reg_size(s);
3940         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3941         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3942                            vec_full_reg_offset(s, a->rn),
3943                            vec_full_reg_offset(s, a->rm),
3944                            pred_full_reg_offset(s, a->pg),
3945                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3946         tcg_temp_free_ptr(status);
3947     }
3948     return true;
3949 }
3950
3951 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3952
3953 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3954 {
3955     if (fn == NULL) {
3956         return false;
3957     }
3958     if (!sve_access_check(s)) {
3959         return true;
3960     }
3961
3962     unsigned vsz = vec_full_reg_size(s);
3963     unsigned desc;
3964     TCGv_i32 t_desc;
3965     TCGv_ptr pg = tcg_temp_new_ptr();
3966
3967     /* We would need 7 operands to pass these arguments "properly".
3968      * So we encode all the register numbers into the descriptor.
3969      */
3970     desc = deposit32(a->rd, 5, 5, a->rn);
3971     desc = deposit32(desc, 10, 5, a->rm);
3972     desc = deposit32(desc, 15, 5, a->ra);
3973     desc = simd_desc(vsz, vsz, desc);
3974
3975     t_desc = tcg_const_i32(desc);
3976     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3977     fn(cpu_env, pg, t_desc);
3978     tcg_temp_free_i32(t_desc);
3979     tcg_temp_free_ptr(pg);
3980     return true;
3981 }
3982
3983 #define DO_FMLA(NAME, name) \
3984 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
3985 {                                                                    \
3986     static gen_helper_sve_fmla * const fns[4] = {                    \
3987         NULL, gen_helper_sve_##name##_h,                             \
3988         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3989     };                                                               \
3990     return do_fmla(s, a, fns[a->esz]);                               \
3991 }
3992
3993 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3994 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3995 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3996 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3997
3998 #undef DO_FMLA
3999
4000 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
4001 {
4002     static gen_helper_sve_fmla * const fns[3] = {
4003         gen_helper_sve_fcmla_zpzzz_h,
4004         gen_helper_sve_fcmla_zpzzz_s,
4005         gen_helper_sve_fcmla_zpzzz_d,
4006     };
4007
4008     if (a->esz == 0) {
4009         return false;
4010     }
4011     if (sve_access_check(s)) {
4012         unsigned vsz = vec_full_reg_size(s);
4013         unsigned desc;
4014         TCGv_i32 t_desc;
4015         TCGv_ptr pg = tcg_temp_new_ptr();
4016
4017         /* We would need 7 operands to pass these arguments "properly".
4018          * So we encode all the register numbers into the descriptor.
4019          */
4020         desc = deposit32(a->rd, 5, 5, a->rn);
4021         desc = deposit32(desc, 10, 5, a->rm);
4022         desc = deposit32(desc, 15, 5, a->ra);
4023         desc = deposit32(desc, 20, 2, a->rot);
4024         desc = sextract32(desc, 0, 22);
4025         desc = simd_desc(vsz, vsz, desc);
4026
4027         t_desc = tcg_const_i32(desc);
4028         tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4029         fns[a->esz - 1](cpu_env, pg, t_desc);
4030         tcg_temp_free_i32(t_desc);
4031         tcg_temp_free_ptr(pg);
4032     }
4033     return true;
4034 }
4035
4036 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4037 {
4038     static gen_helper_gvec_3_ptr * const fns[2] = {
4039         gen_helper_gvec_fcmlah_idx,
4040         gen_helper_gvec_fcmlas_idx,
4041     };
4042
4043     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4044     tcg_debug_assert(a->rd == a->ra);
4045     if (sve_access_check(s)) {
4046         unsigned vsz = vec_full_reg_size(s);
4047         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4048         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4049                            vec_full_reg_offset(s, a->rn),
4050                            vec_full_reg_offset(s, a->rm),
4051                            status, vsz, vsz,
4052                            a->index * 4 + a->rot,
4053                            fns[a->esz - 1]);
4054         tcg_temp_free_ptr(status);
4055     }
4056     return true;
4057 }
4058
4059 /*
4060  *** SVE Floating Point Unary Operations Predicated Group
4061  */
4062
4063 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4064                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4065 {
4066     if (sve_access_check(s)) {
4067         unsigned vsz = vec_full_reg_size(s);
4068         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4069         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4070                            vec_full_reg_offset(s, rn),
4071                            pred_full_reg_offset(s, pg),
4072                            status, vsz, vsz, 0, fn);
4073         tcg_temp_free_ptr(status);
4074     }
4075     return true;
4076 }
4077
4078 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4079 {
4080     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4081 }
4082
4083 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4084 {
4085     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4086 }
4087
4088 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4089 {
4090     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4091 }
4092
4093 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4094 {
4095     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4096 }
4097
4098 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4099 {
4100     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4101 }
4102
4103 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4104 {
4105     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4106 }
4107
4108 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4109 {
4110     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4111 }
4112
4113 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4114 {
4115     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4116 }
4117
4118 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4119 {
4120     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4121 }
4122
4123 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4124 {
4125     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4126 }
4127
4128 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4129 {
4130     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4131 }
4132
4133 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4134 {
4135     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4136 }
4137
4138 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4139 {
4140     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4141 }
4142
4143 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4144 {
4145     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4146 }
4147
4148 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4149 {
4150     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4151 }
4152
4153 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4154 {
4155     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4156 }
4157
4158 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4159 {
4160     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4161 }
4162
4163 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4164 {
4165     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4166 }
4167
4168 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4169 {
4170     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4171 }
4172
4173 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4174 {
4175     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4176 }
4177
4178 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4179     gen_helper_sve_frint_h,
4180     gen_helper_sve_frint_s,
4181     gen_helper_sve_frint_d
4182 };
4183
4184 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4185 {
4186     if (a->esz == 0) {
4187         return false;
4188     }
4189     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4190                       frint_fns[a->esz - 1]);
4191 }
4192
4193 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4194 {
4195     static gen_helper_gvec_3_ptr * const fns[3] = {
4196         gen_helper_sve_frintx_h,
4197         gen_helper_sve_frintx_s,
4198         gen_helper_sve_frintx_d
4199     };
4200     if (a->esz == 0) {
4201         return false;
4202     }
4203     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4204 }
4205
4206 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4207 {
4208     if (a->esz == 0) {
4209         return false;
4210     }
4211     if (sve_access_check(s)) {
4212         unsigned vsz = vec_full_reg_size(s);
4213         TCGv_i32 tmode = tcg_const_i32(mode);
4214         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4215
4216         gen_helper_set_rmode(tmode, tmode, status);
4217
4218         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4219                            vec_full_reg_offset(s, a->rn),
4220                            pred_full_reg_offset(s, a->pg),
4221                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4222
4223         gen_helper_set_rmode(tmode, tmode, status);
4224         tcg_temp_free_i32(tmode);
4225         tcg_temp_free_ptr(status);
4226     }
4227     return true;
4228 }
4229
4230 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4231 {
4232     return do_frint_mode(s, a, float_round_nearest_even);
4233 }
4234
4235 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4236 {
4237     return do_frint_mode(s, a, float_round_up);
4238 }
4239
4240 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4241 {
4242     return do_frint_mode(s, a, float_round_down);
4243 }
4244
4245 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4246 {
4247     return do_frint_mode(s, a, float_round_to_zero);
4248 }
4249
4250 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4251 {
4252     return do_frint_mode(s, a, float_round_ties_away);
4253 }
4254
4255 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4256 {
4257     static gen_helper_gvec_3_ptr * const fns[3] = {
4258         gen_helper_sve_frecpx_h,
4259         gen_helper_sve_frecpx_s,
4260         gen_helper_sve_frecpx_d
4261     };
4262     if (a->esz == 0) {
4263         return false;
4264     }
4265     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4266 }
4267
4268 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4269 {
4270     static gen_helper_gvec_3_ptr * const fns[3] = {
4271         gen_helper_sve_fsqrt_h,
4272         gen_helper_sve_fsqrt_s,
4273         gen_helper_sve_fsqrt_d
4274     };
4275     if (a->esz == 0) {
4276         return false;
4277     }
4278     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4279 }
4280
4281 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4282 {
4283     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4284 }
4285
4286 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4287 {
4288     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4289 }
4290
4291 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4292 {
4293     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4294 }
4295
4296 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4297 {
4298     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4299 }
4300
4301 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4302 {
4303     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4304 }
4305
4306 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4307 {
4308     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4309 }
4310
4311 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4312 {
4313     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4314 }
4315
4316 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4317 {
4318     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4319 }
4320
4321 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4322 {
4323     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4324 }
4325
4326 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4327 {
4328     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4329 }
4330
4331 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4332 {
4333     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4334 }
4335
4336 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4337 {
4338     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4339 }
4340
4341 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4342 {
4343     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4344 }
4345
4346 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4347 {
4348     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4349 }
4350
4351 /*
4352  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4353  */
4354
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 *
 * VOFS is an offset from cpu_env; Rn is read with cpu_reg_sp().
 * The bulk of the register is transferred in 8-byte little-endian
 * units, either unrolled or via a generated loop; a trailing 2, 4
 * or 6 bytes is handled separately.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    /* Bytes covered by whole 8-byte units, and what is left over. */
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Count of memory accesses: one per 8-byte unit, plus one for each
     * bit set in the remainder.
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Fully unrolled: one load from memory and one store into
         * cpu_env per 8-byte unit.
         */
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Generated loop; I is the byte offset and lives in a local
         * temp because it must survive the backward branch.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        /* Form the destination pointer from the current offset, then
         * advance the offset before the store.
         */
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two remainder: ctz32 yields the log2 size. */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 4 bytes, then 2 bytes, merged with the second load
             * deposited at bit 32 of the first.
             */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4442
/* Similarly for stores.
 *
 * Store LEN bytes of the register at offset VOFS within cpu_env to
 * memory starting at address Rn + IMM (Rn read with cpu_reg_sp()).
 * The bulk is transferred in 8-byte little-endian units, either
 * unrolled or via a generated loop; a trailing 2, 4 or 6 bytes is
 * handled separately.
 */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    /* Bytes covered by whole 8-byte units, and what is left over. */
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Count of memory accesses: one per 8-byte unit, plus one for each
     * bit set in the remainder.
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Fully unrolled: one load from cpu_env and one store to
         * memory per 8-byte unit.
         */
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        /* Generated loop; I is the byte offset and lives in a local
         * temp because it must survive the backward branch.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Fetch the next 8 bytes of the register from cpu_env. */
        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two remainder: ctz32 yields the log2 size. */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* Low 4 bytes first, then shift down and store 2 more. */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4524
4525 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4526 {
4527     if (sve_access_check(s)) {
4528         int size = vec_full_reg_size(s);
4529         int off = vec_full_reg_offset(s, a->rd);
4530         do_ldr(s, off, size, a->rn, a->imm * size);
4531     }
4532     return true;
4533 }
4534
4535 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4536 {
4537     if (sve_access_check(s)) {
4538         int size = pred_full_reg_size(s);
4539         int off = pred_full_reg_offset(s, a->rd);
4540         do_ldr(s, off, size, a->rn, a->imm * size);
4541     }
4542     return true;
4543 }
4544
4545 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4546 {
4547     if (sve_access_check(s)) {
4548         int size = vec_full_reg_size(s);
4549         int off = vec_full_reg_offset(s, a->rd);
4550         do_str(s, off, size, a->rn, a->imm * size);
4551     }
4552     return true;
4553 }
4554
4555 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4556 {
4557     if (sve_access_check(s)) {
4558         int size = pred_full_reg_size(s);
4559         int off = pred_full_reg_offset(s, a->rd);
4560         do_str(s, off, size, a->rn, a->imm * size);
4561     }
4562     return true;
4563 }
4564
4565 /*
4566  *** SVE Memory - Contiguous Load Group
4567  */
4568
4569 /* The memory mode of the dtype.  */
4570 static const MemOp dtype_mop[16] = {
4571     MO_UB, MO_UB, MO_UB, MO_UB,
4572     MO_SL, MO_UW, MO_UW, MO_UW,
4573     MO_SW, MO_SW, MO_UL, MO_UL,
4574     MO_SB, MO_SB, MO_SB, MO_Q
4575 };
4576
4577 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4578
/* Vector element size for each of the 16 dtypes, as a log2 byte count. */
static const uint8_t dtype_esz[16] = {
    [0]  = 0, [1]  = 1, [2]  = 2, [3]  = 3,
    [4]  = 3, [5]  = 1, [6]  = 2, [7]  = 3,
    [8]  = 3, [9]  = 2, [10] = 2, [11] = 3,
    [12] = 3, [13] = 2, [14] = 1, [15] = 3
};
4586
4587 static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
4588 {
4589     return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
4590 }
4591
4592 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4593                        int dtype, gen_helper_gvec_mem *fn)
4594 {
4595     unsigned vsz = vec_full_reg_size(s);
4596     TCGv_ptr t_pg;
4597     TCGv_i32 t_desc;
4598     int desc;
4599
4600     /* For e.g. LD4, there are not enough arguments to pass all 4
4601      * registers as pointers, so encode the regno into the data field.
4602      * For consistency, do this even for LD1.
4603      */
4604     desc = sve_memopidx(s, dtype);
4605     desc |= zt << MEMOPIDX_SHIFT;
4606     desc = simd_desc(vsz, vsz, desc);
4607     t_desc = tcg_const_i32(desc);
4608     t_pg = tcg_temp_new_ptr();
4609
4610     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4611     fn(cpu_env, t_pg, addr, t_desc);
4612
4613     tcg_temp_free_ptr(t_pg);
4614     tcg_temp_free_i32(t_desc);
4615 }
4616
/* Emit a contiguous load of NREG + 1 registers starting at ZT, under
 * predicate PG, from ADDR.  The helper is looked up by data endianness,
 * DTYPE and NREG, then handed to do_mem_zpa.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    /* Indexed as [big-endian?][dtype][nreg].  The four columns are the
     * ld1, ld2, ld3 and ld4 forms; only four dtype rows have the
     * multi-register forms.  Byte-sized memory accesses share one
     * helper between both endiannesses.
     */
    static gen_helper_gvec_mem * const fns[2][16][4] = {
        /* Little-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

        /* Big-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
    };
    /* First index is 1 when the current data endianness is big-endian. */
    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, fn);
}
4679
4680 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4681 {
4682     if (a->rm == 31) {
4683         return false;
4684     }
4685     if (sve_access_check(s)) {
4686         TCGv_i64 addr = new_tmp_a64(s);
4687         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4688         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4689         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4690     }
4691     return true;
4692 }
4693
4694 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4695 {
4696     if (sve_access_check(s)) {
4697         int vsz = vec_full_reg_size(s);
4698         int elements = vsz >> dtype_esz[a->dtype];
4699         TCGv_i64 addr = new_tmp_a64(s);
4700
4701         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4702                          (a->imm * elements * (a->nreg + 1))
4703                          << dtype_msz(a->dtype));
4704         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4705     }
4706     return true;
4707 }
4708
4709 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4710 {
4711     static gen_helper_gvec_mem * const fns[2][16] = {
4712         /* Little-endian */
4713         { gen_helper_sve_ldff1bb_r,
4714           gen_helper_sve_ldff1bhu_r,
4715           gen_helper_sve_ldff1bsu_r,
4716           gen_helper_sve_ldff1bdu_r,
4717
4718           gen_helper_sve_ldff1sds_le_r,
4719           gen_helper_sve_ldff1hh_le_r,
4720           gen_helper_sve_ldff1hsu_le_r,
4721           gen_helper_sve_ldff1hdu_le_r,
4722
4723           gen_helper_sve_ldff1hds_le_r,
4724           gen_helper_sve_ldff1hss_le_r,
4725           gen_helper_sve_ldff1ss_le_r,
4726           gen_helper_sve_ldff1sdu_le_r,
4727
4728           gen_helper_sve_ldff1bds_r,
4729           gen_helper_sve_ldff1bss_r,
4730           gen_helper_sve_ldff1bhs_r,
4731           gen_helper_sve_ldff1dd_le_r },
4732
4733         /* Big-endian */
4734         { gen_helper_sve_ldff1bb_r,
4735           gen_helper_sve_ldff1bhu_r,
4736           gen_helper_sve_ldff1bsu_r,
4737           gen_helper_sve_ldff1bdu_r,
4738
4739           gen_helper_sve_ldff1sds_be_r,
4740           gen_helper_sve_ldff1hh_be_r,
4741           gen_helper_sve_ldff1hsu_be_r,
4742           gen_helper_sve_ldff1hdu_be_r,
4743
4744           gen_helper_sve_ldff1hds_be_r,
4745           gen_helper_sve_ldff1hss_be_r,
4746           gen_helper_sve_ldff1ss_be_r,
4747           gen_helper_sve_ldff1sdu_be_r,
4748
4749           gen_helper_sve_ldff1bds_r,
4750           gen_helper_sve_ldff1bss_r,
4751           gen_helper_sve_ldff1bhs_r,
4752           gen_helper_sve_ldff1dd_be_r },
4753     };
4754
4755     if (sve_access_check(s)) {
4756         TCGv_i64 addr = new_tmp_a64(s);
4757         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4758         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4759         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4760                    fns[s->be_data == MO_BE][a->dtype]);
4761     }
4762     return true;
4763 }
4764
4765 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4766 {
4767     static gen_helper_gvec_mem * const fns[2][16] = {
4768         /* Little-endian */
4769         { gen_helper_sve_ldnf1bb_r,
4770           gen_helper_sve_ldnf1bhu_r,
4771           gen_helper_sve_ldnf1bsu_r,
4772           gen_helper_sve_ldnf1bdu_r,
4773
4774           gen_helper_sve_ldnf1sds_le_r,
4775           gen_helper_sve_ldnf1hh_le_r,
4776           gen_helper_sve_ldnf1hsu_le_r,
4777           gen_helper_sve_ldnf1hdu_le_r,
4778
4779           gen_helper_sve_ldnf1hds_le_r,
4780           gen_helper_sve_ldnf1hss_le_r,
4781           gen_helper_sve_ldnf1ss_le_r,
4782           gen_helper_sve_ldnf1sdu_le_r,
4783
4784           gen_helper_sve_ldnf1bds_r,
4785           gen_helper_sve_ldnf1bss_r,
4786           gen_helper_sve_ldnf1bhs_r,
4787           gen_helper_sve_ldnf1dd_le_r },
4788
4789         /* Big-endian */
4790         { gen_helper_sve_ldnf1bb_r,
4791           gen_helper_sve_ldnf1bhu_r,
4792           gen_helper_sve_ldnf1bsu_r,
4793           gen_helper_sve_ldnf1bdu_r,
4794
4795           gen_helper_sve_ldnf1sds_be_r,
4796           gen_helper_sve_ldnf1hh_be_r,
4797           gen_helper_sve_ldnf1hsu_be_r,
4798           gen_helper_sve_ldnf1hdu_be_r,
4799
4800           gen_helper_sve_ldnf1hds_be_r,
4801           gen_helper_sve_ldnf1hss_be_r,
4802           gen_helper_sve_ldnf1ss_be_r,
4803           gen_helper_sve_ldnf1sdu_be_r,
4804
4805           gen_helper_sve_ldnf1bds_r,
4806           gen_helper_sve_ldnf1bss_r,
4807           gen_helper_sve_ldnf1bhs_r,
4808           gen_helper_sve_ldnf1dd_be_r },
4809     };
4810
4811     if (sve_access_check(s)) {
4812         int vsz = vec_full_reg_size(s);
4813         int elements = vsz >> dtype_esz[a->dtype];
4814         int off = (a->imm * elements) << dtype_msz(a->dtype);
4815         TCGv_i64 addr = new_tmp_a64(s);
4816
4817         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4818         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4819                    fns[s->be_data == MO_BE][a->dtype]);
4820     }
4821     return true;
4822 }
4823
4824 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4825 {
4826     static gen_helper_gvec_mem * const fns[2][4] = {
4827         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
4828           gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4829         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
4830           gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4831     };
4832     unsigned vsz = vec_full_reg_size(s);
4833     TCGv_ptr t_pg;
4834     TCGv_i32 t_desc;
4835     int desc, poff;
4836
4837     /* Load the first quadword using the normal predicated load helpers.  */
4838     desc = sve_memopidx(s, msz_dtype(s, msz));
4839     desc |= zt << MEMOPIDX_SHIFT;
4840     desc = simd_desc(16, 16, desc);
4841     t_desc = tcg_const_i32(desc);
4842
4843     poff = pred_full_reg_offset(s, pg);
4844     if (vsz > 16) {
4845         /*
4846          * Zero-extend the first 16 bits of the predicate into a temporary.
4847          * This avoids triggering an assert making sure we don't have bits
4848          * set within a predicate beyond VQ, but we have lowered VQ to 1
4849          * for this load operation.
4850          */
4851         TCGv_i64 tmp = tcg_temp_new_i64();
4852 #ifdef HOST_WORDS_BIGENDIAN
4853         poff += 6;
4854 #endif
4855         tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4856
4857         poff = offsetof(CPUARMState, vfp.preg_tmp);
4858         tcg_gen_st_i64(tmp, cpu_env, poff);
4859         tcg_temp_free_i64(tmp);
4860     }
4861
4862     t_pg = tcg_temp_new_ptr();
4863     tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4864
4865     fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4866
4867     tcg_temp_free_ptr(t_pg);
4868     tcg_temp_free_i32(t_desc);
4869
4870     /* Replicate that first quadword.  */
4871     if (vsz > 16) {
4872         unsigned dofs = vec_full_reg_offset(s, zt);
4873         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4874     }
4875 }
4876
4877 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4878 {
4879     if (a->rm == 31) {
4880         return false;
4881     }
4882     if (sve_access_check(s)) {
4883         int msz = dtype_msz(a->dtype);
4884         TCGv_i64 addr = new_tmp_a64(s);
4885         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4886         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4887         do_ldrq(s, a->rd, a->pg, addr, msz);
4888     }
4889     return true;
4890 }
4891
4892 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4893 {
4894     if (sve_access_check(s)) {
4895         TCGv_i64 addr = new_tmp_a64(s);
4896         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4897         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4898     }
4899     return true;
4900 }
4901
4902 /* Load and broadcast element.  */
4903 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4904 {
4905     if (!sve_access_check(s)) {
4906         return true;
4907     }
4908
4909     unsigned vsz = vec_full_reg_size(s);
4910     unsigned psz = pred_full_reg_size(s);
4911     unsigned esz = dtype_esz[a->dtype];
4912     unsigned msz = dtype_msz(a->dtype);
4913     TCGLabel *over = gen_new_label();
4914     TCGv_i64 temp;
4915
4916     /* If the guarding predicate has no bits set, no load occurs.  */
4917     if (psz <= 8) {
4918         /* Reduce the pred_esz_masks value simply to reduce the
4919          * size of the code generated here.
4920          */
4921         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4922         temp = tcg_temp_new_i64();
4923         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4924         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4925         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4926         tcg_temp_free_i64(temp);
4927     } else {
4928         TCGv_i32 t32 = tcg_temp_new_i32();
4929         find_last_active(s, t32, esz, a->pg);
4930         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4931         tcg_temp_free_i32(t32);
4932     }
4933
4934     /* Load the data.  */
4935     temp = tcg_temp_new_i64();
4936     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4937     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4938                         s->be_data | dtype_mop[a->dtype]);
4939
4940     /* Broadcast to *all* elements.  */
4941     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4942                          vsz, vsz, temp);
4943     tcg_temp_free_i64(temp);
4944
4945     /* Zero the inactive elements.  */
4946     gen_set_label(over);
4947     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4948     return true;
4949 }
4950
4951 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4952                       int msz, int esz, int nreg)
4953 {
4954     static gen_helper_gvec_mem * const fn_single[2][4][4] = {
4955         { { gen_helper_sve_st1bb_r,
4956             gen_helper_sve_st1bh_r,
4957             gen_helper_sve_st1bs_r,
4958             gen_helper_sve_st1bd_r },
4959           { NULL,
4960             gen_helper_sve_st1hh_le_r,
4961             gen_helper_sve_st1hs_le_r,
4962             gen_helper_sve_st1hd_le_r },
4963           { NULL, NULL,
4964             gen_helper_sve_st1ss_le_r,
4965             gen_helper_sve_st1sd_le_r },
4966           { NULL, NULL, NULL,
4967             gen_helper_sve_st1dd_le_r } },
4968         { { gen_helper_sve_st1bb_r,
4969             gen_helper_sve_st1bh_r,
4970             gen_helper_sve_st1bs_r,
4971             gen_helper_sve_st1bd_r },
4972           { NULL,
4973             gen_helper_sve_st1hh_be_r,
4974             gen_helper_sve_st1hs_be_r,
4975             gen_helper_sve_st1hd_be_r },
4976           { NULL, NULL,
4977             gen_helper_sve_st1ss_be_r,
4978             gen_helper_sve_st1sd_be_r },
4979           { NULL, NULL, NULL,
4980             gen_helper_sve_st1dd_be_r } },
4981     };
4982     static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
4983         { { gen_helper_sve_st2bb_r,
4984             gen_helper_sve_st2hh_le_r,
4985             gen_helper_sve_st2ss_le_r,
4986             gen_helper_sve_st2dd_le_r },
4987           { gen_helper_sve_st3bb_r,
4988             gen_helper_sve_st3hh_le_r,
4989             gen_helper_sve_st3ss_le_r,
4990             gen_helper_sve_st3dd_le_r },
4991           { gen_helper_sve_st4bb_r,
4992             gen_helper_sve_st4hh_le_r,
4993             gen_helper_sve_st4ss_le_r,
4994             gen_helper_sve_st4dd_le_r } },
4995         { { gen_helper_sve_st2bb_r,
4996             gen_helper_sve_st2hh_be_r,
4997             gen_helper_sve_st2ss_be_r,
4998             gen_helper_sve_st2dd_be_r },
4999           { gen_helper_sve_st3bb_r,
5000             gen_helper_sve_st3hh_be_r,
5001             gen_helper_sve_st3ss_be_r,
5002             gen_helper_sve_st3dd_be_r },
5003           { gen_helper_sve_st4bb_r,
5004             gen_helper_sve_st4hh_be_r,
5005             gen_helper_sve_st4ss_be_r,
5006             gen_helper_sve_st4dd_be_r } },
5007     };
5008     gen_helper_gvec_mem *fn;
5009     int be = s->be_data == MO_BE;
5010
5011     if (nreg == 0) {
5012         /* ST1 */
5013         fn = fn_single[be][msz][esz];
5014     } else {
5015         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5016         assert(msz == esz);
5017         fn = fn_multiple[be][nreg - 1][msz];
5018     }
5019     assert(fn != NULL);
5020     do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn);
5021 }
5022
5023 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5024 {
5025     if (a->rm == 31 || a->msz > a->esz) {
5026         return false;
5027     }
5028     if (sve_access_check(s)) {
5029         TCGv_i64 addr = new_tmp_a64(s);
5030         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5031         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5032         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5033     }
5034     return true;
5035 }
5036
5037 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5038 {
5039     if (a->msz > a->esz) {
5040         return false;
5041     }
5042     if (sve_access_check(s)) {
5043         int vsz = vec_full_reg_size(s);
5044         int elements = vsz >> a->esz;
5045         TCGv_i64 addr = new_tmp_a64(s);
5046
5047         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5048                          (a->imm * elements * (a->nreg + 1)) << a->msz);
5049         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5050     }
5051     return true;
5052 }
5053
5054 /*
5055  *** SVE gather loads / scatter stores
5056  */
5057
5058 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5059                        int scale, TCGv_i64 scalar, int msz,
5060                        gen_helper_gvec_mem_scatter *fn)
5061 {
5062     unsigned vsz = vec_full_reg_size(s);
5063     TCGv_ptr t_zm = tcg_temp_new_ptr();
5064     TCGv_ptr t_pg = tcg_temp_new_ptr();
5065     TCGv_ptr t_zt = tcg_temp_new_ptr();
5066     TCGv_i32 t_desc;
5067     int desc;
5068
5069     desc = sve_memopidx(s, msz_dtype(s, msz));
5070     desc |= scale << MEMOPIDX_SHIFT;
5071     desc = simd_desc(vsz, vsz, desc);
5072     t_desc = tcg_const_i32(desc);
5073
5074     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5075     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5076     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5077     fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5078
5079     tcg_temp_free_ptr(t_zt);
5080     tcg_temp_free_ptr(t_zm);
5081     tcg_temp_free_ptr(t_pg);
5082     tcg_temp_free_i32(t_desc);
5083 }
5084
/*
 * Gather-load helpers used when the vector element size is MO_32
 * (see trans_LD1_zprz and trans_LD1_zpiz).
 *
 * Indexed by [be][ff][xs][u][msz]:
 *   be  - 1 when s->be_data == MO_BE, else 0
 *   ff  - the insn's ff field; the ff == 1 half holds the "ldff" helpers
 *   xs  - 0 selects the *_zsu helpers, 1 selects the *_zss helpers
 *   u   - the insn's u field
 *   msz - the insn's msz field
 *
 * The [u == 0][msz == 2] slots are NULL; both users assert that the
 * entry they pick is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
5143
/*
 * Gather-load helpers used when the vector element size is MO_64
 * (see trans_LD1_zprz and trans_LD1_zpiz).
 *
 * Indexed by [be][ff][xs][u][msz], with the same meaning of be, ff, u
 * and msz as for gather_load_fn32.
 * Note that we overload xs=2 to indicate 64-bit offset: xs == 0 selects
 * the *_zsu helpers, xs == 1 the *_zss helpers and xs == 2 the *_zd
 * helpers.
 *
 * The [u == 0][msz == 3] slots are NULL; both users assert that the
 * entry they pick is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
5250
5251 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5252 {
5253     gen_helper_gvec_mem_scatter *fn = NULL;
5254     int be = s->be_data == MO_BE;
5255
5256     if (!sve_access_check(s)) {
5257         return true;
5258     }
5259
5260     switch (a->esz) {
5261     case MO_32:
5262         fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5263         break;
5264     case MO_64:
5265         fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5266         break;
5267     }
5268     assert(fn != NULL);
5269
5270     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5271                cpu_reg_sp(s, a->rn), a->msz, fn);
5272     return true;
5273 }
5274
5275 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5276 {
5277     gen_helper_gvec_mem_scatter *fn = NULL;
5278     int be = s->be_data == MO_BE;
5279     TCGv_i64 imm;
5280
5281     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5282         return false;
5283     }
5284     if (!sve_access_check(s)) {
5285         return true;
5286     }
5287
5288     switch (a->esz) {
5289     case MO_32:
5290         fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
5291         break;
5292     case MO_64:
5293         fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
5294         break;
5295     }
5296     assert(fn != NULL);
5297
5298     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5299      * by loading the immediate into the scalar parameter.
5300      */
5301     imm = tcg_const_i64(a->imm << a->msz);
5302     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5303     tcg_temp_free_i64(imm);
5304     return true;
5305 }
5306
/*
 * Scatter-store helpers used when the vector element size is MO_32
 * (see trans_ST1_zprz and trans_ST1_zpiz).
 *
 * Indexed by [be][xs][msz]:
 *   be  - 1 when s->be_data == MO_BE, else 0
 *   xs  - 0 selects the *_zsu helpers, 1 selects the *_zss helpers
 *   msz - the insn's msz field
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
5324
/*
 * Scatter-store helpers used when the vector element size is MO_64
 * (see trans_ST1_zprz and trans_ST1_zpiz).
 *
 * Indexed by [be][xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset: xs == 0 selects
 * the *_zsu helpers, xs == 1 the *_zss helpers and xs == 2 the *_zd
 * helpers.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
5354
5355 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5356 {
5357     gen_helper_gvec_mem_scatter *fn;
5358     int be = s->be_data == MO_BE;
5359
5360     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5361         return false;
5362     }
5363     if (!sve_access_check(s)) {
5364         return true;
5365     }
5366     switch (a->esz) {
5367     case MO_32:
5368         fn = scatter_store_fn32[be][a->xs][a->msz];
5369         break;
5370     case MO_64:
5371         fn = scatter_store_fn64[be][a->xs][a->msz];
5372         break;
5373     default:
5374         g_assert_not_reached();
5375     }
5376     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5377                cpu_reg_sp(s, a->rn), a->msz, fn);
5378     return true;
5379 }
5380
5381 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5382 {
5383     gen_helper_gvec_mem_scatter *fn = NULL;
5384     int be = s->be_data == MO_BE;
5385     TCGv_i64 imm;
5386
5387     if (a->esz < a->msz) {
5388         return false;
5389     }
5390     if (!sve_access_check(s)) {
5391         return true;
5392     }
5393
5394     switch (a->esz) {
5395     case MO_32:
5396         fn = scatter_store_fn32[be][0][a->msz];
5397         break;
5398     case MO_64:
5399         fn = scatter_store_fn64[be][2][a->msz];
5400         break;
5401     }
5402     assert(fn != NULL);
5403
5404     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5405      * by loading the immediate into the scalar parameter.
5406      */
5407     imm = tcg_const_i64(a->imm << a->msz);
5408     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5409     tcg_temp_free_i64(imm);
5410     return true;
5411 }
5412
5413 /*
5414  * Prefetches
5415  */
5416
/*
 * Prefetch: no memory operation is generated.  sve_access_check() is
 * still called, but its result is deliberately discarded, and the
 * function always returns true.
 */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
5423
5424 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5425 {
5426     if (a->rm == 31) {
5427         return false;
5428     }
5429     /* Prefetch is a nop within QEMU.  */
5430     (void)sve_access_check(s);
5431     return true;
5432 }
5433
5434 /*
5435  * Move Prefix
5436  *
5437  * TODO: The implementation so far could handle predicated merging movprfx.
5438  * The helper functions as written take an extra source register to
5439  * use in the operation, but the result is only written when predication
5440  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5441  * to allow the final write back to the destination to be unconditional.
5442  * For predicated zeroing movprfx, we need to rearrange the helpers to
5443  * allow the final write back to zero inactives.
5444  *
5445  * In the meantime, just emit the moves.
5446  */
5447
5448 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
5449 {
5450     return do_mov_z(s, a->rd, a->rn);
5451 }
5452
5453 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5454 {
5455     if (sve_access_check(s)) {
5456         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5457     }
5458     return true;
5459 }
5460
5461 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5462 {
5463     if (sve_access_check(s)) {
5464         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5465     }
5466     return true;
5467 }