target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
  38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                          TCGv_i64, uint32_t, uint32_t);
  40
  41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                      TCGv_ptr, TCGv_i32);
  43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                      TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                          TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(int x)
  64 {
  65     return (16 << tszimm_esz(x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(int x)
  70 {
  71     return x - (8 << tszimm_esz(x));
  72 }
  73
  74 static inline int plus1(int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
 282 {
 283     return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 284 }
 285
 286 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
 287 {
 288     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 289 }
 290
 291 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
 292 {
 293     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 294 }
 295
 296 /*
 297  *** SVE Integer Arithmetic - Unpredicated Group
 298  */
 299
 300 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
 301 {
 302     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 303 }
 304
 305 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
 306 {
 307     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 308 }
 309
 310 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 311 {
 312     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 313 }
 314
 315 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 316 {
 317     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 318 }
 319
 320 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 321 {
 322     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 323 }
 324
 325 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 326 {
 327     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 328 }
 329
 330 /*
 331  *** SVE Integer Arithmetic - Binary Predicated Group
 332  */
 333
 334 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 335 {
 336     unsigned vsz = vec_full_reg_size(s);
 337     if (fn == NULL) {
 338         return false;
 339     }
 340     if (sve_access_check(s)) {
 341         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 342                            vec_full_reg_offset(s, a->rn),
 343                            vec_full_reg_offset(s, a->rm),
 344                            pred_full_reg_offset(s, a->pg),
 345                            vsz, vsz, 0, fn);
 346     }
 347     return true;
 348 }
 349
 350 /* Select active elememnts from Zn and inactive elements from Zm,
 351  * storing the result in Zd.
 352  */
 353 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 354 {
 355     static gen_helper_gvec_4 * const fns[4] = {
 356         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 357         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 358     };
 359     unsigned vsz = vec_full_reg_size(s);
 360     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 361                        vec_full_reg_offset(s, rn),
 362                        vec_full_reg_offset(s, rm),
 363                        pred_full_reg_offset(s, pg),
 364                        vsz, vsz, 0, fns[esz]);
 365 }
 366
 367 #define DO_ZPZZ(NAME, name) \
 368 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
 369 {                                                                         \
 370     static gen_helper_gvec_4 * const fns[4] = {                           \
 371         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 372         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 373     };                                                                    \
 374     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 375 }
 376
 377 DO_ZPZZ(AND, and)
 378 DO_ZPZZ(EOR, eor)
 379 DO_ZPZZ(ORR, orr)
 380 DO_ZPZZ(BIC, bic)
 381
 382 DO_ZPZZ(ADD, add)
 383 DO_ZPZZ(SUB, sub)
 384
 385 DO_ZPZZ(SMAX, smax)
 386 DO_ZPZZ(UMAX, umax)
 387 DO_ZPZZ(SMIN, smin)
 388 DO_ZPZZ(UMIN, umin)
 389 DO_ZPZZ(SABD, sabd)
 390 DO_ZPZZ(UABD, uabd)
 391
 392 DO_ZPZZ(MUL, mul)
 393 DO_ZPZZ(SMULH, smulh)
 394 DO_ZPZZ(UMULH, umulh)
 395
 396 DO_ZPZZ(ASR, asr)
 397 DO_ZPZZ(LSR, lsr)
 398 DO_ZPZZ(LSL, lsl)
 399
 400 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 401 {
 402     static gen_helper_gvec_4 * const fns[4] = {
 403         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 404     };
 405     return do_zpzz_ool(s, a, fns[a->esz]);
 406 }
 407
 408 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 409 {
 410     static gen_helper_gvec_4 * const fns[4] = {
 411         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 412     };
 413     return do_zpzz_ool(s, a, fns[a->esz]);
 414 }
 415
 416 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 417 {
 418     if (sve_access_check(s)) {
 419         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 420     }
 421     return true;
 422 }
 423
 424 #undef DO_ZPZZ
 425
 426 /*
 427  *** SVE Integer Arithmetic - Unary Predicated Group
 428  */
 429
 430 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 431 {
 432     if (fn == NULL) {
 433         return false;
 434     }
 435     if (sve_access_check(s)) {
 436         unsigned vsz = vec_full_reg_size(s);
 437         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 438                            vec_full_reg_offset(s, a->rn),
 439                            pred_full_reg_offset(s, a->pg),
 440                            vsz, vsz, 0, fn);
 441     }
 442     return true;
 443 }
 444
 445 #define DO_ZPZ(NAME, name) \
 446 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
 447 {                                                                   \
 448     static gen_helper_gvec_3 * const fns[4] = {                     \
 449         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 450         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 451     };                                                              \
 452     return do_zpz_ool(s, a, fns[a->esz]);                           \
 453 }
 454
 455 DO_ZPZ(CLS, cls)
 456 DO_ZPZ(CLZ, clz)
 457 DO_ZPZ(CNT_zpz, cnt_zpz)
 458 DO_ZPZ(CNOT, cnot)
 459 DO_ZPZ(NOT_zpz, not_zpz)
 460 DO_ZPZ(ABS, abs)
 461 DO_ZPZ(NEG, neg)
 462
 463 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
 464 {
 465     static gen_helper_gvec_3 * const fns[4] = {
 466         NULL,
 467         gen_helper_sve_fabs_h,
 468         gen_helper_sve_fabs_s,
 469         gen_helper_sve_fabs_d
 470     };
 471     return do_zpz_ool(s, a, fns[a->esz]);
 472 }
 473
 474 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
 475 {
 476     static gen_helper_gvec_3 * const fns[4] = {
 477         NULL,
 478         gen_helper_sve_fneg_h,
 479         gen_helper_sve_fneg_s,
 480         gen_helper_sve_fneg_d
 481     };
 482     return do_zpz_ool(s, a, fns[a->esz]);
 483 }
 484
 485 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
 486 {
 487     static gen_helper_gvec_3 * const fns[4] = {
 488         NULL,
 489         gen_helper_sve_sxtb_h,
 490         gen_helper_sve_sxtb_s,
 491         gen_helper_sve_sxtb_d
 492     };
 493     return do_zpz_ool(s, a, fns[a->esz]);
 494 }
 495
 496 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
 497 {
 498     static gen_helper_gvec_3 * const fns[4] = {
 499         NULL,
 500         gen_helper_sve_uxtb_h,
 501         gen_helper_sve_uxtb_s,
 502         gen_helper_sve_uxtb_d
 503     };
 504     return do_zpz_ool(s, a, fns[a->esz]);
 505 }
 506
 507 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
 508 {
 509     static gen_helper_gvec_3 * const fns[4] = {
 510         NULL, NULL,
 511         gen_helper_sve_sxth_s,
 512         gen_helper_sve_sxth_d
 513     };
 514     return do_zpz_ool(s, a, fns[a->esz]);
 515 }
 516
 517 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
 518 {
 519     static gen_helper_gvec_3 * const fns[4] = {
 520         NULL, NULL,
 521         gen_helper_sve_uxth_s,
 522         gen_helper_sve_uxth_d
 523     };
 524     return do_zpz_ool(s, a, fns[a->esz]);
 525 }
 526
 527 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
 528 {
 529     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 530 }
 531
 532 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 535 }
 536
 537 #undef DO_ZPZ
 538
 539 /*
 540  *** SVE Integer Reduction Group
 541  */
 542
 543 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 544 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 545                        gen_helper_gvec_reduc *fn)
 546 {
 547     unsigned vsz = vec_full_reg_size(s);
 548     TCGv_ptr t_zn, t_pg;
 549     TCGv_i32 desc;
 550     TCGv_i64 temp;
 551
 552     if (fn == NULL) {
 553         return false;
 554     }
 555     if (!sve_access_check(s)) {
 556         return true;
 557     }
 558
 559     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 560     temp = tcg_temp_new_i64();
 561     t_zn = tcg_temp_new_ptr();
 562     t_pg = tcg_temp_new_ptr();
 563
 564     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 565     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 566     fn(temp, t_zn, t_pg, desc);
 567     tcg_temp_free_ptr(t_zn);
 568     tcg_temp_free_ptr(t_pg);
 569     tcg_temp_free_i32(desc);
 570
 571     write_fp_dreg(s, a->rd, temp);
 572     tcg_temp_free_i64(temp);
 573     return true;
 574 }
 575
 576 #define DO_VPZ(NAME, name) \
 577 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
 578 {                                                                        \
 579     static gen_helper_gvec_reduc * const fns[4] = {                      \
 580         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 581         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 582     };                                                                   \
 583     return do_vpz_ool(s, a, fns[a->esz]);                                \
 584 }
 585
 586 DO_VPZ(ORV, orv)
 587 DO_VPZ(ANDV, andv)
 588 DO_VPZ(EORV, eorv)
 589
 590 DO_VPZ(UADDV, uaddv)
 591 DO_VPZ(SMAXV, smaxv)
 592 DO_VPZ(UMAXV, umaxv)
 593 DO_VPZ(SMINV, sminv)
 594 DO_VPZ(UMINV, uminv)
 595
 596 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
 597 {
 598     static gen_helper_gvec_reduc * const fns[4] = {
 599         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 600         gen_helper_sve_saddv_s, NULL
 601     };
 602     return do_vpz_ool(s, a, fns[a->esz]);
 603 }
 604
 605 #undef DO_VPZ
 606
 607 /*
 608  *** SVE Shift by Immediate - Predicated Group
 609  */
 610
 611 /* Store zero into every active element of Zd.  We will use this for two
 612  * and three-operand predicated instructions for which logic dictates a
 613  * zero result.
 614  */
 615 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 616 {
 617     static gen_helper_gvec_2 * const fns[4] = {
 618         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 619         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 620     };
 621     if (sve_access_check(s)) {
 622         unsigned vsz = vec_full_reg_size(s);
 623         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 624                            pred_full_reg_offset(s, pg),
 625                            vsz, vsz, 0, fns[esz]);
 626     }
 627     return true;
 628 }
 629
 630 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 631 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 632 {
 633     static gen_helper_gvec_3 * const fns[4] = {
 634         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 635         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 636     };
 637     unsigned vsz = vec_full_reg_size(s);
 638     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 639                        vec_full_reg_offset(s, rn),
 640                        pred_full_reg_offset(s, pg),
 641                        vsz, vsz, 0, fns[esz]);
 642 }
 643
 644 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 645                         gen_helper_gvec_3 *fn)
 646 {
 647     if (sve_access_check(s)) {
 648         unsigned vsz = vec_full_reg_size(s);
 649         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 650                            vec_full_reg_offset(s, a->rn),
 651                            pred_full_reg_offset(s, a->pg),
 652                            vsz, vsz, a->imm, fn);
 653     }
 654     return true;
 655 }
 656
 657 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
 658 {
 659     static gen_helper_gvec_3 * const fns[4] = {
 660         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 661         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 662     };
 663     if (a->esz < 0) {
 664         /* Invalid tsz encoding -- see tszimm_esz. */
 665         return false;
 666     }
 667     /* Shift by element size is architecturally valid.  For
 668        arithmetic right-shift, it's the same as by one less. */
 669     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 670     return do_zpzi_ool(s, a, fns[a->esz]);
 671 }
 672
 673 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
 674 {
 675     static gen_helper_gvec_3 * const fns[4] = {
 676         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 677         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 678     };
 679     if (a->esz < 0) {
 680         return false;
 681     }
 682     /* Shift by element size is architecturally valid.
 683        For logical shifts, it is a zeroing operation.  */
 684     if (a->imm >= (8 << a->esz)) {
 685         return do_clr_zp(s, a->rd, a->pg, a->esz);
 686     } else {
 687         return do_zpzi_ool(s, a, fns[a->esz]);
 688     }
 689 }
 690
 691 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
 692 {
 693     static gen_helper_gvec_3 * const fns[4] = {
 694         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 695         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 696     };
 697     if (a->esz < 0) {
 698         return false;
 699     }
 700     /* Shift by element size is architecturally valid.
 701        For logical shifts, it is a zeroing operation.  */
 702     if (a->imm >= (8 << a->esz)) {
 703         return do_clr_zp(s, a->rd, a->pg, a->esz);
 704     } else {
 705         return do_zpzi_ool(s, a, fns[a->esz]);
 706     }
 707 }
 708
 709 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
 710 {
 711     static gen_helper_gvec_3 * const fns[4] = {
 712         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 713         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 714     };
 715     if (a->esz < 0) {
 716         return false;
 717     }
 718     /* Shift by element size is architecturally valid.  For arithmetic
 719        right shift for division, it is a zeroing operation.  */
 720     if (a->imm >= (8 << a->esz)) {
 721         return do_clr_zp(s, a->rd, a->pg, a->esz);
 722     } else {
 723         return do_zpzi_ool(s, a, fns[a->esz]);
 724     }
 725 }
 726
 727 /*
 728  *** SVE Bitwise Shift - Predicated Group
 729  */
 730
 731 #define DO_ZPZW(NAME, name) \
 732 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
 733 {                                                                         \
 734     static gen_helper_gvec_4 * const fns[3] = {                           \
 735         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 736         gen_helper_sve_##name##_zpzw_s,                                   \
 737     };                                                                    \
 738     if (a->esz < 0 || a->esz >= 3) {                                      \
 739         return false;                                                     \
 740     }                                                                     \
 741     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 742 }
 743
 744 DO_ZPZW(ASR, asr)
 745 DO_ZPZW(LSR, lsr)
 746 DO_ZPZW(LSL, lsl)
 747
 748 #undef DO_ZPZW
 749
 750 /*
 751  *** SVE Bitwise Shift - Unpredicated Group
 752  */
 753
 754 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 755                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 756                                          int64_t, uint32_t, uint32_t))
 757 {
 758     if (a->esz < 0) {
 759         /* Invalid tsz encoding -- see tszimm_esz. */
 760         return false;
 761     }
 762     if (sve_access_check(s)) {
 763         unsigned vsz = vec_full_reg_size(s);
 764         /* Shift by element size is architecturally valid.  For
 765            arithmetic right-shift, it's the same as by one less.
 766            Otherwise it is a zeroing operation.  */
 767         if (a->imm >= 8 << a->esz) {
 768             if (asr) {
 769                 a->imm = (8 << a->esz) - 1;
 770             } else {
 771                 do_dupi_z(s, a->rd, 0);
 772                 return true;
 773             }
 774         }
 775         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 776                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 777     }
 778     return true;
 779 }
 780
 781 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
 782 {
 783     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 784 }
 785
 786 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
 787 {
 788     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 789 }
 790
 791 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
 792 {
 793     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 794 }
 795
 796 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 797 {
 798     if (fn == NULL) {
 799         return false;
 800     }
 801     if (sve_access_check(s)) {
 802         unsigned vsz = vec_full_reg_size(s);
 803         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 804                            vec_full_reg_offset(s, a->rn),
 805                            vec_full_reg_offset(s, a->rm),
 806                            vsz, vsz, 0, fn);
 807     }
 808     return true;
 809 }
 810
 811 #define DO_ZZW(NAME, name) \
 812 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
 813 {                                                                         \
 814     static gen_helper_gvec_3 * const fns[4] = {                           \
 815         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 816         gen_helper_sve_##name##_zzw_s, NULL                               \
 817     };                                                                    \
 818     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 819 }
 820
 821 DO_ZZW(ASR, asr)
 822 DO_ZZW(LSR, lsr)
 823 DO_ZZW(LSL, lsl)
 824
 825 #undef DO_ZZW
 826
 827 /*
 828  *** SVE Integer Multiply-Add Group
 829  */
 830
 831 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 832                          gen_helper_gvec_5 *fn)
 833 {
 834     if (sve_access_check(s)) {
 835         unsigned vsz = vec_full_reg_size(s);
 836         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 837                            vec_full_reg_offset(s, a->ra),
 838                            vec_full_reg_offset(s, a->rn),
 839                            vec_full_reg_offset(s, a->rm),
 840                            pred_full_reg_offset(s, a->pg),
 841                            vsz, vsz, 0, fn);
 842     }
 843     return true;
 844 }
 845
 846 #define DO_ZPZZZ(NAME, name) \
 847 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
 848 {                                                                    \
 849     static gen_helper_gvec_5 * const fns[4] = {                      \
 850         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 851         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 852     };                                                               \
 853     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 854 }
 855
 856 DO_ZPZZZ(MLA, mla)
 857 DO_ZPZZZ(MLS, mls)
 858
 859 #undef DO_ZPZZZ
 860
 861 /*
 862  *** SVE Index Generation Group
 863  */
 864
 865 static void do_index(DisasContext *s, int esz, int rd,
 866                      TCGv_i64 start, TCGv_i64 incr)
 867 {
 868     unsigned vsz = vec_full_reg_size(s);
 869     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 870     TCGv_ptr t_zd = tcg_temp_new_ptr();
 871
 872     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 873     if (esz == 3) {
 874         gen_helper_sve_index_d(t_zd, start, incr, desc);
 875     } else {
 876         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 877         static index_fn * const fns[3] = {
 878             gen_helper_sve_index_b,
 879             gen_helper_sve_index_h,
 880             gen_helper_sve_index_s,
 881         };
 882         TCGv_i32 s32 = tcg_temp_new_i32();
 883         TCGv_i32 i32 = tcg_temp_new_i32();
 884
 885         tcg_gen_extrl_i64_i32(s32, start);
 886         tcg_gen_extrl_i64_i32(i32, incr);
 887         fns[esz](t_zd, s32, i32, desc);
 888
 889         tcg_temp_free_i32(s32);
 890         tcg_temp_free_i32(i32);
 891     }
 892     tcg_temp_free_ptr(t_zd);
 893     tcg_temp_free_i32(desc);
 894 }
 895
 896 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
 897 {
 898     if (sve_access_check(s)) {
 899         TCGv_i64 start = tcg_const_i64(a->imm1);
 900         TCGv_i64 incr = tcg_const_i64(a->imm2);
 901         do_index(s, a->esz, a->rd, start, incr);
 902         tcg_temp_free_i64(start);
 903         tcg_temp_free_i64(incr);
 904     }
 905     return true;
 906 }
 907
 908 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
 909 {
 910     if (sve_access_check(s)) {
 911         TCGv_i64 start = tcg_const_i64(a->imm);
 912         TCGv_i64 incr = cpu_reg(s, a->rm);
 913         do_index(s, a->esz, a->rd, start, incr);
 914         tcg_temp_free_i64(start);
 915     }
 916     return true;
 917 }
 918
 919 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
 920 {
 921     if (sve_access_check(s)) {
 922         TCGv_i64 start = cpu_reg(s, a->rn);
 923         TCGv_i64 incr = tcg_const_i64(a->imm);
 924         do_index(s, a->esz, a->rd, start, incr);
 925         tcg_temp_free_i64(incr);
 926     }
 927     return true;
 928 }
 929
 930 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
 931 {
 932     if (sve_access_check(s)) {
 933         TCGv_i64 start = cpu_reg(s, a->rn);
 934         TCGv_i64 incr = cpu_reg(s, a->rm);
 935         do_index(s, a->esz, a->rd, start, incr);
 936     }
 937     return true;
 938 }
 939
 940 /*
 941  *** SVE Stack Allocation Group
 942  */
 943
 944 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 945 {
 946     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 947     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 948     tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 949     return true;
 950 }
 951
 952 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 953 {
 954     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 955     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 956     tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 957     return true;
 958 }
 959
 960 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 961 {
 962     TCGv_i64 reg = cpu_reg(s, a->rd);
 963     tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 964     return true;
 965 }
 966
 967 /*
 968  *** SVE Compute Vector Address Group
 969  */
 970
 971 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 972 {
 973     if (sve_access_check(s)) {
 974         unsigned vsz = vec_full_reg_size(s);
 975         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 976                            vec_full_reg_offset(s, a->rn),
 977                            vec_full_reg_offset(s, a->rm),
 978                            vsz, vsz, a->imm, fn);
 979     }
 980     return true;
 981 }
 982
 983 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
 984 {
 985     return do_adr(s, a, gen_helper_sve_adr_p32);
 986 }
 987
 988 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
 989 {
 990     return do_adr(s, a, gen_helper_sve_adr_p64);
 991 }
 992
 993 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
 994 {
 995     return do_adr(s, a, gen_helper_sve_adr_s32);
 996 }
 997
 998 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
 999 {
1000     return do_adr(s, a, gen_helper_sve_adr_u32);
1001 }
1002
1003 /*
1004  *** SVE Integer Misc - Unpredicated Group
1005  */
1006
1007 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1008 {
1009     static gen_helper_gvec_2 * const fns[4] = {
1010         NULL,
1011         gen_helper_sve_fexpa_h,
1012         gen_helper_sve_fexpa_s,
1013         gen_helper_sve_fexpa_d,
1014     };
1015     if (a->esz == 0) {
1016         return false;
1017     }
1018     if (sve_access_check(s)) {
1019         unsigned vsz = vec_full_reg_size(s);
1020         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1021                            vec_full_reg_offset(s, a->rn),
1022                            vsz, vsz, 0, fns[a->esz]);
1023     }
1024     return true;
1025 }
1026
1027 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1028 {
1029     static gen_helper_gvec_3 * const fns[4] = {
1030         NULL,
1031         gen_helper_sve_ftssel_h,
1032         gen_helper_sve_ftssel_s,
1033         gen_helper_sve_ftssel_d,
1034     };
1035     if (a->esz == 0) {
1036         return false;
1037     }
1038     if (sve_access_check(s)) {
1039         unsigned vsz = vec_full_reg_size(s);
1040         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1041                            vec_full_reg_offset(s, a->rn),
1042                            vec_full_reg_offset(s, a->rm),
1043                            vsz, vsz, 0, fns[a->esz]);
1044     }
1045     return true;
1046 }
1047
1048 /*
1049  *** SVE Predicate Logical Operations Group
1050  */
1051
1052 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1053                           const GVecGen4 *gvec_op)
1054 {
1055     if (!sve_access_check(s)) {
1056         return true;
1057     }
1058
1059     unsigned psz = pred_gvec_reg_size(s);
1060     int dofs = pred_full_reg_offset(s, a->rd);
1061     int nofs = pred_full_reg_offset(s, a->rn);
1062     int mofs = pred_full_reg_offset(s, a->rm);
1063     int gofs = pred_full_reg_offset(s, a->pg);
1064
1065     if (psz == 8) {
1066         /* Do the operation and the flags generation in temps.  */
1067         TCGv_i64 pd = tcg_temp_new_i64();
1068         TCGv_i64 pn = tcg_temp_new_i64();
1069         TCGv_i64 pm = tcg_temp_new_i64();
1070         TCGv_i64 pg = tcg_temp_new_i64();
1071
1072         tcg_gen_ld_i64(pn, cpu_env, nofs);
1073         tcg_gen_ld_i64(pm, cpu_env, mofs);
1074         tcg_gen_ld_i64(pg, cpu_env, gofs);
1075
1076         gvec_op->fni8(pd, pn, pm, pg);
1077         tcg_gen_st_i64(pd, cpu_env, dofs);
1078
1079         do_predtest1(pd, pg);
1080
1081         tcg_temp_free_i64(pd);
1082         tcg_temp_free_i64(pn);
1083         tcg_temp_free_i64(pm);
1084         tcg_temp_free_i64(pg);
1085     } else {
1086         /* The operation and flags generation is large.  The computation
1087          * of the flags depends on the original contents of the guarding
1088          * predicate.  If the destination overwrites the guarding predicate,
1089          * then the easiest way to get this right is to save a copy.
1090           */
1091         int tofs = gofs;
1092         if (a->rd == a->pg) {
1093             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1094             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1095         }
1096
1097         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1098         do_predtest(s, dofs, tofs, psz / 8);
1099     }
1100     return true;
1101 }
1102
1103 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1104 {
1105     tcg_gen_and_i64(pd, pn, pm);
1106     tcg_gen_and_i64(pd, pd, pg);
1107 }
1108
1109 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1110                            TCGv_vec pm, TCGv_vec pg)
1111 {
1112     tcg_gen_and_vec(vece, pd, pn, pm);
1113     tcg_gen_and_vec(vece, pd, pd, pg);
1114 }
1115
1116 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1117 {
1118     static const GVecGen4 op = {
1119         .fni8 = gen_and_pg_i64,
1120         .fniv = gen_and_pg_vec,
1121         .fno = gen_helper_sve_and_pppp,
1122         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1123     };
1124     if (a->s) {
1125         return do_pppp_flags(s, a, &op);
1126     } else if (a->rn == a->rm) {
1127         if (a->pg == a->rn) {
1128             return do_mov_p(s, a->rd, a->rn);
1129         } else {
1130             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1131         }
1132     } else if (a->pg == a->rn || a->pg == a->rm) {
1133         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1134     } else {
1135         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1136     }
1137 }
1138
1139 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1140 {
1141     tcg_gen_andc_i64(pd, pn, pm);
1142     tcg_gen_and_i64(pd, pd, pg);
1143 }
1144
1145 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1146                            TCGv_vec pm, TCGv_vec pg)
1147 {
1148     tcg_gen_andc_vec(vece, pd, pn, pm);
1149     tcg_gen_and_vec(vece, pd, pd, pg);
1150 }
1151
1152 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1153 {
1154     static const GVecGen4 op = {
1155         .fni8 = gen_bic_pg_i64,
1156         .fniv = gen_bic_pg_vec,
1157         .fno = gen_helper_sve_bic_pppp,
1158         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1159     };
1160     if (a->s) {
1161         return do_pppp_flags(s, a, &op);
1162     } else if (a->pg == a->rn) {
1163         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1164     } else {
1165         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1166     }
1167 }
1168
1169 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1170 {
1171     tcg_gen_xor_i64(pd, pn, pm);
1172     tcg_gen_and_i64(pd, pd, pg);
1173 }
1174
1175 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1176                            TCGv_vec pm, TCGv_vec pg)
1177 {
1178     tcg_gen_xor_vec(vece, pd, pn, pm);
1179     tcg_gen_and_vec(vece, pd, pd, pg);
1180 }
1181
1182 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1183 {
1184     static const GVecGen4 op = {
1185         .fni8 = gen_eor_pg_i64,
1186         .fniv = gen_eor_pg_vec,
1187         .fno = gen_helper_sve_eor_pppp,
1188         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1189     };
1190     if (a->s) {
1191         return do_pppp_flags(s, a, &op);
1192     } else {
1193         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1194     }
1195 }
1196
1197 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1198 {
1199     tcg_gen_and_i64(pn, pn, pg);
1200     tcg_gen_andc_i64(pm, pm, pg);
1201     tcg_gen_or_i64(pd, pn, pm);
1202 }
1203
1204 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1205                            TCGv_vec pm, TCGv_vec pg)
1206 {
1207     tcg_gen_and_vec(vece, pn, pn, pg);
1208     tcg_gen_andc_vec(vece, pm, pm, pg);
1209     tcg_gen_or_vec(vece, pd, pn, pm);
1210 }
1211
1212 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1213 {
1214     static const GVecGen4 op = {
1215         .fni8 = gen_sel_pg_i64,
1216         .fniv = gen_sel_pg_vec,
1217         .fno = gen_helper_sve_sel_pppp,
1218         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1219     };
1220     if (a->s) {
1221         return false;
1222     } else {
1223         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1224     }
1225 }
1226
1227 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1228 {
1229     tcg_gen_or_i64(pd, pn, pm);
1230     tcg_gen_and_i64(pd, pd, pg);
1231 }
1232
1233 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1234                            TCGv_vec pm, TCGv_vec pg)
1235 {
1236     tcg_gen_or_vec(vece, pd, pn, pm);
1237     tcg_gen_and_vec(vece, pd, pd, pg);
1238 }
1239
1240 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1241 {
1242     static const GVecGen4 op = {
1243         .fni8 = gen_orr_pg_i64,
1244         .fniv = gen_orr_pg_vec,
1245         .fno = gen_helper_sve_orr_pppp,
1246         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1247     };
1248     if (a->s) {
1249         return do_pppp_flags(s, a, &op);
1250     } else if (a->pg == a->rn && a->rn == a->rm) {
1251         return do_mov_p(s, a->rd, a->rn);
1252     } else {
1253         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1254     }
1255 }
1256
1257 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1258 {
1259     tcg_gen_orc_i64(pd, pn, pm);
1260     tcg_gen_and_i64(pd, pd, pg);
1261 }
1262
1263 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1264                            TCGv_vec pm, TCGv_vec pg)
1265 {
1266     tcg_gen_orc_vec(vece, pd, pn, pm);
1267     tcg_gen_and_vec(vece, pd, pd, pg);
1268 }
1269
1270 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1271 {
1272     static const GVecGen4 op = {
1273         .fni8 = gen_orn_pg_i64,
1274         .fniv = gen_orn_pg_vec,
1275         .fno = gen_helper_sve_orn_pppp,
1276         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1277     };
1278     if (a->s) {
1279         return do_pppp_flags(s, a, &op);
1280     } else {
1281         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1282     }
1283 }
1284
1285 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1286 {
1287     tcg_gen_or_i64(pd, pn, pm);
1288     tcg_gen_andc_i64(pd, pg, pd);
1289 }
1290
1291 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1292                            TCGv_vec pm, TCGv_vec pg)
1293 {
1294     tcg_gen_or_vec(vece, pd, pn, pm);
1295     tcg_gen_andc_vec(vece, pd, pg, pd);
1296 }
1297
1298 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1299 {
1300     static const GVecGen4 op = {
1301         .fni8 = gen_nor_pg_i64,
1302         .fniv = gen_nor_pg_vec,
1303         .fno = gen_helper_sve_nor_pppp,
1304         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1305     };
1306     if (a->s) {
1307         return do_pppp_flags(s, a, &op);
1308     } else {
1309         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1310     }
1311 }
1312
1313 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1314 {
1315     tcg_gen_and_i64(pd, pn, pm);
1316     tcg_gen_andc_i64(pd, pg, pd);
1317 }
1318
1319 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1320                            TCGv_vec pm, TCGv_vec pg)
1321 {
1322     tcg_gen_and_vec(vece, pd, pn, pm);
1323     tcg_gen_andc_vec(vece, pd, pg, pd);
1324 }
1325
1326 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1327 {
1328     static const GVecGen4 op = {
1329         .fni8 = gen_nand_pg_i64,
1330         .fniv = gen_nand_pg_vec,
1331         .fno = gen_helper_sve_nand_pppp,
1332         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1333     };
1334     if (a->s) {
1335         return do_pppp_flags(s, a, &op);
1336     } else {
1337         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1338     }
1339 }
1340
1341 /*
1342  *** SVE Predicate Misc Group
1343  */
1344
1345 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1346 {
1347     if (sve_access_check(s)) {
1348         int nofs = pred_full_reg_offset(s, a->rn);
1349         int gofs = pred_full_reg_offset(s, a->pg);
1350         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1351
1352         if (words == 1) {
1353             TCGv_i64 pn = tcg_temp_new_i64();
1354             TCGv_i64 pg = tcg_temp_new_i64();
1355
1356             tcg_gen_ld_i64(pn, cpu_env, nofs);
1357             tcg_gen_ld_i64(pg, cpu_env, gofs);
1358             do_predtest1(pn, pg);
1359
1360             tcg_temp_free_i64(pn);
1361             tcg_temp_free_i64(pg);
1362         } else {
1363             do_predtest(s, nofs, gofs, words);
1364         }
1365     }
1366     return true;
1367 }
1368
/*
 * Number of elements selected by a predicate-constraint PATTERN, given
 * FULLSZ >> ESZ available elements.  See the ARM pseudocode
 * DecodePredCount.
 *
 * Fixed-count patterns (VL1..VL256) yield their count only when that many
 * elements are available, otherwise 0.  Patterns without a named meaning
 * (#uimm5) yield 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned avail = fullsz >> esz;
    unsigned want;

    if (pattern == 0x0) {                            /* POW2 */
        return pow2floor(avail);
    } else if (pattern >= 0x1 && pattern <= 0x8) {   /* VL1 .. VL8 */
        want = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {   /* VL16 .. VL256 */
        want = 16 << (pattern - 9);
    } else if (pattern == 0x1d) {                    /* MUL4 */
        return avail - avail % 4;
    } else if (pattern == 0x1e) {                    /* MUL3 */
        return avail - avail % 3;
    } else if (pattern == 0x1f) {                    /* ALL */
        return avail;
    } else {                                         /* #uimm5 */
        return 0;
    }

    if (avail < want) {
        return 0;
    }
    return want;
}
1406
1407 /* This handles all of the predicate initialization instructions,
1408  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1409  * so that decode_pred_count returns 0.  For SETFFR, we will have
1410  * set RD == 16 == FFR.
1411  */
1412 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1413 {
1414     if (!sve_access_check(s)) {
1415         return true;
1416     }
1417
1418     unsigned fullsz = vec_full_reg_size(s);
1419     unsigned ofs = pred_full_reg_offset(s, rd);
1420     unsigned numelem, setsz, i;
1421     uint64_t word, lastword;
1422     TCGv_i64 t;
1423
1424     numelem = decode_pred_count(fullsz, pat, esz);
1425
1426     /* Determine what we must store into each bit, and how many.  */
1427     if (numelem == 0) {
1428         lastword = word = 0;
1429         setsz = fullsz;
1430     } else {
1431         setsz = numelem << esz;
1432         lastword = word = pred_esz_masks[esz];
1433         if (setsz % 64) {
1434             lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1435         }
1436     }
1437
1438     t = tcg_temp_new_i64();
1439     if (fullsz <= 64) {
1440         tcg_gen_movi_i64(t, lastword);
1441         tcg_gen_st_i64(t, cpu_env, ofs);
1442         goto done;
1443     }
1444
1445     if (word == lastword) {
1446         unsigned maxsz = size_for_gvec(fullsz / 8);
1447         unsigned oprsz = size_for_gvec(setsz / 8);
1448
1449         if (oprsz * 8 == setsz) {
1450             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1451             goto done;
1452         }
1453     }
1454
1455     setsz /= 8;
1456     fullsz /= 8;
1457
1458     tcg_gen_movi_i64(t, word);
1459     for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1460         tcg_gen_st_i64(t, cpu_env, ofs + i);
1461     }
1462     if (lastword != word) {
1463         tcg_gen_movi_i64(t, lastword);
1464         tcg_gen_st_i64(t, cpu_env, ofs + i);
1465         i += 8;
1466     }
1467     if (i < fullsz) {
1468         tcg_gen_movi_i64(t, 0);
1469         for (; i < fullsz; i += 8) {
1470             tcg_gen_st_i64(t, cpu_env, ofs + i);
1471         }
1472     }
1473
1474  done:
1475     tcg_temp_free_i64(t);
1476
1477     /* PTRUES */
1478     if (setflag) {
1479         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1480         tcg_gen_movi_i32(cpu_CF, word == 0);
1481         tcg_gen_movi_i32(cpu_VF, 0);
1482         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1483     }
1484     return true;
1485 }
1486
1487 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1488 {
1489     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1490 }
1491
1492 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1493 {
1494     /* Note pat == 31 is #all, to set all elements.  */
1495     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1496 }
1497
1498 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1499 {
1500     /* Note pat == 32 is #unimp, to set no elements.  */
1501     return do_predset(s, 0, a->rd, 32, false);
1502 }
1503
1504 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1505 {
1506     /* The path through do_pppp_flags is complicated enough to want to avoid
1507      * duplication.  Frob the arguments into the form of a predicated AND.
1508      */
1509     arg_rprr_s alt_a = {
1510         .rd = a->rd, .pg = a->pg, .s = a->s,
1511         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1512     };
1513     return trans_AND_pppp(s, &alt_a);
1514 }
1515
1516 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1517 {
1518     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1519 }
1520
1521 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1522 {
1523     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1524 }
1525
1526 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1527                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1528                                            TCGv_ptr, TCGv_i32))
1529 {
1530     if (!sve_access_check(s)) {
1531         return true;
1532     }
1533
1534     TCGv_ptr t_pd = tcg_temp_new_ptr();
1535     TCGv_ptr t_pg = tcg_temp_new_ptr();
1536     TCGv_i32 t;
1537     unsigned desc;
1538
1539     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1540     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1541
1542     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1543     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1544     t = tcg_const_i32(desc);
1545
1546     gen_fn(t, t_pd, t_pg, t);
1547     tcg_temp_free_ptr(t_pd);
1548     tcg_temp_free_ptr(t_pg);
1549
1550     do_pred_flags(t);
1551     tcg_temp_free_i32(t);
1552     return true;
1553 }
1554
1555 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1556 {
1557     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1558 }
1559
1560 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1561 {
1562     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1563 }
1564
1565 /*
1566  *** SVE Element Count Group
1567  */
1568
1569 /* Perform an inline saturating addition of a 32-bit value within
1570  * a 64-bit register.  The second operand is known to be positive,
1571  * which halves the comparisions we must perform to bound the result.
1572  */
1573 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1574 {
1575     int64_t ibound;
1576     TCGv_i64 bound;
1577     TCGCond cond;
1578
1579     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1580     if (u) {
1581         tcg_gen_ext32u_i64(reg, reg);
1582     } else {
1583         tcg_gen_ext32s_i64(reg, reg);
1584     }
1585     if (d) {
1586         tcg_gen_sub_i64(reg, reg, val);
1587         ibound = (u ? 0 : INT32_MIN);
1588         cond = TCG_COND_LT;
1589     } else {
1590         tcg_gen_add_i64(reg, reg, val);
1591         ibound = (u ? UINT32_MAX : INT32_MAX);
1592         cond = TCG_COND_GT;
1593     }
1594     bound = tcg_const_i64(ibound);
1595     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1596     tcg_temp_free_i64(bound);
1597 }
1598
1599 /* Similarly with 64-bit values.  */
1600 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1601 {
1602     TCGv_i64 t0 = tcg_temp_new_i64();
1603     TCGv_i64 t1 = tcg_temp_new_i64();
1604     TCGv_i64 t2;
1605
1606     if (u) {
1607         if (d) {
1608             tcg_gen_sub_i64(t0, reg, val);
1609             tcg_gen_movi_i64(t1, 0);
1610             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1611         } else {
1612             tcg_gen_add_i64(t0, reg, val);
1613             tcg_gen_movi_i64(t1, -1);
1614             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1615         }
1616     } else {
1617         if (d) {
1618             /* Detect signed overflow for subtraction.  */
1619             tcg_gen_xor_i64(t0, reg, val);
1620             tcg_gen_sub_i64(t1, reg, val);
1621             tcg_gen_xor_i64(reg, reg, t1);
1622             tcg_gen_and_i64(t0, t0, reg);
1623
1624             /* Bound the result.  */
1625             tcg_gen_movi_i64(reg, INT64_MIN);
1626             t2 = tcg_const_i64(0);
1627             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1628         } else {
1629             /* Detect signed overflow for addition.  */
1630             tcg_gen_xor_i64(t0, reg, val);
1631             tcg_gen_add_i64(reg, reg, val);
1632             tcg_gen_xor_i64(t1, reg, val);
1633             tcg_gen_andc_i64(t0, t1, t0);
1634
1635             /* Bound the result.  */
1636             tcg_gen_movi_i64(t1, INT64_MAX);
1637             t2 = tcg_const_i64(0);
1638             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1639         }
1640         tcg_temp_free_i64(t2);
1641     }
1642     tcg_temp_free_i64(t0);
1643     tcg_temp_free_i64(t1);
1644 }
1645
1646 /* Similarly with a vector and a scalar operand.  */
1647 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1648                               TCGv_i64 val, bool u, bool d)
1649 {
1650     unsigned vsz = vec_full_reg_size(s);
1651     TCGv_ptr dptr, nptr;
1652     TCGv_i32 t32, desc;
1653     TCGv_i64 t64;
1654
1655     dptr = tcg_temp_new_ptr();
1656     nptr = tcg_temp_new_ptr();
1657     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1658     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1659     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1660
1661     switch (esz) {
1662     case MO_8:
1663         t32 = tcg_temp_new_i32();
1664         tcg_gen_extrl_i64_i32(t32, val);
1665         if (d) {
1666             tcg_gen_neg_i32(t32, t32);
1667         }
1668         if (u) {
1669             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1670         } else {
1671             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1672         }
1673         tcg_temp_free_i32(t32);
1674         break;
1675
1676     case MO_16:
1677         t32 = tcg_temp_new_i32();
1678         tcg_gen_extrl_i64_i32(t32, val);
1679         if (d) {
1680             tcg_gen_neg_i32(t32, t32);
1681         }
1682         if (u) {
1683             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1684         } else {
1685             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1686         }
1687         tcg_temp_free_i32(t32);
1688         break;
1689
1690     case MO_32:
1691         t64 = tcg_temp_new_i64();
1692         if (d) {
1693             tcg_gen_neg_i64(t64, val);
1694         } else {
1695             tcg_gen_mov_i64(t64, val);
1696         }
1697         if (u) {
1698             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1699         } else {
1700             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1701         }
1702         tcg_temp_free_i64(t64);
1703         break;
1704
1705     case MO_64:
1706         if (u) {
1707             if (d) {
1708                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1709             } else {
1710                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1711             }
1712         } else if (d) {
1713             t64 = tcg_temp_new_i64();
1714             tcg_gen_neg_i64(t64, val);
1715             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1716             tcg_temp_free_i64(t64);
1717         } else {
1718             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1719         }
1720         break;
1721
1722     default:
1723         g_assert_not_reached();
1724     }
1725
1726     tcg_temp_free_ptr(dptr);
1727     tcg_temp_free_ptr(nptr);
1728     tcg_temp_free_i32(desc);
1729 }
1730
1731 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1732 {
1733     if (sve_access_check(s)) {
1734         unsigned fullsz = vec_full_reg_size(s);
1735         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1736         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1737     }
1738     return true;
1739 }
1740
1741 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1742 {
1743     if (sve_access_check(s)) {
1744         unsigned fullsz = vec_full_reg_size(s);
1745         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1746         int inc = numelem * a->imm * (a->d ? -1 : 1);
1747         TCGv_i64 reg = cpu_reg(s, a->rd);
1748
1749         tcg_gen_addi_i64(reg, reg, inc);
1750     }
1751     return true;
1752 }
1753
1754 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1755 {
1756     if (!sve_access_check(s)) {
1757         return true;
1758     }
1759
1760     unsigned fullsz = vec_full_reg_size(s);
1761     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1762     int inc = numelem * a->imm;
1763     TCGv_i64 reg = cpu_reg(s, a->rd);
1764
1765     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1766     if (inc == 0) {
1767         if (a->u) {
1768             tcg_gen_ext32u_i64(reg, reg);
1769         } else {
1770             tcg_gen_ext32s_i64(reg, reg);
1771         }
1772     } else {
1773         TCGv_i64 t = tcg_const_i64(inc);
1774         do_sat_addsub_32(reg, t, a->u, a->d);
1775         tcg_temp_free_i64(t);
1776     }
1777     return true;
1778 }
1779
1780 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1781 {
1782     if (!sve_access_check(s)) {
1783         return true;
1784     }
1785
1786     unsigned fullsz = vec_full_reg_size(s);
1787     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1788     int inc = numelem * a->imm;
1789     TCGv_i64 reg = cpu_reg(s, a->rd);
1790
1791     if (inc != 0) {
1792         TCGv_i64 t = tcg_const_i64(inc);
1793         do_sat_addsub_64(reg, t, a->u, a->d);
1794         tcg_temp_free_i64(t);
1795     }
1796     return true;
1797 }
1798
1799 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1800 {
1801     if (a->esz == 0) {
1802         return false;
1803     }
1804
1805     unsigned fullsz = vec_full_reg_size(s);
1806     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1807     int inc = numelem * a->imm;
1808
1809     if (inc != 0) {
1810         if (sve_access_check(s)) {
1811             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1812             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1813                               vec_full_reg_offset(s, a->rn),
1814                               t, fullsz, fullsz);
1815             tcg_temp_free_i64(t);
1816         }
1817     } else {
1818         do_mov_z(s, a->rd, a->rn);
1819     }
1820     return true;
1821 }
1822
1823 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1824 {
1825     if (a->esz == 0) {
1826         return false;
1827     }
1828
1829     unsigned fullsz = vec_full_reg_size(s);
1830     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1831     int inc = numelem * a->imm;
1832
1833     if (inc != 0) {
1834         if (sve_access_check(s)) {
1835             TCGv_i64 t = tcg_const_i64(inc);
1836             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1837             tcg_temp_free_i64(t);
1838         }
1839     } else {
1840         do_mov_z(s, a->rd, a->rn);
1841     }
1842     return true;
1843 }
1844
1845 /*
1846  *** SVE Bitwise Immediate Group
1847  */
1848
1849 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1850 {
1851     uint64_t imm;
1852     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1853                                 extract32(a->dbm, 0, 6),
1854                                 extract32(a->dbm, 6, 6))) {
1855         return false;
1856     }
1857     if (sve_access_check(s)) {
1858         unsigned vsz = vec_full_reg_size(s);
1859         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1860                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1861     }
1862     return true;
1863 }
1864
1865 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1866 {
1867     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1868 }
1869
1870 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1871 {
1872     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1873 }
1874
1875 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1876 {
1877     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1878 }
1879
1880 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1881 {
1882     uint64_t imm;
1883     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1884                                 extract32(a->dbm, 0, 6),
1885                                 extract32(a->dbm, 6, 6))) {
1886         return false;
1887     }
1888     if (sve_access_check(s)) {
1889         do_dupi_z(s, a->rd, imm);
1890     }
1891     return true;
1892 }
1893
1894 /*
1895  *** SVE Integer Wide Immediate - Predicated Group
1896  */
1897
1898 /* Implement all merging copies.  This is used for CPY (immediate),
1899  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1900  */
1901 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1902                      TCGv_i64 val)
1903 {
1904     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1905     static gen_cpy * const fns[4] = {
1906         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1907         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1908     };
1909     unsigned vsz = vec_full_reg_size(s);
1910     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1911     TCGv_ptr t_zd = tcg_temp_new_ptr();
1912     TCGv_ptr t_zn = tcg_temp_new_ptr();
1913     TCGv_ptr t_pg = tcg_temp_new_ptr();
1914
1915     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1916     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1917     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1918
1919     fns[esz](t_zd, t_zn, t_pg, val, desc);
1920
1921     tcg_temp_free_ptr(t_zd);
1922     tcg_temp_free_ptr(t_zn);
1923     tcg_temp_free_ptr(t_pg);
1924     tcg_temp_free_i32(desc);
1925 }
1926
1927 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1928 {
1929     if (a->esz == 0) {
1930         return false;
1931     }
1932     if (sve_access_check(s)) {
1933         /* Decode the VFP immediate.  */
1934         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1935         TCGv_i64 t_imm = tcg_const_i64(imm);
1936         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1937         tcg_temp_free_i64(t_imm);
1938     }
1939     return true;
1940 }
1941
1942 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1943 {
1944     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1945         return false;
1946     }
1947     if (sve_access_check(s)) {
1948         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1949         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1950         tcg_temp_free_i64(t_imm);
1951     }
1952     return true;
1953 }
1954
1955 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1956 {
1957     static gen_helper_gvec_2i * const fns[4] = {
1958         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1959         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1960     };
1961
1962     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1963         return false;
1964     }
1965     if (sve_access_check(s)) {
1966         unsigned vsz = vec_full_reg_size(s);
1967         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1968         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1969                             pred_full_reg_offset(s, a->pg),
1970                             t_imm, vsz, vsz, 0, fns[a->esz]);
1971         tcg_temp_free_i64(t_imm);
1972     }
1973     return true;
1974 }
1975
1976 /*
1977  *** SVE Permute Extract Group
1978  */
1979
1980 static bool trans_EXT(DisasContext *s, arg_EXT *a)
1981 {
1982     if (!sve_access_check(s)) {
1983         return true;
1984     }
1985
1986     unsigned vsz = vec_full_reg_size(s);
1987     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1988     unsigned n_siz = vsz - n_ofs;
1989     unsigned d = vec_full_reg_offset(s, a->rd);
1990     unsigned n = vec_full_reg_offset(s, a->rn);
1991     unsigned m = vec_full_reg_offset(s, a->rm);
1992
1993     /* Use host vector move insns if we have appropriate sizes
1994      * and no unfortunate overlap.
1995      */
1996     if (m != d
1997         && n_ofs == size_for_gvec(n_ofs)
1998         && n_siz == size_for_gvec(n_siz)
1999         && (d != n || n_siz <= n_ofs)) {
2000         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2001         if (n_ofs != 0) {
2002             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2003         }
2004     } else {
2005         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2006     }
2007     return true;
2008 }
2009
2010 /*
2011  *** SVE Permute - Unpredicated Group
2012  */
2013
2014 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2015 {
2016     if (sve_access_check(s)) {
2017         unsigned vsz = vec_full_reg_size(s);
2018         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2019                              vsz, vsz, cpu_reg_sp(s, a->rn));
2020     }
2021     return true;
2022 }
2023
2024 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2025 {
2026     if ((a->imm & 0x1f) == 0) {
2027         return false;
2028     }
2029     if (sve_access_check(s)) {
2030         unsigned vsz = vec_full_reg_size(s);
2031         unsigned dofs = vec_full_reg_offset(s, a->rd);
2032         unsigned esz, index;
2033
2034         esz = ctz32(a->imm);
2035         index = a->imm >> (esz + 1);
2036
2037         if ((index << esz) < vsz) {
2038             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2039             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2040         } else {
2041             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2042         }
2043     }
2044     return true;
2045 }
2046
2047 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2048 {
2049     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2050     static gen_insr * const fns[4] = {
2051         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2052         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2053     };
2054     unsigned vsz = vec_full_reg_size(s);
2055     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2056     TCGv_ptr t_zd = tcg_temp_new_ptr();
2057     TCGv_ptr t_zn = tcg_temp_new_ptr();
2058
2059     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2060     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2061
2062     fns[a->esz](t_zd, t_zn, val, desc);
2063
2064     tcg_temp_free_ptr(t_zd);
2065     tcg_temp_free_ptr(t_zn);
2066     tcg_temp_free_i32(desc);
2067 }
2068
2069 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2070 {
2071     if (sve_access_check(s)) {
2072         TCGv_i64 t = tcg_temp_new_i64();
2073         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2074         do_insr_i64(s, a, t);
2075         tcg_temp_free_i64(t);
2076     }
2077     return true;
2078 }
2079
2080 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2081 {
2082     if (sve_access_check(s)) {
2083         do_insr_i64(s, a, cpu_reg(s, a->rm));
2084     }
2085     return true;
2086 }
2087
2088 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2089 {
2090     static gen_helper_gvec_2 * const fns[4] = {
2091         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2092         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2093     };
2094
2095     if (sve_access_check(s)) {
2096         unsigned vsz = vec_full_reg_size(s);
2097         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2098                            vec_full_reg_offset(s, a->rn),
2099                            vsz, vsz, 0, fns[a->esz]);
2100     }
2101     return true;
2102 }
2103
2104 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2105 {
2106     static gen_helper_gvec_3 * const fns[4] = {
2107         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2108         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2109     };
2110
2111     if (sve_access_check(s)) {
2112         unsigned vsz = vec_full_reg_size(s);
2113         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2114                            vec_full_reg_offset(s, a->rn),
2115                            vec_full_reg_offset(s, a->rm),
2116                            vsz, vsz, 0, fns[a->esz]);
2117     }
2118     return true;
2119 }
2120
2121 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2122 {
2123     static gen_helper_gvec_2 * const fns[4][2] = {
2124         { NULL, NULL },
2125         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2126         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2127         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2128     };
2129
2130     if (a->esz == 0) {
2131         return false;
2132     }
2133     if (sve_access_check(s)) {
2134         unsigned vsz = vec_full_reg_size(s);
2135         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2136                            vec_full_reg_offset(s, a->rn)
2137                            + (a->h ? vsz / 2 : 0),
2138                            vsz, vsz, 0, fns[a->esz][a->u]);
2139     }
2140     return true;
2141 }
2142
2143 /*
2144  *** SVE Permute - Predicates Group
2145  */
2146
2147 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2148                           gen_helper_gvec_3 *fn)
2149 {
2150     if (!sve_access_check(s)) {
2151         return true;
2152     }
2153
2154     unsigned vsz = pred_full_reg_size(s);
2155
2156     /* Predicate sizes may be smaller and cannot use simd_desc.
2157        We cannot round up, as we do elsewhere, because we need
2158        the exact size for ZIP2 and REV.  We retain the style for
2159        the other helpers for consistency.  */
2160     TCGv_ptr t_d = tcg_temp_new_ptr();
2161     TCGv_ptr t_n = tcg_temp_new_ptr();
2162     TCGv_ptr t_m = tcg_temp_new_ptr();
2163     TCGv_i32 t_desc;
2164     int desc;
2165
2166     desc = vsz - 2;
2167     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2168     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2169
2170     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2171     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2172     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2173     t_desc = tcg_const_i32(desc);
2174
2175     fn(t_d, t_n, t_m, t_desc);
2176
2177     tcg_temp_free_ptr(t_d);
2178     tcg_temp_free_ptr(t_n);
2179     tcg_temp_free_ptr(t_m);
2180     tcg_temp_free_i32(t_desc);
2181     return true;
2182 }
2183
2184 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2185                           gen_helper_gvec_2 *fn)
2186 {
2187     if (!sve_access_check(s)) {
2188         return true;
2189     }
2190
2191     unsigned vsz = pred_full_reg_size(s);
2192     TCGv_ptr t_d = tcg_temp_new_ptr();
2193     TCGv_ptr t_n = tcg_temp_new_ptr();
2194     TCGv_i32 t_desc;
2195     int desc;
2196
2197     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2198     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2199
2200     /* Predicate sizes may be smaller and cannot use simd_desc.
2201        We cannot round up, as we do elsewhere, because we need
2202        the exact size for ZIP2 and REV.  We retain the style for
2203        the other helpers for consistency.  */
2204
2205     desc = vsz - 2;
2206     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2207     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2208     t_desc = tcg_const_i32(desc);
2209
2210     fn(t_d, t_n, t_desc);
2211
2212     tcg_temp_free_i32(t_desc);
2213     tcg_temp_free_ptr(t_d);
2214     tcg_temp_free_ptr(t_n);
2215     return true;
2216 }
2217
2218 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2219 {
2220     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2221 }
2222
2223 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2224 {
2225     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2226 }
2227
2228 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2229 {
2230     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2231 }
2232
2233 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2234 {
2235     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2236 }
2237
2238 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2239 {
2240     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2241 }
2242
2243 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2244 {
2245     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2246 }
2247
2248 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2249 {
2250     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2251 }
2252
2253 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2254 {
2255     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2256 }
2257
2258 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2259 {
2260     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2261 }
2262
2263 /*
2264  *** SVE Permute - Interleaving Group
2265  */
2266
2267 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2268 {
2269     static gen_helper_gvec_3 * const fns[4] = {
2270         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2271         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2272     };
2273
2274     if (sve_access_check(s)) {
2275         unsigned vsz = vec_full_reg_size(s);
2276         unsigned high_ofs = high ? vsz / 2 : 0;
2277         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2278                            vec_full_reg_offset(s, a->rn) + high_ofs,
2279                            vec_full_reg_offset(s, a->rm) + high_ofs,
2280                            vsz, vsz, 0, fns[a->esz]);
2281     }
2282     return true;
2283 }
2284
2285 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2286                             gen_helper_gvec_3 *fn)
2287 {
2288     if (sve_access_check(s)) {
2289         unsigned vsz = vec_full_reg_size(s);
2290         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2291                            vec_full_reg_offset(s, a->rn),
2292                            vec_full_reg_offset(s, a->rm),
2293                            vsz, vsz, data, fn);
2294     }
2295     return true;
2296 }
2297
2298 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2299 {
2300     return do_zip(s, a, false);
2301 }
2302
2303 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2304 {
2305     return do_zip(s, a, true);
2306 }
2307
2308 static gen_helper_gvec_3 * const uzp_fns[4] = {
2309     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2310     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2311 };
2312
2313 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2314 {
2315     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2316 }
2317
2318 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2319 {
2320     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2321 }
2322
2323 static gen_helper_gvec_3 * const trn_fns[4] = {
2324     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2325     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2326 };
2327
2328 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2329 {
2330     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2331 }
2332
2333 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2334 {
2335     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2336 }
2337
2338 /*
2339  *** SVE Permute Vector - Predicated Group
2340  */
2341
2342 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2343 {
2344     static gen_helper_gvec_3 * const fns[4] = {
2345         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2346     };
2347     return do_zpz_ool(s, a, fns[a->esz]);
2348 }
2349
2350 /* Call the helper that computes the ARM LastActiveElement pseudocode
2351  * function, scaled by the element size.  This includes the not found
2352  * indication; e.g. not found for esz=3 is -8.
2353  */
2354 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2355 {
2356     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2357      * round up, as we do elsewhere, because we need the exact size.
2358      */
2359     TCGv_ptr t_p = tcg_temp_new_ptr();
2360     TCGv_i32 t_desc;
2361     unsigned vsz = pred_full_reg_size(s);
2362     unsigned desc;
2363
2364     desc = vsz - 2;
2365     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2366
2367     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2368     t_desc = tcg_const_i32(desc);
2369
2370     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2371
2372     tcg_temp_free_i32(t_desc);
2373     tcg_temp_free_ptr(t_p);
2374 }
2375
2376 /* Increment LAST to the offset of the next element in the vector,
2377  * wrapping around to 0.
2378  */
2379 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2380 {
2381     unsigned vsz = vec_full_reg_size(s);
2382
2383     tcg_gen_addi_i32(last, last, 1 << esz);
2384     if (is_power_of_2(vsz)) {
2385         tcg_gen_andi_i32(last, last, vsz - 1);
2386     } else {
2387         TCGv_i32 max = tcg_const_i32(vsz);
2388         TCGv_i32 zero = tcg_const_i32(0);
2389         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2390         tcg_temp_free_i32(max);
2391         tcg_temp_free_i32(zero);
2392     }
2393 }
2394
2395 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2396 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2397 {
2398     unsigned vsz = vec_full_reg_size(s);
2399
2400     if (is_power_of_2(vsz)) {
2401         tcg_gen_andi_i32(last, last, vsz - 1);
2402     } else {
2403         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2404         TCGv_i32 zero = tcg_const_i32(0);
2405         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2406         tcg_temp_free_i32(max);
2407         tcg_temp_free_i32(zero);
2408     }
2409 }
2410
2411 /* Load an unsigned element of ESZ from BASE+OFS.  */
2412 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2413 {
2414     TCGv_i64 r = tcg_temp_new_i64();
2415
2416     switch (esz) {
2417     case 0:
2418         tcg_gen_ld8u_i64(r, base, ofs);
2419         break;
2420     case 1:
2421         tcg_gen_ld16u_i64(r, base, ofs);
2422         break;
2423     case 2:
2424         tcg_gen_ld32u_i64(r, base, ofs);
2425         break;
2426     case 3:
2427         tcg_gen_ld_i64(r, base, ofs);
2428         break;
2429     default:
2430         g_assert_not_reached();
2431     }
2432     return r;
2433 }
2434
2435 /* Load an unsigned element of ESZ from RM[LAST].  */
2436 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2437                                  int rm, int esz)
2438 {
2439     TCGv_ptr p = tcg_temp_new_ptr();
2440     TCGv_i64 r;
2441
2442     /* Convert offset into vector into offset into ENV.
2443      * The final adjustment for the vector register base
2444      * is added via constant offset to the load.
2445      */
2446 #ifdef HOST_WORDS_BIGENDIAN
2447     /* Adjust for element ordering.  See vec_reg_offset.  */
2448     if (esz < 3) {
2449         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2450     }
2451 #endif
2452     tcg_gen_ext_i32_ptr(p, last);
2453     tcg_gen_add_ptr(p, p, cpu_env);
2454
2455     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2456     tcg_temp_free_ptr(p);
2457
2458     return r;
2459 }
2460
2461 /* Compute CLAST for a Zreg.  */
2462 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2463 {
2464     TCGv_i32 last;
2465     TCGLabel *over;
2466     TCGv_i64 ele;
2467     unsigned vsz, esz = a->esz;
2468
2469     if (!sve_access_check(s)) {
2470         return true;
2471     }
2472
2473     last = tcg_temp_local_new_i32();
2474     over = gen_new_label();
2475
2476     find_last_active(s, last, esz, a->pg);
2477
2478     /* There is of course no movcond for a 2048-bit vector,
2479      * so we must branch over the actual store.
2480      */
2481     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2482
2483     if (!before) {
2484         incr_last_active(s, last, esz);
2485     }
2486
2487     ele = load_last_active(s, last, a->rm, esz);
2488     tcg_temp_free_i32(last);
2489
2490     vsz = vec_full_reg_size(s);
2491     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2492     tcg_temp_free_i64(ele);
2493
2494     /* If this insn used MOVPRFX, we may need a second move.  */
2495     if (a->rd != a->rn) {
2496         TCGLabel *done = gen_new_label();
2497         tcg_gen_br(done);
2498
2499         gen_set_label(over);
2500         do_mov_z(s, a->rd, a->rn);
2501
2502         gen_set_label(done);
2503     } else {
2504         gen_set_label(over);
2505     }
2506     return true;
2507 }
2508
2509 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2510 {
2511     return do_clast_vector(s, a, false);
2512 }
2513
2514 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2515 {
2516     return do_clast_vector(s, a, true);
2517 }
2518
2519 /* Compute CLAST for a scalar.  */
2520 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2521                             bool before, TCGv_i64 reg_val)
2522 {
2523     TCGv_i32 last = tcg_temp_new_i32();
2524     TCGv_i64 ele, cmp, zero;
2525
2526     find_last_active(s, last, esz, pg);
2527
2528     /* Extend the original value of last prior to incrementing.  */
2529     cmp = tcg_temp_new_i64();
2530     tcg_gen_ext_i32_i64(cmp, last);
2531
2532     if (!before) {
2533         incr_last_active(s, last, esz);
2534     }
2535
2536     /* The conceit here is that while last < 0 indicates not found, after
2537      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2538      * from which we can load garbage.  We then discard the garbage with
2539      * a conditional move.
2540      */
2541     ele = load_last_active(s, last, rm, esz);
2542     tcg_temp_free_i32(last);
2543
2544     zero = tcg_const_i64(0);
2545     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2546
2547     tcg_temp_free_i64(zero);
2548     tcg_temp_free_i64(cmp);
2549     tcg_temp_free_i64(ele);
2550 }
2551
2552 /* Compute CLAST for a Vreg.  */
2553 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2554 {
2555     if (sve_access_check(s)) {
2556         int esz = a->esz;
2557         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2558         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2559
2560         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2561         write_fp_dreg(s, a->rd, reg);
2562         tcg_temp_free_i64(reg);
2563     }
2564     return true;
2565 }
2566
2567 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2568 {
2569     return do_clast_fp(s, a, false);
2570 }
2571
2572 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2573 {
2574     return do_clast_fp(s, a, true);
2575 }
2576
2577 /* Compute CLAST for a Xreg.  */
2578 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2579 {
2580     TCGv_i64 reg;
2581
2582     if (!sve_access_check(s)) {
2583         return true;
2584     }
2585
2586     reg = cpu_reg(s, a->rd);
2587     switch (a->esz) {
2588     case 0:
2589         tcg_gen_ext8u_i64(reg, reg);
2590         break;
2591     case 1:
2592         tcg_gen_ext16u_i64(reg, reg);
2593         break;
2594     case 2:
2595         tcg_gen_ext32u_i64(reg, reg);
2596         break;
2597     case 3:
2598         break;
2599     default:
2600         g_assert_not_reached();
2601     }
2602
2603     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2604     return true;
2605 }
2606
2607 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2608 {
2609     return do_clast_general(s, a, false);
2610 }
2611
2612 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2613 {
2614     return do_clast_general(s, a, true);
2615 }
2616
2617 /* Compute LAST for a scalar.  */
2618 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2619                                int pg, int rm, bool before)
2620 {
2621     TCGv_i32 last = tcg_temp_new_i32();
2622     TCGv_i64 ret;
2623
2624     find_last_active(s, last, esz, pg);
2625     if (before) {
2626         wrap_last_active(s, last, esz);
2627     } else {
2628         incr_last_active(s, last, esz);
2629     }
2630
2631     ret = load_last_active(s, last, rm, esz);
2632     tcg_temp_free_i32(last);
2633     return ret;
2634 }
2635
2636 /* Compute LAST for a Vreg.  */
2637 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2638 {
2639     if (sve_access_check(s)) {
2640         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2641         write_fp_dreg(s, a->rd, val);
2642         tcg_temp_free_i64(val);
2643     }
2644     return true;
2645 }
2646
2647 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2648 {
2649     return do_last_fp(s, a, false);
2650 }
2651
2652 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2653 {
2654     return do_last_fp(s, a, true);
2655 }
2656
2657 /* Compute LAST for a Xreg.  */
2658 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2659 {
2660     if (sve_access_check(s)) {
2661         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2662         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2663         tcg_temp_free_i64(val);
2664     }
2665     return true;
2666 }
2667
2668 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2669 {
2670     return do_last_general(s, a, false);
2671 }
2672
2673 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2674 {
2675     return do_last_general(s, a, true);
2676 }
2677
2678 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2679 {
2680     if (sve_access_check(s)) {
2681         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2682     }
2683     return true;
2684 }
2685
2686 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2687 {
2688     if (sve_access_check(s)) {
2689         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2690         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2691         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2692         tcg_temp_free_i64(t);
2693     }
2694     return true;
2695 }
2696
2697 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2698 {
2699     static gen_helper_gvec_3 * const fns[4] = {
2700         NULL,
2701         gen_helper_sve_revb_h,
2702         gen_helper_sve_revb_s,
2703         gen_helper_sve_revb_d,
2704     };
2705     return do_zpz_ool(s, a, fns[a->esz]);
2706 }
2707
2708 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2709 {
2710     static gen_helper_gvec_3 * const fns[4] = {
2711         NULL,
2712         NULL,
2713         gen_helper_sve_revh_s,
2714         gen_helper_sve_revh_d,
2715     };
2716     return do_zpz_ool(s, a, fns[a->esz]);
2717 }
2718
2719 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2720 {
2721     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2722 }
2723
2724 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2725 {
2726     static gen_helper_gvec_3 * const fns[4] = {
2727         gen_helper_sve_rbit_b,
2728         gen_helper_sve_rbit_h,
2729         gen_helper_sve_rbit_s,
2730         gen_helper_sve_rbit_d,
2731     };
2732     return do_zpz_ool(s, a, fns[a->esz]);
2733 }
2734
2735 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2736 {
2737     if (sve_access_check(s)) {
2738         unsigned vsz = vec_full_reg_size(s);
2739         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2740                            vec_full_reg_offset(s, a->rn),
2741                            vec_full_reg_offset(s, a->rm),
2742                            pred_full_reg_offset(s, a->pg),
2743                            vsz, vsz, a->esz, gen_helper_sve_splice);
2744     }
2745     return true;
2746 }
2747
2748 /*
2749  *** SVE Integer Compare - Vectors Group
2750  */
2751
2752 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2753                           gen_helper_gvec_flags_4 *gen_fn)
2754 {
2755     TCGv_ptr pd, zn, zm, pg;
2756     unsigned vsz;
2757     TCGv_i32 t;
2758
2759     if (gen_fn == NULL) {
2760         return false;
2761     }
2762     if (!sve_access_check(s)) {
2763         return true;
2764     }
2765
2766     vsz = vec_full_reg_size(s);
2767     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2768     pd = tcg_temp_new_ptr();
2769     zn = tcg_temp_new_ptr();
2770     zm = tcg_temp_new_ptr();
2771     pg = tcg_temp_new_ptr();
2772
2773     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2774     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2775     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2776     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2777
2778     gen_fn(t, pd, zn, zm, pg, t);
2779
2780     tcg_temp_free_ptr(pd);
2781     tcg_temp_free_ptr(zn);
2782     tcg_temp_free_ptr(zm);
2783     tcg_temp_free_ptr(pg);
2784
2785     do_pred_flags(t);
2786
2787     tcg_temp_free_i32(t);
2788     return true;
2789 }
2790
2791 #define DO_PPZZ(NAME, name) \
2792 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
2793 {                                                                         \
2794     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2795         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2796         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2797     };                                                                    \
2798     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2799 }
2800
2801 DO_PPZZ(CMPEQ, cmpeq)
2802 DO_PPZZ(CMPNE, cmpne)
2803 DO_PPZZ(CMPGT, cmpgt)
2804 DO_PPZZ(CMPGE, cmpge)
2805 DO_PPZZ(CMPHI, cmphi)
2806 DO_PPZZ(CMPHS, cmphs)
2807
2808 #undef DO_PPZZ
2809
2810 #define DO_PPZW(NAME, name) \
2811 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
2812 {                                                                         \
2813     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2814         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2815         gen_helper_sve_##name##_ppzw_s, NULL                              \
2816     };                                                                    \
2817     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2818 }
2819
2820 DO_PPZW(CMPEQ, cmpeq)
2821 DO_PPZW(CMPNE, cmpne)
2822 DO_PPZW(CMPGT, cmpgt)
2823 DO_PPZW(CMPGE, cmpge)
2824 DO_PPZW(CMPHI, cmphi)
2825 DO_PPZW(CMPHS, cmphs)
2826 DO_PPZW(CMPLT, cmplt)
2827 DO_PPZW(CMPLE, cmple)
2828 DO_PPZW(CMPLO, cmplo)
2829 DO_PPZW(CMPLS, cmpls)
2830
2831 #undef DO_PPZW
2832
2833 /*
2834  *** SVE Integer Compare - Immediate Groups
2835  */
2836
2837 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2838                           gen_helper_gvec_flags_3 *gen_fn)
2839 {
2840     TCGv_ptr pd, zn, pg;
2841     unsigned vsz;
2842     TCGv_i32 t;
2843
2844     if (gen_fn == NULL) {
2845         return false;
2846     }
2847     if (!sve_access_check(s)) {
2848         return true;
2849     }
2850
2851     vsz = vec_full_reg_size(s);
2852     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2853     pd = tcg_temp_new_ptr();
2854     zn = tcg_temp_new_ptr();
2855     pg = tcg_temp_new_ptr();
2856
2857     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2858     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2859     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2860
2861     gen_fn(t, pd, zn, pg, t);
2862
2863     tcg_temp_free_ptr(pd);
2864     tcg_temp_free_ptr(zn);
2865     tcg_temp_free_ptr(pg);
2866
2867     do_pred_flags(t);
2868
2869     tcg_temp_free_i32(t);
2870     return true;
2871 }
2872
2873 #define DO_PPZI(NAME, name) \
2874 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
2875 {                                                                         \
2876     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2877         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2878         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2879     };                                                                    \
2880     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2881 }
2882
2883 DO_PPZI(CMPEQ, cmpeq)
2884 DO_PPZI(CMPNE, cmpne)
2885 DO_PPZI(CMPGT, cmpgt)
2886 DO_PPZI(CMPGE, cmpge)
2887 DO_PPZI(CMPHI, cmphi)
2888 DO_PPZI(CMPHS, cmphs)
2889 DO_PPZI(CMPLT, cmplt)
2890 DO_PPZI(CMPLE, cmple)
2891 DO_PPZI(CMPLO, cmplo)
2892 DO_PPZI(CMPLS, cmpls)
2893
2894 #undef DO_PPZI
2895
2896 /*
2897  *** SVE Partition Break Group
2898  */
2899
2900 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2901                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2902 {
2903     if (!sve_access_check(s)) {
2904         return true;
2905     }
2906
2907     unsigned vsz = pred_full_reg_size(s);
2908
2909     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2910     TCGv_ptr d = tcg_temp_new_ptr();
2911     TCGv_ptr n = tcg_temp_new_ptr();
2912     TCGv_ptr m = tcg_temp_new_ptr();
2913     TCGv_ptr g = tcg_temp_new_ptr();
2914     TCGv_i32 t = tcg_const_i32(vsz - 2);
2915
2916     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2917     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2918     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2919     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2920
2921     if (a->s) {
2922         fn_s(t, d, n, m, g, t);
2923         do_pred_flags(t);
2924     } else {
2925         fn(d, n, m, g, t);
2926     }
2927     tcg_temp_free_ptr(d);
2928     tcg_temp_free_ptr(n);
2929     tcg_temp_free_ptr(m);
2930     tcg_temp_free_ptr(g);
2931     tcg_temp_free_i32(t);
2932     return true;
2933 }
2934
2935 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2936                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2937 {
2938     if (!sve_access_check(s)) {
2939         return true;
2940     }
2941
2942     unsigned vsz = pred_full_reg_size(s);
2943
2944     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2945     TCGv_ptr d = tcg_temp_new_ptr();
2946     TCGv_ptr n = tcg_temp_new_ptr();
2947     TCGv_ptr g = tcg_temp_new_ptr();
2948     TCGv_i32 t = tcg_const_i32(vsz - 2);
2949
2950     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2951     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2952     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2953
2954     if (a->s) {
2955         fn_s(t, d, n, g, t);
2956         do_pred_flags(t);
2957     } else {
2958         fn(d, n, g, t);
2959     }
2960     tcg_temp_free_ptr(d);
2961     tcg_temp_free_ptr(n);
2962     tcg_temp_free_ptr(g);
2963     tcg_temp_free_i32(t);
2964     return true;
2965 }
2966
2967 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2968 {
2969     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2970 }
2971
2972 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2973 {
2974     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2975 }
2976
2977 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2978 {
2979     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2980 }
2981
2982 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2983 {
2984     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2985 }
2986
2987 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2988 {
2989     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2990 }
2991
2992 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
2993 {
2994     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2995 }
2996
2997 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
2998 {
2999     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3000 }
3001
3002 /*
3003  *** SVE Predicate Count Group
3004  */
3005
3006 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3007 {
3008     unsigned psz = pred_full_reg_size(s);
3009
3010     if (psz <= 8) {
3011         uint64_t psz_mask;
3012
3013         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3014         if (pn != pg) {
3015             TCGv_i64 g = tcg_temp_new_i64();
3016             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3017             tcg_gen_and_i64(val, val, g);
3018             tcg_temp_free_i64(g);
3019         }
3020
3021         /* Reduce the pred_esz_masks value simply to reduce the
3022          * size of the code generated here.
3023          */
3024         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3025         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3026
3027         tcg_gen_ctpop_i64(val, val);
3028     } else {
3029         TCGv_ptr t_pn = tcg_temp_new_ptr();
3030         TCGv_ptr t_pg = tcg_temp_new_ptr();
3031         unsigned desc;
3032         TCGv_i32 t_desc;
3033
3034         desc = psz - 2;
3035         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3036
3037         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3038         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3039         t_desc = tcg_const_i32(desc);
3040
3041         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3042         tcg_temp_free_ptr(t_pn);
3043         tcg_temp_free_ptr(t_pg);
3044         tcg_temp_free_i32(t_desc);
3045     }
3046 }
3047
3048 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3049 {
3050     if (sve_access_check(s)) {
3051         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3052     }
3053     return true;
3054 }
3055
3056 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3057 {
3058     if (sve_access_check(s)) {
3059         TCGv_i64 reg = cpu_reg(s, a->rd);
3060         TCGv_i64 val = tcg_temp_new_i64();
3061
3062         do_cntp(s, val, a->esz, a->pg, a->pg);
3063         if (a->d) {
3064             tcg_gen_sub_i64(reg, reg, val);
3065         } else {
3066             tcg_gen_add_i64(reg, reg, val);
3067         }
3068         tcg_temp_free_i64(val);
3069     }
3070     return true;
3071 }
3072
3073 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3074 {
3075     if (a->esz == 0) {
3076         return false;
3077     }
3078     if (sve_access_check(s)) {
3079         unsigned vsz = vec_full_reg_size(s);
3080         TCGv_i64 val = tcg_temp_new_i64();
3081         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3082
3083         do_cntp(s, val, a->esz, a->pg, a->pg);
3084         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3085                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3086     }
3087     return true;
3088 }
3089
3090 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3091 {
3092     if (sve_access_check(s)) {
3093         TCGv_i64 reg = cpu_reg(s, a->rd);
3094         TCGv_i64 val = tcg_temp_new_i64();
3095
3096         do_cntp(s, val, a->esz, a->pg, a->pg);
3097         do_sat_addsub_32(reg, val, a->u, a->d);
3098     }
3099     return true;
3100 }
3101
3102 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3103 {
3104     if (sve_access_check(s)) {
3105         TCGv_i64 reg = cpu_reg(s, a->rd);
3106         TCGv_i64 val = tcg_temp_new_i64();
3107
3108         do_cntp(s, val, a->esz, a->pg, a->pg);
3109         do_sat_addsub_64(reg, val, a->u, a->d);
3110     }
3111     return true;
3112 }
3113
3114 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3115 {
3116     if (a->esz == 0) {
3117         return false;
3118     }
3119     if (sve_access_check(s)) {
3120         TCGv_i64 val = tcg_temp_new_i64();
3121         do_cntp(s, val, a->esz, a->pg, a->pg);
3122         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3123     }
3124     return true;
3125 }
3126
3127 /*
3128  *** SVE Integer Compare Scalars Group
3129  */
3130
3131 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3132 {
3133     if (!sve_access_check(s)) {
3134         return true;
3135     }
3136
3137     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3138     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3139     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3140     TCGv_i64 cmp = tcg_temp_new_i64();
3141
3142     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3143     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3144     tcg_temp_free_i64(cmp);
3145
3146     /* VF = !NF & !CF.  */
3147     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3148     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3149
3150     /* Both NF and VF actually look at bit 31.  */
3151     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3152     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3153     return true;
3154 }
3155
3156 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3157 {
3158     TCGv_i64 op0, op1, t0, t1, tmax;
3159     TCGv_i32 t2, t3;
3160     TCGv_ptr ptr;
3161     unsigned desc, vsz = vec_full_reg_size(s);
3162     TCGCond cond;
3163
3164     if (!sve_access_check(s)) {
3165         return true;
3166     }
3167
3168     op0 = read_cpu_reg(s, a->rn, 1);
3169     op1 = read_cpu_reg(s, a->rm, 1);
3170
3171     if (!a->sf) {
3172         if (a->u) {
3173             tcg_gen_ext32u_i64(op0, op0);
3174             tcg_gen_ext32u_i64(op1, op1);
3175         } else {
3176             tcg_gen_ext32s_i64(op0, op0);
3177             tcg_gen_ext32s_i64(op1, op1);
3178         }
3179     }
3180
3181     /* For the helper, compress the different conditions into a computation
3182      * of how many iterations for which the condition is true.
3183      */
3184     t0 = tcg_temp_new_i64();
3185     t1 = tcg_temp_new_i64();
3186     tcg_gen_sub_i64(t0, op1, op0);
3187
3188     tmax = tcg_const_i64(vsz >> a->esz);
3189     if (a->eq) {
3190         /* Equality means one more iteration.  */
3191         tcg_gen_addi_i64(t0, t0, 1);
3192
3193         /* If op1 is max (un)signed integer (and the only time the addition
3194          * above could overflow), then we produce an all-true predicate by
3195          * setting the count to the vector length.  This is because the
3196          * pseudocode is described as an increment + compare loop, and the
3197          * max integer would always compare true.
3198          */
3199         tcg_gen_movi_i64(t1, (a->sf
3200                               ? (a->u ? UINT64_MAX : INT64_MAX)
3201                               : (a->u ? UINT32_MAX : INT32_MAX)));
3202         tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3203     }
3204
3205     /* Bound to the maximum.  */
3206     tcg_gen_umin_i64(t0, t0, tmax);
3207     tcg_temp_free_i64(tmax);
3208
3209     /* Set the count to zero if the condition is false.  */
3210     cond = (a->u
3211             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3212             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3213     tcg_gen_movi_i64(t1, 0);
3214     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3215     tcg_temp_free_i64(t1);
3216
3217     /* Since we're bounded, pass as a 32-bit type.  */
3218     t2 = tcg_temp_new_i32();
3219     tcg_gen_extrl_i64_i32(t2, t0);
3220     tcg_temp_free_i64(t0);
3221
3222     /* Scale elements to bits.  */
3223     tcg_gen_shli_i32(t2, t2, a->esz);
3224
3225     desc = (vsz / 8) - 2;
3226     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3227     t3 = tcg_const_i32(desc);
3228
3229     ptr = tcg_temp_new_ptr();
3230     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3231
3232     gen_helper_sve_while(t2, ptr, t2, t3);
3233     do_pred_flags(t2);
3234
3235     tcg_temp_free_ptr(ptr);
3236     tcg_temp_free_i32(t2);
3237     tcg_temp_free_i32(t3);
3238     return true;
3239 }
3240
3241 /*
3242  *** SVE Integer Wide Immediate - Unpredicated Group
3243  */
3244
3245 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3246 {
3247     if (a->esz == 0) {
3248         return false;
3249     }
3250     if (sve_access_check(s)) {
3251         unsigned vsz = vec_full_reg_size(s);
3252         int dofs = vec_full_reg_offset(s, a->rd);
3253         uint64_t imm;
3254
3255         /* Decode the VFP immediate.  */
3256         imm = vfp_expand_imm(a->esz, a->imm);
3257         imm = dup_const(a->esz, imm);
3258
3259         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3260     }
3261     return true;
3262 }
3263
3264 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3265 {
3266     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3267         return false;
3268     }
3269     if (sve_access_check(s)) {
3270         unsigned vsz = vec_full_reg_size(s);
3271         int dofs = vec_full_reg_offset(s, a->rd);
3272
3273         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3274     }
3275     return true;
3276 }
3277
3278 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3279 {
3280     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3281         return false;
3282     }
3283     if (sve_access_check(s)) {
3284         unsigned vsz = vec_full_reg_size(s);
3285         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3286                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3287     }
3288     return true;
3289 }
3290
3291 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3292 {
3293     a->imm = -a->imm;
3294     return trans_ADD_zzi(s, a);
3295 }
3296
3297 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3298 {
3299     static const GVecGen2s op[4] = {
3300         { .fni8 = tcg_gen_vec_sub8_i64,
3301           .fniv = tcg_gen_sub_vec,
3302           .fno = gen_helper_sve_subri_b,
3303           .opc = INDEX_op_sub_vec,
3304           .vece = MO_8,
3305           .scalar_first = true },
3306         { .fni8 = tcg_gen_vec_sub16_i64,
3307           .fniv = tcg_gen_sub_vec,
3308           .fno = gen_helper_sve_subri_h,
3309           .opc = INDEX_op_sub_vec,
3310           .vece = MO_16,
3311           .scalar_first = true },
3312         { .fni4 = tcg_gen_sub_i32,
3313           .fniv = tcg_gen_sub_vec,
3314           .fno = gen_helper_sve_subri_s,
3315           .opc = INDEX_op_sub_vec,
3316           .vece = MO_32,
3317           .scalar_first = true },
3318         { .fni8 = tcg_gen_sub_i64,
3319           .fniv = tcg_gen_sub_vec,
3320           .fno = gen_helper_sve_subri_d,
3321           .opc = INDEX_op_sub_vec,
3322           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3323           .vece = MO_64,
3324           .scalar_first = true }
3325     };
3326
3327     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3328         return false;
3329     }
3330     if (sve_access_check(s)) {
3331         unsigned vsz = vec_full_reg_size(s);
3332         TCGv_i64 c = tcg_const_i64(a->imm);
3333         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3334                         vec_full_reg_offset(s, a->rn),
3335                         vsz, vsz, c, &op[a->esz]);
3336         tcg_temp_free_i64(c);
3337     }
3338     return true;
3339 }
3340
3341 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3342 {
3343     if (sve_access_check(s)) {
3344         unsigned vsz = vec_full_reg_size(s);
3345         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3346                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3347     }
3348     return true;
3349 }
3350
3351 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3352 {
3353     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3354         return false;
3355     }
3356     if (sve_access_check(s)) {
3357         TCGv_i64 val = tcg_const_i64(a->imm);
3358         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3359         tcg_temp_free_i64(val);
3360     }
3361     return true;
3362 }
3363
3364 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3365 {
3366     return do_zzi_sat(s, a, false, false);
3367 }
3368
3369 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3370 {
3371     return do_zzi_sat(s, a, true, false);
3372 }
3373
3374 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3375 {
3376     return do_zzi_sat(s, a, false, true);
3377 }
3378
3379 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3380 {
3381     return do_zzi_sat(s, a, true, true);
3382 }
3383
3384 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3385 {
3386     if (sve_access_check(s)) {
3387         unsigned vsz = vec_full_reg_size(s);
3388         TCGv_i64 c = tcg_const_i64(a->imm);
3389
3390         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3391                             vec_full_reg_offset(s, a->rn),
3392                             c, vsz, vsz, 0, fn);
3393         tcg_temp_free_i64(c);
3394     }
3395     return true;
3396 }
3397
3398 #define DO_ZZI(NAME, name) \
3399 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
3400 {                                                                       \
3401     static gen_helper_gvec_2i * const fns[4] = {                        \
3402         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3403         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3404     };                                                                  \
3405     return do_zzi_ool(s, a, fns[a->esz]);                               \
3406 }
3407
3408 DO_ZZI(SMAX, smax)
3409 DO_ZZI(UMAX, umax)
3410 DO_ZZI(SMIN, smin)
3411 DO_ZZI(UMIN, umin)
3412
3413 #undef DO_ZZI
3414
3415 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3416 {
3417     static gen_helper_gvec_3 * const fns[2][2] = {
3418         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3419         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3420     };
3421
3422     if (sve_access_check(s)) {
3423         unsigned vsz = vec_full_reg_size(s);
3424         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3425                            vec_full_reg_offset(s, a->rn),
3426                            vec_full_reg_offset(s, a->rm),
3427                            vsz, vsz, 0, fns[a->u][a->sz]);
3428     }
3429     return true;
3430 }
3431
3432 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3433 {
3434     static gen_helper_gvec_3 * const fns[2][2] = {
3435         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3436         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3437     };
3438
3439     if (sve_access_check(s)) {
3440         unsigned vsz = vec_full_reg_size(s);
3441         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3442                            vec_full_reg_offset(s, a->rn),
3443                            vec_full_reg_offset(s, a->rm),
3444                            vsz, vsz, a->index, fns[a->u][a->sz]);
3445     }
3446     return true;
3447 }
3448
3449
3450 /*
3451  *** SVE Floating Point Multiply-Add Indexed Group
3452  */
3453
3454 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3455 {
3456     static gen_helper_gvec_4_ptr * const fns[3] = {
3457         gen_helper_gvec_fmla_idx_h,
3458         gen_helper_gvec_fmla_idx_s,
3459         gen_helper_gvec_fmla_idx_d,
3460     };
3461
3462     if (sve_access_check(s)) {
3463         unsigned vsz = vec_full_reg_size(s);
3464         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3465         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3466                            vec_full_reg_offset(s, a->rn),
3467                            vec_full_reg_offset(s, a->rm),
3468                            vec_full_reg_offset(s, a->ra),
3469                            status, vsz, vsz, (a->index << 1) | a->sub,
3470                            fns[a->esz - 1]);
3471         tcg_temp_free_ptr(status);
3472     }
3473     return true;
3474 }
3475
3476 /*
3477  *** SVE Floating Point Multiply Indexed Group
3478  */
3479
3480 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3481 {
3482     static gen_helper_gvec_3_ptr * const fns[3] = {
3483         gen_helper_gvec_fmul_idx_h,
3484         gen_helper_gvec_fmul_idx_s,
3485         gen_helper_gvec_fmul_idx_d,
3486     };
3487
3488     if (sve_access_check(s)) {
3489         unsigned vsz = vec_full_reg_size(s);
3490         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3491         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3492                            vec_full_reg_offset(s, a->rn),
3493                            vec_full_reg_offset(s, a->rm),
3494                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3495         tcg_temp_free_ptr(status);
3496     }
3497     return true;
3498 }
3499
3500 /*
3501  *** SVE Floating Point Fast Reduction Group
3502  */
3503
3504 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3505                                   TCGv_ptr, TCGv_i32);
3506
3507 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3508                       gen_helper_fp_reduce *fn)
3509 {
3510     unsigned vsz = vec_full_reg_size(s);
3511     unsigned p2vsz = pow2ceil(vsz);
3512     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3513     TCGv_ptr t_zn, t_pg, status;
3514     TCGv_i64 temp;
3515
3516     temp = tcg_temp_new_i64();
3517     t_zn = tcg_temp_new_ptr();
3518     t_pg = tcg_temp_new_ptr();
3519
3520     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3521     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3522     status = get_fpstatus_ptr(a->esz == MO_16);
3523
3524     fn(temp, t_zn, t_pg, status, t_desc);
3525     tcg_temp_free_ptr(t_zn);
3526     tcg_temp_free_ptr(t_pg);
3527     tcg_temp_free_ptr(status);
3528     tcg_temp_free_i32(t_desc);
3529
3530     write_fp_dreg(s, a->rd, temp);
3531     tcg_temp_free_i64(temp);
3532 }
3533
3534 #define DO_VPZ(NAME, name) \
3535 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
3536 {                                                                        \
3537     static gen_helper_fp_reduce * const fns[3] = {                       \
3538         gen_helper_sve_##name##_h,                                       \
3539         gen_helper_sve_##name##_s,                                       \
3540         gen_helper_sve_##name##_d,                                       \
3541     };                                                                   \
3542     if (a->esz == 0) {                                                   \
3543         return false;                                                    \
3544     }                                                                    \
3545     if (sve_access_check(s)) {                                           \
3546         do_reduce(s, a, fns[a->esz - 1]);                                \
3547     }                                                                    \
3548     return true;                                                         \
3549 }
3550
3551 DO_VPZ(FADDV, faddv)
3552 DO_VPZ(FMINNMV, fminnmv)
3553 DO_VPZ(FMAXNMV, fmaxnmv)
3554 DO_VPZ(FMINV, fminv)
3555 DO_VPZ(FMAXV, fmaxv)
3556
3557 /*
3558  *** SVE Floating Point Unary Operations - Unpredicated Group
3559  */
3560
3561 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3562 {
3563     unsigned vsz = vec_full_reg_size(s);
3564     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3565
3566     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3567                        vec_full_reg_offset(s, a->rn),
3568                        status, vsz, vsz, 0, fn);
3569     tcg_temp_free_ptr(status);
3570 }
3571
3572 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3573 {
3574     static gen_helper_gvec_2_ptr * const fns[3] = {
3575         gen_helper_gvec_frecpe_h,
3576         gen_helper_gvec_frecpe_s,
3577         gen_helper_gvec_frecpe_d,
3578     };
3579     if (a->esz == 0) {
3580         return false;
3581     }
3582     if (sve_access_check(s)) {
3583         do_zz_fp(s, a, fns[a->esz - 1]);
3584     }
3585     return true;
3586 }
3587
3588 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3589 {
3590     static gen_helper_gvec_2_ptr * const fns[3] = {
3591         gen_helper_gvec_frsqrte_h,
3592         gen_helper_gvec_frsqrte_s,
3593         gen_helper_gvec_frsqrte_d,
3594     };
3595     if (a->esz == 0) {
3596         return false;
3597     }
3598     if (sve_access_check(s)) {
3599         do_zz_fp(s, a, fns[a->esz - 1]);
3600     }
3601     return true;
3602 }
3603
3604 /*
3605  *** SVE Floating Point Compare with Zero Group
3606  */
3607
3608 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3609                       gen_helper_gvec_3_ptr *fn)
3610 {
3611     unsigned vsz = vec_full_reg_size(s);
3612     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3613
3614     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3615                        vec_full_reg_offset(s, a->rn),
3616                        pred_full_reg_offset(s, a->pg),
3617                        status, vsz, vsz, 0, fn);
3618     tcg_temp_free_ptr(status);
3619 }
3620
3621 #define DO_PPZ(NAME, name) \
3622 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
3623 {                                                                 \
3624     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3625         gen_helper_sve_##name##_h,                                \
3626         gen_helper_sve_##name##_s,                                \
3627         gen_helper_sve_##name##_d,                                \
3628     };                                                            \
3629     if (a->esz == 0) {                                            \
3630         return false;                                             \
3631     }                                                             \
3632     if (sve_access_check(s)) {                                    \
3633         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3634     }                                                             \
3635     return true;                                                  \
3636 }
3637
3638 DO_PPZ(FCMGE_ppz0, fcmge0)
3639 DO_PPZ(FCMGT_ppz0, fcmgt0)
3640 DO_PPZ(FCMLE_ppz0, fcmle0)
3641 DO_PPZ(FCMLT_ppz0, fcmlt0)
3642 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3643 DO_PPZ(FCMNE_ppz0, fcmne0)
3644
3645 #undef DO_PPZ
3646
3647 /*
3648  *** SVE floating-point trig multiply-add coefficient
3649  */
3650
3651 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3652 {
3653     static gen_helper_gvec_3_ptr * const fns[3] = {
3654         gen_helper_sve_ftmad_h,
3655         gen_helper_sve_ftmad_s,
3656         gen_helper_sve_ftmad_d,
3657     };
3658
3659     if (a->esz == 0) {
3660         return false;
3661     }
3662     if (sve_access_check(s)) {
3663         unsigned vsz = vec_full_reg_size(s);
3664         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3665         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3666                            vec_full_reg_offset(s, a->rn),
3667                            vec_full_reg_offset(s, a->rm),
3668                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3669         tcg_temp_free_ptr(status);
3670     }
3671     return true;
3672 }
3673
3674 /*
3675  *** SVE Floating Point Accumulating Reduction Group
3676  */
3677
3678 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3679 {
3680     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3681                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3682     static fadda_fn * const fns[3] = {
3683         gen_helper_sve_fadda_h,
3684         gen_helper_sve_fadda_s,
3685         gen_helper_sve_fadda_d,
3686     };
3687     unsigned vsz = vec_full_reg_size(s);
3688     TCGv_ptr t_rm, t_pg, t_fpst;
3689     TCGv_i64 t_val;
3690     TCGv_i32 t_desc;
3691
3692     if (a->esz == 0) {
3693         return false;
3694     }
3695     if (!sve_access_check(s)) {
3696         return true;
3697     }
3698
3699     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3700     t_rm = tcg_temp_new_ptr();
3701     t_pg = tcg_temp_new_ptr();
3702     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3703     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3704     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3705     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3706
3707     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3708
3709     tcg_temp_free_i32(t_desc);
3710     tcg_temp_free_ptr(t_fpst);
3711     tcg_temp_free_ptr(t_pg);
3712     tcg_temp_free_ptr(t_rm);
3713
3714     write_fp_dreg(s, a->rd, t_val);
3715     tcg_temp_free_i64(t_val);
3716     return true;
3717 }
3718
3719 /*
3720  *** SVE Floating Point Arithmetic - Unpredicated Group
3721  */
3722
3723 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3724                       gen_helper_gvec_3_ptr *fn)
3725 {
3726     if (fn == NULL) {
3727         return false;
3728     }
3729     if (sve_access_check(s)) {
3730         unsigned vsz = vec_full_reg_size(s);
3731         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3732         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3733                            vec_full_reg_offset(s, a->rn),
3734                            vec_full_reg_offset(s, a->rm),
3735                            status, vsz, vsz, 0, fn);
3736         tcg_temp_free_ptr(status);
3737     }
3738     return true;
3739 }
3740
3741
3742 #define DO_FP3(NAME, name) \
3743 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
3744 {                                                                   \
3745     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3746         NULL, gen_helper_gvec_##name##_h,                           \
3747         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3748     };                                                              \
3749     return do_zzz_fp(s, a, fns[a->esz]);                            \
3750 }
3751
3752 DO_FP3(FADD_zzz, fadd)
3753 DO_FP3(FSUB_zzz, fsub)
3754 DO_FP3(FMUL_zzz, fmul)
3755 DO_FP3(FTSMUL, ftsmul)
3756 DO_FP3(FRECPS, recps)
3757 DO_FP3(FRSQRTS, rsqrts)
3758
3759 #undef DO_FP3
3760
3761 /*
3762  *** SVE Floating Point Arithmetic - Predicated Group
3763  */
3764
3765 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3766                        gen_helper_gvec_4_ptr *fn)
3767 {
3768     if (fn == NULL) {
3769         return false;
3770     }
3771     if (sve_access_check(s)) {
3772         unsigned vsz = vec_full_reg_size(s);
3773         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3774         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3775                            vec_full_reg_offset(s, a->rn),
3776                            vec_full_reg_offset(s, a->rm),
3777                            pred_full_reg_offset(s, a->pg),
3778                            status, vsz, vsz, 0, fn);
3779         tcg_temp_free_ptr(status);
3780     }
3781     return true;
3782 }
3783
3784 #define DO_FP3(NAME, name) \
3785 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
3786 {                                                                   \
3787     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3788         NULL, gen_helper_sve_##name##_h,                            \
3789         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3790     };                                                              \
3791     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3792 }
3793
3794 DO_FP3(FADD_zpzz, fadd)
3795 DO_FP3(FSUB_zpzz, fsub)
3796 DO_FP3(FMUL_zpzz, fmul)
3797 DO_FP3(FMIN_zpzz, fmin)
3798 DO_FP3(FMAX_zpzz, fmax)
3799 DO_FP3(FMINNM_zpzz, fminnum)
3800 DO_FP3(FMAXNM_zpzz, fmaxnum)
3801 DO_FP3(FABD, fabd)
3802 DO_FP3(FSCALE, fscalbn)
3803 DO_FP3(FDIV, fdiv)
3804 DO_FP3(FMULX, fmulx)
3805
3806 #undef DO_FP3
3807
3808 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3809                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3810
3811 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3812                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3813 {
3814     unsigned vsz = vec_full_reg_size(s);
3815     TCGv_ptr t_zd, t_zn, t_pg, status;
3816     TCGv_i32 desc;
3817
3818     t_zd = tcg_temp_new_ptr();
3819     t_zn = tcg_temp_new_ptr();
3820     t_pg = tcg_temp_new_ptr();
3821     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3822     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3823     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3824
3825     status = get_fpstatus_ptr(is_fp16);
3826     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3827     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3828
3829     tcg_temp_free_i32(desc);
3830     tcg_temp_free_ptr(status);
3831     tcg_temp_free_ptr(t_pg);
3832     tcg_temp_free_ptr(t_zn);
3833     tcg_temp_free_ptr(t_zd);
3834 }
3835
3836 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3837                       gen_helper_sve_fp2scalar *fn)
3838 {
3839     TCGv_i64 temp = tcg_const_i64(imm);
3840     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3841     tcg_temp_free_i64(temp);
3842 }
3843
3844 #define DO_FP_IMM(NAME, name, const0, const1) \
3845 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
3846 {                                                                         \
3847     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3848         gen_helper_sve_##name##_h,                                        \
3849         gen_helper_sve_##name##_s,                                        \
3850         gen_helper_sve_##name##_d                                         \
3851     };                                                                    \
3852     static uint64_t const val[3][2] = {                                   \
3853         { float16_##const0, float16_##const1 },                           \
3854         { float32_##const0, float32_##const1 },                           \
3855         { float64_##const0, float64_##const1 },                           \
3856     };                                                                    \
3857     if (a->esz == 0) {                                                    \
3858         return false;                                                     \
3859     }                                                                     \
3860     if (sve_access_check(s)) {                                            \
3861         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3862     }                                                                     \
3863     return true;                                                          \
3864 }
3865
3866 #define float16_two  make_float16(0x4000)
3867 #define float32_two  make_float32(0x40000000)
3868 #define float64_two  make_float64(0x4000000000000000ULL)
3869
3870 DO_FP_IMM(FADD, fadds, half, one)
3871 DO_FP_IMM(FSUB, fsubs, half, one)
3872 DO_FP_IMM(FMUL, fmuls, half, two)
3873 DO_FP_IMM(FSUBR, fsubrs, half, one)
3874 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3875 DO_FP_IMM(FMINNM, fminnms, zero, one)
3876 DO_FP_IMM(FMAX, fmaxs, zero, one)
3877 DO_FP_IMM(FMIN, fmins, zero, one)
3878
3879 #undef DO_FP_IMM
3880
3881 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3882                       gen_helper_gvec_4_ptr *fn)
3883 {
3884     if (fn == NULL) {
3885         return false;
3886     }
3887     if (sve_access_check(s)) {
3888         unsigned vsz = vec_full_reg_size(s);
3889         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3890         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3891                            vec_full_reg_offset(s, a->rn),
3892                            vec_full_reg_offset(s, a->rm),
3893                            pred_full_reg_offset(s, a->pg),
3894                            status, vsz, vsz, 0, fn);
3895         tcg_temp_free_ptr(status);
3896     }
3897     return true;
3898 }
3899
3900 #define DO_FPCMP(NAME, name) \
3901 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
3902 {                                                                     \
3903     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3904         NULL, gen_helper_sve_##name##_h,                              \
3905         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3906     };                                                                \
3907     return do_fp_cmp(s, a, fns[a->esz]);                              \
3908 }
3909
3910 DO_FPCMP(FCMGE, fcmge)
3911 DO_FPCMP(FCMGT, fcmgt)
3912 DO_FPCMP(FCMEQ, fcmeq)
3913 DO_FPCMP(FCMNE, fcmne)
3914 DO_FPCMP(FCMUO, fcmuo)
3915 DO_FPCMP(FACGE, facge)
3916 DO_FPCMP(FACGT, facgt)
3917
3918 #undef DO_FPCMP
3919
3920 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3921 {
3922     static gen_helper_gvec_4_ptr * const fns[3] = {
3923         gen_helper_sve_fcadd_h,
3924         gen_helper_sve_fcadd_s,
3925         gen_helper_sve_fcadd_d
3926     };
3927
3928     if (a->esz == 0) {
3929         return false;
3930     }
3931     if (sve_access_check(s)) {
3932         unsigned vsz = vec_full_reg_size(s);
3933         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3934         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3935                            vec_full_reg_offset(s, a->rn),
3936                            vec_full_reg_offset(s, a->rm),
3937                            pred_full_reg_offset(s, a->pg),
3938                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3939         tcg_temp_free_ptr(status);
3940     }
3941     return true;
3942 }
3943
3944 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3945
3946 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3947 {
3948     if (fn == NULL) {
3949         return false;
3950     }
3951     if (!sve_access_check(s)) {
3952         return true;
3953     }
3954
3955     unsigned vsz = vec_full_reg_size(s);
3956     unsigned desc;
3957     TCGv_i32 t_desc;
3958     TCGv_ptr pg = tcg_temp_new_ptr();
3959
3960     /* We would need 7 operands to pass these arguments "properly".
3961      * So we encode all the register numbers into the descriptor.
3962      */
3963     desc = deposit32(a->rd, 5, 5, a->rn);
3964     desc = deposit32(desc, 10, 5, a->rm);
3965     desc = deposit32(desc, 15, 5, a->ra);
3966     desc = simd_desc(vsz, vsz, desc);
3967
3968     t_desc = tcg_const_i32(desc);
3969     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3970     fn(cpu_env, pg, t_desc);
3971     tcg_temp_free_i32(t_desc);
3972     tcg_temp_free_ptr(pg);
3973     return true;
3974 }
3975
3976 #define DO_FMLA(NAME, name) \
3977 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
3978 {                                                                    \
3979     static gen_helper_sve_fmla * const fns[4] = {                    \
3980         NULL, gen_helper_sve_##name##_h,                             \
3981         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3982     };                                                               \
3983     return do_fmla(s, a, fns[a->esz]);                               \
3984 }
3985
3986 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3987 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3988 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3989 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3990
3991 #undef DO_FMLA
3992
3993 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
3994 {
3995     static gen_helper_sve_fmla * const fns[3] = {
3996         gen_helper_sve_fcmla_zpzzz_h,
3997         gen_helper_sve_fcmla_zpzzz_s,
3998         gen_helper_sve_fcmla_zpzzz_d,
3999     };
4000
4001     if (a->esz == 0) {
4002         return false;
4003     }
4004     if (sve_access_check(s)) {
4005         unsigned vsz = vec_full_reg_size(s);
4006         unsigned desc;
4007         TCGv_i32 t_desc;
4008         TCGv_ptr pg = tcg_temp_new_ptr();
4009
4010         /* We would need 7 operands to pass these arguments "properly".
4011          * So we encode all the register numbers into the descriptor.
4012          */
4013         desc = deposit32(a->rd, 5, 5, a->rn);
4014         desc = deposit32(desc, 10, 5, a->rm);
4015         desc = deposit32(desc, 15, 5, a->ra);
4016         desc = deposit32(desc, 20, 2, a->rot);
4017         desc = sextract32(desc, 0, 22);
4018         desc = simd_desc(vsz, vsz, desc);
4019
4020         t_desc = tcg_const_i32(desc);
4021         tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4022         fns[a->esz - 1](cpu_env, pg, t_desc);
4023         tcg_temp_free_i32(t_desc);
4024         tcg_temp_free_ptr(pg);
4025     }
4026     return true;
4027 }
4028
4029 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4030 {
4031     static gen_helper_gvec_3_ptr * const fns[2] = {
4032         gen_helper_gvec_fcmlah_idx,
4033         gen_helper_gvec_fcmlas_idx,
4034     };
4035
4036     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4037     tcg_debug_assert(a->rd == a->ra);
4038     if (sve_access_check(s)) {
4039         unsigned vsz = vec_full_reg_size(s);
4040         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4041         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4042                            vec_full_reg_offset(s, a->rn),
4043                            vec_full_reg_offset(s, a->rm),
4044                            status, vsz, vsz,
4045                            a->index * 4 + a->rot,
4046                            fns[a->esz - 1]);
4047         tcg_temp_free_ptr(status);
4048     }
4049     return true;
4050 }
4051
4052 /*
4053  *** SVE Floating Point Unary Operations Predicated Group
4054  */
4055
4056 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4057                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4058 {
4059     if (sve_access_check(s)) {
4060         unsigned vsz = vec_full_reg_size(s);
4061         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4062         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4063                            vec_full_reg_offset(s, rn),
4064                            pred_full_reg_offset(s, pg),
4065                            status, vsz, vsz, 0, fn);
4066         tcg_temp_free_ptr(status);
4067     }
4068     return true;
4069 }
4070
4071 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4072 {
4073     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4074 }
4075
4076 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4077 {
4078     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4079 }
4080
4081 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4082 {
4083     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4084 }
4085
4086 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4087 {
4088     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4089 }
4090
4091 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4092 {
4093     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4094 }
4095
4096 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4097 {
4098     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4099 }
4100
4101 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4102 {
4103     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4104 }
4105
4106 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4107 {
4108     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4109 }
4110
4111 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4112 {
4113     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4114 }
4115
4116 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4117 {
4118     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4119 }
4120
4121 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4122 {
4123     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4124 }
4125
4126 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4127 {
4128     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4129 }
4130
4131 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4132 {
4133     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4134 }
4135
4136 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4137 {
4138     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4139 }
4140
4141 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4142 {
4143     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4144 }
4145
4146 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4147 {
4148     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4149 }
4150
4151 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4152 {
4153     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4154 }
4155
4156 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4157 {
4158     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4159 }
4160
4161 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4162 {
4163     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4164 }
4165
4166 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4167 {
4168     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4169 }
4170
4171 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4172     gen_helper_sve_frint_h,
4173     gen_helper_sve_frint_s,
4174     gen_helper_sve_frint_d
4175 };
4176
4177 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4178 {
4179     if (a->esz == 0) {
4180         return false;
4181     }
4182     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4183                       frint_fns[a->esz - 1]);
4184 }
4185
4186 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4187 {
4188     static gen_helper_gvec_3_ptr * const fns[3] = {
4189         gen_helper_sve_frintx_h,
4190         gen_helper_sve_frintx_s,
4191         gen_helper_sve_frintx_d
4192     };
4193     if (a->esz == 0) {
4194         return false;
4195     }
4196     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4197 }
4198
4199 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4200 {
4201     if (a->esz == 0) {
4202         return false;
4203     }
4204     if (sve_access_check(s)) {
4205         unsigned vsz = vec_full_reg_size(s);
4206         TCGv_i32 tmode = tcg_const_i32(mode);
4207         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4208
4209         gen_helper_set_rmode(tmode, tmode, status);
4210
4211         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4212                            vec_full_reg_offset(s, a->rn),
4213                            pred_full_reg_offset(s, a->pg),
4214                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4215
4216         gen_helper_set_rmode(tmode, tmode, status);
4217         tcg_temp_free_i32(tmode);
4218         tcg_temp_free_ptr(status);
4219     }
4220     return true;
4221 }
4222
4223 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4224 {
4225     return do_frint_mode(s, a, float_round_nearest_even);
4226 }
4227
4228 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4229 {
4230     return do_frint_mode(s, a, float_round_up);
4231 }
4232
4233 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4234 {
4235     return do_frint_mode(s, a, float_round_down);
4236 }
4237
4238 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4239 {
4240     return do_frint_mode(s, a, float_round_to_zero);
4241 }
4242
4243 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4244 {
4245     return do_frint_mode(s, a, float_round_ties_away);
4246 }
4247
4248 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4249 {
4250     static gen_helper_gvec_3_ptr * const fns[3] = {
4251         gen_helper_sve_frecpx_h,
4252         gen_helper_sve_frecpx_s,
4253         gen_helper_sve_frecpx_d
4254     };
4255     if (a->esz == 0) {
4256         return false;
4257     }
4258     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4259 }
4260
4261 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4262 {
4263     static gen_helper_gvec_3_ptr * const fns[3] = {
4264         gen_helper_sve_fsqrt_h,
4265         gen_helper_sve_fsqrt_s,
4266         gen_helper_sve_fsqrt_d
4267     };
4268     if (a->esz == 0) {
4269         return false;
4270     }
4271     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4272 }
4273
4274 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4275 {
4276     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4277 }
4278
4279 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4280 {
4281     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4282 }
4283
4284 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4285 {
4286     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4287 }
4288
4289 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4290 {
4291     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4292 }
4293
4294 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4295 {
4296     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4297 }
4298
4299 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4300 {
4301     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4302 }
4303
4304 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4305 {
4306     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4307 }
4308
4309 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4310 {
4311     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4312 }
4313
4314 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4315 {
4316     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4317 }
4318
4319 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4320 {
4321     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4322 }
4323
4324 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4325 {
4326     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4327 }
4328
4329 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4330 {
4331     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4332 }
4333
4334 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4335 {
4336     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4337 }
4338
4339 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4340 {
4341     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4342 }
4343
4344 /*
4345  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4346  */
4347
4348 /* Subroutine loading a vector register at VOFS of LEN bytes.
4349  * The load should begin at the address Rn + IMM.
4350  */
4351
4352 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4353 {
4354     int len_align = QEMU_ALIGN_DOWN(len, 8);
4355     int len_remain = len % 8;
4356     int nparts = len / 8 + ctpop8(len_remain);
4357     int midx = get_mem_index(s);
4358     TCGv_i64 addr, t0, t1;
4359
4360     addr = tcg_temp_new_i64();
4361     t0 = tcg_temp_new_i64();
4362
4363     /* Note that unpredicated load/store of vector/predicate registers
4364      * are defined as a stream of bytes, which equates to little-endian
4365      * operations on larger quantities.  There is no nice way to force
4366      * a little-endian load for aarch64_be-linux-user out of line.
4367      *
4368      * Attempt to keep code expansion to a minimum by limiting the
4369      * amount of unrolling done.
4370      */
4371     if (nparts <= 4) {
4372         int i;
4373
4374         for (i = 0; i < len_align; i += 8) {
4375             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4376             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4377             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4378         }
4379     } else {
4380         TCGLabel *loop = gen_new_label();
4381         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4382
4383         gen_set_label(loop);
4384
4385         /* Minimize the number of local temps that must be re-read from
4386          * the stack each iteration.  Instead, re-compute values other
4387          * than the loop counter.
4388          */
4389         tp = tcg_temp_new_ptr();
4390         tcg_gen_addi_ptr(tp, i, imm);
4391         tcg_gen_extu_ptr_i64(addr, tp);
4392         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4393
4394         tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4395
4396         tcg_gen_add_ptr(tp, cpu_env, i);
4397         tcg_gen_addi_ptr(i, i, 8);
4398         tcg_gen_st_i64(t0, tp, vofs);
4399         tcg_temp_free_ptr(tp);
4400
4401         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4402         tcg_temp_free_ptr(i);
4403     }
4404
4405     /* Predicate register loads can be any multiple of 2.
4406      * Note that we still store the entire 64-bit unit into cpu_env.
4407      */
4408     if (len_remain) {
4409         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4410
4411         switch (len_remain) {
4412         case 2:
4413         case 4:
4414         case 8:
4415             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4416             break;
4417
4418         case 6:
4419             t1 = tcg_temp_new_i64();
4420             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4421             tcg_gen_addi_i64(addr, addr, 4);
4422             tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4423             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4424             tcg_temp_free_i64(t1);
4425             break;
4426
4427         default:
4428             g_assert_not_reached();
4429         }
4430         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4431     }
4432     tcg_temp_free_i64(addr);
4433     tcg_temp_free_i64(t0);
4434 }
4435
4436 /* Similarly for stores.  */
4437 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4438 {
4439     int len_align = QEMU_ALIGN_DOWN(len, 8);
4440     int len_remain = len % 8;
4441     int nparts = len / 8 + ctpop8(len_remain);
4442     int midx = get_mem_index(s);
4443     TCGv_i64 addr, t0;
4444
4445     addr = tcg_temp_new_i64();
4446     t0 = tcg_temp_new_i64();
4447
4448     /* Note that unpredicated load/store of vector/predicate registers
4449      * are defined as a stream of bytes, which equates to little-endian
4450      * operations on larger quantities.  There is no nice way to force
4451      * a little-endian store for aarch64_be-linux-user out of line.
4452      *
4453      * Attempt to keep code expansion to a minimum by limiting the
4454      * amount of unrolling done.
4455      */
4456     if (nparts <= 4) {
4457         int i;
4458
4459         for (i = 0; i < len_align; i += 8) {
4460             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4461             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4462             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4463         }
4464     } else {
4465         TCGLabel *loop = gen_new_label();
4466         TCGv_ptr t2, i = tcg_const_local_ptr(0);
4467
4468         gen_set_label(loop);
4469
4470         t2 = tcg_temp_new_ptr();
4471         tcg_gen_add_ptr(t2, cpu_env, i);
4472         tcg_gen_ld_i64(t0, t2, vofs);
4473
4474         /* Minimize the number of local temps that must be re-read from
4475          * the stack each iteration.  Instead, re-compute values other
4476          * than the loop counter.
4477          */
4478         tcg_gen_addi_ptr(t2, i, imm);
4479         tcg_gen_extu_ptr_i64(addr, t2);
4480         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4481         tcg_temp_free_ptr(t2);
4482
4483         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4484
4485         tcg_gen_addi_ptr(i, i, 8);
4486
4487         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4488         tcg_temp_free_ptr(i);
4489     }
4490
4491     /* Predicate register stores can be any multiple of 2.  */
4492     if (len_remain) {
4493         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4494         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4495
4496         switch (len_remain) {
4497         case 2:
4498         case 4:
4499         case 8:
4500             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4501             break;
4502
4503         case 6:
4504             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4505             tcg_gen_addi_i64(addr, addr, 4);
4506             tcg_gen_shri_i64(t0, t0, 32);
4507             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4508             break;
4509
4510         default:
4511             g_assert_not_reached();
4512         }
4513     }
4514     tcg_temp_free_i64(addr);
4515     tcg_temp_free_i64(t0);
4516 }
4517
4518 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4519 {
4520     if (sve_access_check(s)) {
4521         int size = vec_full_reg_size(s);
4522         int off = vec_full_reg_offset(s, a->rd);
4523         do_ldr(s, off, size, a->rn, a->imm * size);
4524     }
4525     return true;
4526 }
4527
4528 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4529 {
4530     if (sve_access_check(s)) {
4531         int size = pred_full_reg_size(s);
4532         int off = pred_full_reg_offset(s, a->rd);
4533         do_ldr(s, off, size, a->rn, a->imm * size);
4534     }
4535     return true;
4536 }
4537
4538 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4539 {
4540     if (sve_access_check(s)) {
4541         int size = vec_full_reg_size(s);
4542         int off = vec_full_reg_offset(s, a->rd);
4543         do_str(s, off, size, a->rn, a->imm * size);
4544     }
4545     return true;
4546 }
4547
4548 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4549 {
4550     if (sve_access_check(s)) {
4551         int size = pred_full_reg_size(s);
4552         int off = pred_full_reg_offset(s, a->rd);
4553         do_str(s, off, size, a->rn, a->imm * size);
4554     }
4555     return true;
4556 }
4557
4558 /*
4559  *** SVE Memory - Contiguous Load Group
4560  */
4561
4562 /* The memory mode of the dtype.  */
4563 static const TCGMemOp dtype_mop[16] = {
4564     MO_UB, MO_UB, MO_UB, MO_UB,
4565     MO_SL, MO_UW, MO_UW, MO_UW,
4566     MO_SW, MO_SW, MO_UL, MO_UL,
4567     MO_SB, MO_SB, MO_SB, MO_Q
4568 };
4569
4570 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4571
/* The vector element size of each dtype value; callers use it as a
 * shift count applied to the vector size in bytes.
 */
static const uint8_t dtype_esz[16] = {
    /* dtype  0 ..  3 */ 0, 1, 2, 3,
    /* dtype  4 ..  7 */ 3, 1, 2, 3,
    /* dtype  8 .. 11 */ 3, 2, 2, 3,
    /* dtype 12 .. 15 */ 3, 2, 1, 3,
};
4579
4580 static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
4581 {
4582     return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
4583 }
4584
4585 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4586                        int dtype, gen_helper_gvec_mem *fn)
4587 {
4588     unsigned vsz = vec_full_reg_size(s);
4589     TCGv_ptr t_pg;
4590     TCGv_i32 t_desc;
4591     int desc;
4592
4593     /* For e.g. LD4, there are not enough arguments to pass all 4
4594      * registers as pointers, so encode the regno into the data field.
4595      * For consistency, do this even for LD1.
4596      */
4597     desc = sve_memopidx(s, dtype);
4598     desc |= zt << MEMOPIDX_SHIFT;
4599     desc = simd_desc(vsz, vsz, desc);
4600     t_desc = tcg_const_i32(desc);
4601     t_pg = tcg_temp_new_ptr();
4602
4603     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4604     fn(cpu_env, t_pg, addr, t_desc);
4605
4606     tcg_temp_free_ptr(t_pg);
4607     tcg_temp_free_i32(t_desc);
4608 }
4609
4610 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4611                       TCGv_i64 addr, int dtype, int nreg)
4612 {
4613     static gen_helper_gvec_mem * const fns[2][16][4] = {
4614         /* Little-endian */
4615         { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4616             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4617           { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4618           { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4619           { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4620
4621           { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4622           { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4623             gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4624           { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4625           { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4626
4627           { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4628           { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4629           { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4630             gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4631           { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4632
4633           { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4634           { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4635           { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4636           { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4637             gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4638
4639         /* Big-endian */
4640         { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4641             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4642           { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4643           { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4644           { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4645
4646           { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4647           { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4648             gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4649           { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4650           { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4651
4652           { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4653           { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4654           { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4655             gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4656           { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4657
4658           { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4659           { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4660           { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4661           { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4662             gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
4663     };
4664     gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
4665
4666     /* While there are holes in the table, they are not
4667      * accessible via the instruction encoding.
4668      */
4669     assert(fn != NULL);
4670     do_mem_zpa(s, zt, pg, addr, dtype, fn);
4671 }
4672
4673 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4674 {
4675     if (a->rm == 31) {
4676         return false;
4677     }
4678     if (sve_access_check(s)) {
4679         TCGv_i64 addr = new_tmp_a64(s);
4680         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4681         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4682         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4683     }
4684     return true;
4685 }
4686
4687 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4688 {
4689     if (sve_access_check(s)) {
4690         int vsz = vec_full_reg_size(s);
4691         int elements = vsz >> dtype_esz[a->dtype];
4692         TCGv_i64 addr = new_tmp_a64(s);
4693
4694         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4695                          (a->imm * elements * (a->nreg + 1))
4696                          << dtype_msz(a->dtype));
4697         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4698     }
4699     return true;
4700 }
4701
4702 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4703 {
4704     static gen_helper_gvec_mem * const fns[2][16] = {
4705         /* Little-endian */
4706         { gen_helper_sve_ldff1bb_r,
4707           gen_helper_sve_ldff1bhu_r,
4708           gen_helper_sve_ldff1bsu_r,
4709           gen_helper_sve_ldff1bdu_r,
4710
4711           gen_helper_sve_ldff1sds_le_r,
4712           gen_helper_sve_ldff1hh_le_r,
4713           gen_helper_sve_ldff1hsu_le_r,
4714           gen_helper_sve_ldff1hdu_le_r,
4715
4716           gen_helper_sve_ldff1hds_le_r,
4717           gen_helper_sve_ldff1hss_le_r,
4718           gen_helper_sve_ldff1ss_le_r,
4719           gen_helper_sve_ldff1sdu_le_r,
4720
4721           gen_helper_sve_ldff1bds_r,
4722           gen_helper_sve_ldff1bss_r,
4723           gen_helper_sve_ldff1bhs_r,
4724           gen_helper_sve_ldff1dd_le_r },
4725
4726         /* Big-endian */
4727         { gen_helper_sve_ldff1bb_r,
4728           gen_helper_sve_ldff1bhu_r,
4729           gen_helper_sve_ldff1bsu_r,
4730           gen_helper_sve_ldff1bdu_r,
4731
4732           gen_helper_sve_ldff1sds_be_r,
4733           gen_helper_sve_ldff1hh_be_r,
4734           gen_helper_sve_ldff1hsu_be_r,
4735           gen_helper_sve_ldff1hdu_be_r,
4736
4737           gen_helper_sve_ldff1hds_be_r,
4738           gen_helper_sve_ldff1hss_be_r,
4739           gen_helper_sve_ldff1ss_be_r,
4740           gen_helper_sve_ldff1sdu_be_r,
4741
4742           gen_helper_sve_ldff1bds_r,
4743           gen_helper_sve_ldff1bss_r,
4744           gen_helper_sve_ldff1bhs_r,
4745           gen_helper_sve_ldff1dd_be_r },
4746     };
4747
4748     if (sve_access_check(s)) {
4749         TCGv_i64 addr = new_tmp_a64(s);
4750         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4751         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4752         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4753                    fns[s->be_data == MO_BE][a->dtype]);
4754     }
4755     return true;
4756 }
4757
4758 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4759 {
4760     static gen_helper_gvec_mem * const fns[2][16] = {
4761         /* Little-endian */
4762         { gen_helper_sve_ldnf1bb_r,
4763           gen_helper_sve_ldnf1bhu_r,
4764           gen_helper_sve_ldnf1bsu_r,
4765           gen_helper_sve_ldnf1bdu_r,
4766
4767           gen_helper_sve_ldnf1sds_le_r,
4768           gen_helper_sve_ldnf1hh_le_r,
4769           gen_helper_sve_ldnf1hsu_le_r,
4770           gen_helper_sve_ldnf1hdu_le_r,
4771
4772           gen_helper_sve_ldnf1hds_le_r,
4773           gen_helper_sve_ldnf1hss_le_r,
4774           gen_helper_sve_ldnf1ss_le_r,
4775           gen_helper_sve_ldnf1sdu_le_r,
4776
4777           gen_helper_sve_ldnf1bds_r,
4778           gen_helper_sve_ldnf1bss_r,
4779           gen_helper_sve_ldnf1bhs_r,
4780           gen_helper_sve_ldnf1dd_le_r },
4781
4782         /* Big-endian */
4783         { gen_helper_sve_ldnf1bb_r,
4784           gen_helper_sve_ldnf1bhu_r,
4785           gen_helper_sve_ldnf1bsu_r,
4786           gen_helper_sve_ldnf1bdu_r,
4787
4788           gen_helper_sve_ldnf1sds_be_r,
4789           gen_helper_sve_ldnf1hh_be_r,
4790           gen_helper_sve_ldnf1hsu_be_r,
4791           gen_helper_sve_ldnf1hdu_be_r,
4792
4793           gen_helper_sve_ldnf1hds_be_r,
4794           gen_helper_sve_ldnf1hss_be_r,
4795           gen_helper_sve_ldnf1ss_be_r,
4796           gen_helper_sve_ldnf1sdu_be_r,
4797
4798           gen_helper_sve_ldnf1bds_r,
4799           gen_helper_sve_ldnf1bss_r,
4800           gen_helper_sve_ldnf1bhs_r,
4801           gen_helper_sve_ldnf1dd_be_r },
4802     };
4803
4804     if (sve_access_check(s)) {
4805         int vsz = vec_full_reg_size(s);
4806         int elements = vsz >> dtype_esz[a->dtype];
4807         int off = (a->imm * elements) << dtype_msz(a->dtype);
4808         TCGv_i64 addr = new_tmp_a64(s);
4809
4810         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4811         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4812                    fns[s->be_data == MO_BE][a->dtype]);
4813     }
4814     return true;
4815 }
4816
4817 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4818 {
4819     static gen_helper_gvec_mem * const fns[2][4] = {
4820         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
4821           gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4822         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
4823           gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4824     };
4825     unsigned vsz = vec_full_reg_size(s);
4826     TCGv_ptr t_pg;
4827     TCGv_i32 t_desc;
4828     int desc, poff;
4829
4830     /* Load the first quadword using the normal predicated load helpers.  */
4831     desc = sve_memopidx(s, msz_dtype(msz));
4832     desc |= zt << MEMOPIDX_SHIFT;
4833     desc = simd_desc(16, 16, desc);
4834     t_desc = tcg_const_i32(desc);
4835
4836     poff = pred_full_reg_offset(s, pg);
4837     if (vsz > 16) {
4838         /*
4839          * Zero-extend the first 16 bits of the predicate into a temporary.
4840          * This avoids triggering an assert making sure we don't have bits
4841          * set within a predicate beyond VQ, but we have lowered VQ to 1
4842          * for this load operation.
4843          */
4844         TCGv_i64 tmp = tcg_temp_new_i64();
4845 #ifdef HOST_WORDS_BIGENDIAN
4846         poff += 6;
4847 #endif
4848         tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4849
4850         poff = offsetof(CPUARMState, vfp.preg_tmp);
4851         tcg_gen_st_i64(tmp, cpu_env, poff);
4852         tcg_temp_free_i64(tmp);
4853     }
4854
4855     t_pg = tcg_temp_new_ptr();
4856     tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4857
4858     fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4859
4860     tcg_temp_free_ptr(t_pg);
4861     tcg_temp_free_i32(t_desc);
4862
4863     /* Replicate that first quadword.  */
4864     if (vsz > 16) {
4865         unsigned dofs = vec_full_reg_offset(s, zt);
4866         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4867     }
4868 }
4869
4870 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4871 {
4872     if (a->rm == 31) {
4873         return false;
4874     }
4875     if (sve_access_check(s)) {
4876         int msz = dtype_msz(a->dtype);
4877         TCGv_i64 addr = new_tmp_a64(s);
4878         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4879         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4880         do_ldrq(s, a->rd, a->pg, addr, msz);
4881     }
4882     return true;
4883 }
4884
4885 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4886 {
4887     if (sve_access_check(s)) {
4888         TCGv_i64 addr = new_tmp_a64(s);
4889         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4890         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4891     }
4892     return true;
4893 }
4894
4895 /* Load and broadcast element.  */
4896 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4897 {
4898     if (!sve_access_check(s)) {
4899         return true;
4900     }
4901
4902     unsigned vsz = vec_full_reg_size(s);
4903     unsigned psz = pred_full_reg_size(s);
4904     unsigned esz = dtype_esz[a->dtype];
4905     unsigned msz = dtype_msz(a->dtype);
4906     TCGLabel *over = gen_new_label();
4907     TCGv_i64 temp;
4908
4909     /* If the guarding predicate has no bits set, no load occurs.  */
4910     if (psz <= 8) {
4911         /* Reduce the pred_esz_masks value simply to reduce the
4912          * size of the code generated here.
4913          */
4914         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4915         temp = tcg_temp_new_i64();
4916         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4917         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4918         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4919         tcg_temp_free_i64(temp);
4920     } else {
4921         TCGv_i32 t32 = tcg_temp_new_i32();
4922         find_last_active(s, t32, esz, a->pg);
4923         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4924         tcg_temp_free_i32(t32);
4925     }
4926
4927     /* Load the data.  */
4928     temp = tcg_temp_new_i64();
4929     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4930     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4931                         s->be_data | dtype_mop[a->dtype]);
4932
4933     /* Broadcast to *all* elements.  */
4934     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4935                          vsz, vsz, temp);
4936     tcg_temp_free_i64(temp);
4937
4938     /* Zero the inactive elements.  */
4939     gen_set_label(over);
4940     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4941     return true;
4942 }
4943
4944 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4945                       int msz, int esz, int nreg)
4946 {
4947     static gen_helper_gvec_mem * const fn_single[2][4][4] = {
4948         { { gen_helper_sve_st1bb_r,
4949             gen_helper_sve_st1bh_r,
4950             gen_helper_sve_st1bs_r,
4951             gen_helper_sve_st1bd_r },
4952           { NULL,
4953             gen_helper_sve_st1hh_le_r,
4954             gen_helper_sve_st1hs_le_r,
4955             gen_helper_sve_st1hd_le_r },
4956           { NULL, NULL,
4957             gen_helper_sve_st1ss_le_r,
4958             gen_helper_sve_st1sd_le_r },
4959           { NULL, NULL, NULL,
4960             gen_helper_sve_st1dd_le_r } },
4961         { { gen_helper_sve_st1bb_r,
4962             gen_helper_sve_st1bh_r,
4963             gen_helper_sve_st1bs_r,
4964             gen_helper_sve_st1bd_r },
4965           { NULL,
4966             gen_helper_sve_st1hh_be_r,
4967             gen_helper_sve_st1hs_be_r,
4968             gen_helper_sve_st1hd_be_r },
4969           { NULL, NULL,
4970             gen_helper_sve_st1ss_be_r,
4971             gen_helper_sve_st1sd_be_r },
4972           { NULL, NULL, NULL,
4973             gen_helper_sve_st1dd_be_r } },
4974     };
4975     static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
4976         { { gen_helper_sve_st2bb_r,
4977             gen_helper_sve_st2hh_le_r,
4978             gen_helper_sve_st2ss_le_r,
4979             gen_helper_sve_st2dd_le_r },
4980           { gen_helper_sve_st3bb_r,
4981             gen_helper_sve_st3hh_le_r,
4982             gen_helper_sve_st3ss_le_r,
4983             gen_helper_sve_st3dd_le_r },
4984           { gen_helper_sve_st4bb_r,
4985             gen_helper_sve_st4hh_le_r,
4986             gen_helper_sve_st4ss_le_r,
4987             gen_helper_sve_st4dd_le_r } },
4988         { { gen_helper_sve_st2bb_r,
4989             gen_helper_sve_st2hh_be_r,
4990             gen_helper_sve_st2ss_be_r,
4991             gen_helper_sve_st2dd_be_r },
4992           { gen_helper_sve_st3bb_r,
4993             gen_helper_sve_st3hh_be_r,
4994             gen_helper_sve_st3ss_be_r,
4995             gen_helper_sve_st3dd_be_r },
4996           { gen_helper_sve_st4bb_r,
4997             gen_helper_sve_st4hh_be_r,
4998             gen_helper_sve_st4ss_be_r,
4999             gen_helper_sve_st4dd_be_r } },
5000     };
5001     gen_helper_gvec_mem *fn;
5002     int be = s->be_data == MO_BE;
5003
5004     if (nreg == 0) {
5005         /* ST1 */
5006         fn = fn_single[be][msz][esz];
5007     } else {
5008         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5009         assert(msz == esz);
5010         fn = fn_multiple[be][nreg - 1][msz];
5011     }
5012     assert(fn != NULL);
5013     do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
5014 }
5015
5016 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5017 {
5018     if (a->rm == 31 || a->msz > a->esz) {
5019         return false;
5020     }
5021     if (sve_access_check(s)) {
5022         TCGv_i64 addr = new_tmp_a64(s);
5023         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5024         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5025         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5026     }
5027     return true;
5028 }
5029
5030 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5031 {
5032     if (a->msz > a->esz) {
5033         return false;
5034     }
5035     if (sve_access_check(s)) {
5036         int vsz = vec_full_reg_size(s);
5037         int elements = vsz >> a->esz;
5038         TCGv_i64 addr = new_tmp_a64(s);
5039
5040         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5041                          (a->imm * elements * (a->nreg + 1)) << a->msz);
5042         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5043     }
5044     return true;
5045 }
5046
5047 /*
5048  *** SVE gather loads / scatter stores
5049  */
5050
5051 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5052                        int scale, TCGv_i64 scalar, int msz,
5053                        gen_helper_gvec_mem_scatter *fn)
5054 {
5055     unsigned vsz = vec_full_reg_size(s);
5056     TCGv_ptr t_zm = tcg_temp_new_ptr();
5057     TCGv_ptr t_pg = tcg_temp_new_ptr();
5058     TCGv_ptr t_zt = tcg_temp_new_ptr();
5059     TCGv_i32 t_desc;
5060     int desc;
5061
5062     desc = sve_memopidx(s, msz_dtype(msz));
5063     desc |= scale << MEMOPIDX_SHIFT;
5064     desc = simd_desc(vsz, vsz, desc);
5065     t_desc = tcg_const_i32(desc);
5066
5067     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5068     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5069     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5070     fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5071
5072     tcg_temp_free_ptr(t_zt);
5073     tcg_temp_free_ptr(t_zm);
5074     tcg_temp_free_ptr(t_pg);
5075     tcg_temp_free_i32(t_desc);
5076 }
5077
/*
 * Gather-load helpers for 32-bit vector elements, selected by
 * trans_LD1_zprz and trans_LD1_zpiz when esz == MO_32.
 *
 * Indexed by [be][ff][xs][u][msz]:
 *   be  - 1 when s->be_data == MO_BE, else 0
 *   ff  - 0 for the plain helpers, 1 for the "ldff" (first-fault) helpers
 *   xs  - 0 selects the *_zsu helpers, 1 selects the *_zss helpers
 *   u   - 0 selects the signed-extending variants (e.g. ldbss),
 *         1 the unsigned variants (e.g. ldbsu)
 *   msz - memory element size (0 = byte, 1 = halfword, 2 = word)
 *
 * The [u == 0][msz == 2] slots are NULL: there is no signed variant
 * when the memory size equals the element size.  Callers assert that
 * the selected entry is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
5136
/*
 * Gather-load helpers for 64-bit vector elements, selected by
 * trans_LD1_zprz and trans_LD1_zpiz when esz == MO_64.
 *
 * Indexed by [be][ff][xs][u][msz], with the same meaning of be, ff, u
 * and msz as for the 32-bit table, except that msz ranges over four
 * sizes (byte, halfword, word, doubleword).
 *
 * Note that we overload xs=2 to indicate 64-bit offset: xs == 0 selects
 * the *_zsu helpers, xs == 1 the *_zss helpers and xs == 2 the *_zd
 * helpers.  trans_LD1_zpiz always uses xs == 2.
 *
 * The [u == 0][msz == 3] slots are NULL; callers assert that the
 * selected entry is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
5243
5244 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5245 {
5246     gen_helper_gvec_mem_scatter *fn = NULL;
5247     int be = s->be_data == MO_BE;
5248
5249     if (!sve_access_check(s)) {
5250         return true;
5251     }
5252
5253     switch (a->esz) {
5254     case MO_32:
5255         fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5256         break;
5257     case MO_64:
5258         fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5259         break;
5260     }
5261     assert(fn != NULL);
5262
5263     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5264                cpu_reg_sp(s, a->rn), a->msz, fn);
5265     return true;
5266 }
5267
5268 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5269 {
5270     gen_helper_gvec_mem_scatter *fn = NULL;
5271     int be = s->be_data == MO_BE;
5272     TCGv_i64 imm;
5273
5274     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5275         return false;
5276     }
5277     if (!sve_access_check(s)) {
5278         return true;
5279     }
5280
5281     switch (a->esz) {
5282     case MO_32:
5283         fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
5284         break;
5285     case MO_64:
5286         fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
5287         break;
5288     }
5289     assert(fn != NULL);
5290
5291     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5292      * by loading the immediate into the scalar parameter.
5293      */
5294     imm = tcg_const_i64(a->imm << a->msz);
5295     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5296     tcg_temp_free_i64(imm);
5297     return true;
5298 }
5299
/*
 * Scatter-store helpers for 32-bit vector elements, selected by
 * trans_ST1_zprz and trans_ST1_zpiz when esz == MO_32.
 *
 * Indexed by [be][xs][msz]:
 *   be  - 1 when s->be_data == MO_BE, else 0
 *   xs  - 0 selects the *_zsu helpers, 1 selects the *_zss helpers
 *   msz - memory element size (0 = byte, 1 = halfword, 2 = word)
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
5317
/*
 * Scatter-store helpers for 64-bit vector elements, selected by
 * trans_ST1_zprz and trans_ST1_zpiz when esz == MO_64.
 *
 * Indexed by [be][xs][msz], where msz covers byte, halfword, word and
 * doubleword.  Note that we overload xs=2 to indicate 64-bit offset:
 * xs == 0 selects the *_zsu helpers, xs == 1 the *_zss helpers and
 * xs == 2 the *_zd helpers.  trans_ST1_zpiz always uses xs == 2.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
5347
5348 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5349 {
5350     gen_helper_gvec_mem_scatter *fn;
5351     int be = s->be_data == MO_BE;
5352
5353     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5354         return false;
5355     }
5356     if (!sve_access_check(s)) {
5357         return true;
5358     }
5359     switch (a->esz) {
5360     case MO_32:
5361         fn = scatter_store_fn32[be][a->xs][a->msz];
5362         break;
5363     case MO_64:
5364         fn = scatter_store_fn64[be][a->xs][a->msz];
5365         break;
5366     default:
5367         g_assert_not_reached();
5368     }
5369     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5370                cpu_reg_sp(s, a->rn), a->msz, fn);
5371     return true;
5372 }
5373
5374 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5375 {
5376     gen_helper_gvec_mem_scatter *fn = NULL;
5377     int be = s->be_data == MO_BE;
5378     TCGv_i64 imm;
5379
5380     if (a->esz < a->msz) {
5381         return false;
5382     }
5383     if (!sve_access_check(s)) {
5384         return true;
5385     }
5386
5387     switch (a->esz) {
5388     case MO_32:
5389         fn = scatter_store_fn32[be][0][a->msz];
5390         break;
5391     case MO_64:
5392         fn = scatter_store_fn64[be][2][a->msz];
5393         break;
5394     }
5395     assert(fn != NULL);
5396
5397     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5398      * by loading the immediate into the scalar parameter.
5399      */
5400     imm = tcg_const_i64(a->imm << a->msz);
5401     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5402     tcg_temp_free_i64(imm);
5403     return true;
5404 }
5405
5406 /*
5407  * Prefetches
5408  */
5409
5410 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5411 {
5412     /* Prefetch is a nop within QEMU.  */
5413     (void)sve_access_check(s);
5414     return true;
5415 }
5416
5417 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5418 {
5419     if (a->rm == 31) {
5420         return false;
5421     }
5422     /* Prefetch is a nop within QEMU.  */
5423     (void)sve_access_check(s);
5424     return true;
5425 }
5426
5427 /*
5428  * Move Prefix
5429  *
5430  * TODO: The implementation so far could handle predicated merging movprfx.
5431  * The helper functions as written take an extra source register to
5432  * use in the operation, but the result is only written when predication
5433  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5434  * to allow the final write back to the destination to be unconditional.
5435  * For predicated zeroing movprfx, we need to rearrange the helpers to
5436  * allow the final write back to zero inactives.
5437  *
5438  * In the meantime, just emit the moves.
5439  */
5440
5441 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
5442 {
5443     return do_mov_z(s, a->rd, a->rn);
5444 }
5445
5446 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5447 {
5448     if (sve_access_check(s)) {
5449         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5450     }
5451     return true;
5452 }
5453
5454 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5455 {
5456     if (sve_access_check(s)) {
5457         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5458     }
5459     return true;
5460 }