target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
  38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                          TCGv_i64, uint32_t, uint32_t);
  40
  41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                      TCGv_ptr, TCGv_i32);
  43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                      TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                          TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(int x)
  64 {
  65     return (16 << tszimm_esz(x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(int x)
  70 {
  71     return x - (8 << tszimm_esz(x));
  72 }
  73
  74 static inline int plus1(int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 282 {
 283     if (a->rn == a->rm) { /* MOV */
 284         return do_mov_z(s, a->rd, a->rn);
 285     } else {
 286         return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 287     }
 288 }
 289
 290 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 291 {
 292     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 293 }
 294
 295 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 296 {
 297     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 298 }
 299
 300 /*
 301  *** SVE Integer Arithmetic - Unpredicated Group
 302  */
 303
 304 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 305 {
 306     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 307 }
 308
 309 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 310 {
 311     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 312 }
 313
 314 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 315 {
 316     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 317 }
 318
 319 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 320 {
 321     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 322 }
 323
 324 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 325 {
 326     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 327 }
 328
 329 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 330 {
 331     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 332 }
 333
 334 /*
 335  *** SVE Integer Arithmetic - Binary Predicated Group
 336  */
 337
 338 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 339 {
 340     unsigned vsz = vec_full_reg_size(s);
 341     if (fn == NULL) {
 342         return false;
 343     }
 344     if (sve_access_check(s)) {
 345         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 346                            vec_full_reg_offset(s, a->rn),
 347                            vec_full_reg_offset(s, a->rm),
 348                            pred_full_reg_offset(s, a->pg),
 349                            vsz, vsz, 0, fn);
 350     }
 351     return true;
 352 }
 353
 354 #define DO_ZPZZ(NAME, name) \
 355 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
 356                                 uint32_t insn)                            \
 357 {                                                                         \
 358     static gen_helper_gvec_4 * const fns[4] = {                           \
 359         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 360         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 361     };                                                                    \
 362     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 363 }
 364
 365 DO_ZPZZ(AND, and)
 366 DO_ZPZZ(EOR, eor)
 367 DO_ZPZZ(ORR, orr)
 368 DO_ZPZZ(BIC, bic)
 369
 370 DO_ZPZZ(ADD, add)
 371 DO_ZPZZ(SUB, sub)
 372
 373 DO_ZPZZ(SMAX, smax)
 374 DO_ZPZZ(UMAX, umax)
 375 DO_ZPZZ(SMIN, smin)
 376 DO_ZPZZ(UMIN, umin)
 377 DO_ZPZZ(SABD, sabd)
 378 DO_ZPZZ(UABD, uabd)
 379
 380 DO_ZPZZ(MUL, mul)
 381 DO_ZPZZ(SMULH, smulh)
 382 DO_ZPZZ(UMULH, umulh)
 383
 384 DO_ZPZZ(ASR, asr)
 385 DO_ZPZZ(LSR, lsr)
 386 DO_ZPZZ(LSL, lsl)
 387
 388 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 389 {
 390     static gen_helper_gvec_4 * const fns[4] = {
 391         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 392     };
 393     return do_zpzz_ool(s, a, fns[a->esz]);
 394 }
 395
 396 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 397 {
 398     static gen_helper_gvec_4 * const fns[4] = {
 399         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 400     };
 401     return do_zpzz_ool(s, a, fns[a->esz]);
 402 }
 403
 404 DO_ZPZZ(SEL, sel)
 405
 406 #undef DO_ZPZZ
 407
 408 /*
 409  *** SVE Integer Arithmetic - Unary Predicated Group
 410  */
 411
 412 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 413 {
 414     if (fn == NULL) {
 415         return false;
 416     }
 417     if (sve_access_check(s)) {
 418         unsigned vsz = vec_full_reg_size(s);
 419         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 420                            vec_full_reg_offset(s, a->rn),
 421                            pred_full_reg_offset(s, a->pg),
 422                            vsz, vsz, 0, fn);
 423     }
 424     return true;
 425 }
 426
 427 #define DO_ZPZ(NAME, name) \
 428 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 429 {                                                                   \
 430     static gen_helper_gvec_3 * const fns[4] = {                     \
 431         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 432         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 433     };                                                              \
 434     return do_zpz_ool(s, a, fns[a->esz]);                           \
 435 }
 436
 437 DO_ZPZ(CLS, cls)
 438 DO_ZPZ(CLZ, clz)
 439 DO_ZPZ(CNT_zpz, cnt_zpz)
 440 DO_ZPZ(CNOT, cnot)
 441 DO_ZPZ(NOT_zpz, not_zpz)
 442 DO_ZPZ(ABS, abs)
 443 DO_ZPZ(NEG, neg)
 444
 445 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 446 {
 447     static gen_helper_gvec_3 * const fns[4] = {
 448         NULL,
 449         gen_helper_sve_fabs_h,
 450         gen_helper_sve_fabs_s,
 451         gen_helper_sve_fabs_d
 452     };
 453     return do_zpz_ool(s, a, fns[a->esz]);
 454 }
 455
 456 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 457 {
 458     static gen_helper_gvec_3 * const fns[4] = {
 459         NULL,
 460         gen_helper_sve_fneg_h,
 461         gen_helper_sve_fneg_s,
 462         gen_helper_sve_fneg_d
 463     };
 464     return do_zpz_ool(s, a, fns[a->esz]);
 465 }
 466
 467 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 468 {
 469     static gen_helper_gvec_3 * const fns[4] = {
 470         NULL,
 471         gen_helper_sve_sxtb_h,
 472         gen_helper_sve_sxtb_s,
 473         gen_helper_sve_sxtb_d
 474     };
 475     return do_zpz_ool(s, a, fns[a->esz]);
 476 }
 477
 478 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 479 {
 480     static gen_helper_gvec_3 * const fns[4] = {
 481         NULL,
 482         gen_helper_sve_uxtb_h,
 483         gen_helper_sve_uxtb_s,
 484         gen_helper_sve_uxtb_d
 485     };
 486     return do_zpz_ool(s, a, fns[a->esz]);
 487 }
 488
 489 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 490 {
 491     static gen_helper_gvec_3 * const fns[4] = {
 492         NULL, NULL,
 493         gen_helper_sve_sxth_s,
 494         gen_helper_sve_sxth_d
 495     };
 496     return do_zpz_ool(s, a, fns[a->esz]);
 497 }
 498
 499 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 500 {
 501     static gen_helper_gvec_3 * const fns[4] = {
 502         NULL, NULL,
 503         gen_helper_sve_uxth_s,
 504         gen_helper_sve_uxth_d
 505     };
 506     return do_zpz_ool(s, a, fns[a->esz]);
 507 }
 508
 509 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 510 {
 511     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 512 }
 513
 514 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 515 {
 516     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 517 }
 518
 519 #undef DO_ZPZ
 520
 521 /*
 522  *** SVE Integer Reduction Group
 523  */
 524
 525 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 526 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 527                        gen_helper_gvec_reduc *fn)
 528 {
 529     unsigned vsz = vec_full_reg_size(s);
 530     TCGv_ptr t_zn, t_pg;
 531     TCGv_i32 desc;
 532     TCGv_i64 temp;
 533
 534     if (fn == NULL) {
 535         return false;
 536     }
 537     if (!sve_access_check(s)) {
 538         return true;
 539     }
 540
 541     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 542     temp = tcg_temp_new_i64();
 543     t_zn = tcg_temp_new_ptr();
 544     t_pg = tcg_temp_new_ptr();
 545
 546     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 547     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 548     fn(temp, t_zn, t_pg, desc);
 549     tcg_temp_free_ptr(t_zn);
 550     tcg_temp_free_ptr(t_pg);
 551     tcg_temp_free_i32(desc);
 552
 553     write_fp_dreg(s, a->rd, temp);
 554     tcg_temp_free_i64(temp);
 555     return true;
 556 }
 557
 558 #define DO_VPZ(NAME, name) \
 559 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 560 {                                                                        \
 561     static gen_helper_gvec_reduc * const fns[4] = {                      \
 562         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 563         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 564     };                                                                   \
 565     return do_vpz_ool(s, a, fns[a->esz]);                                \
 566 }
 567
 568 DO_VPZ(ORV, orv)
 569 DO_VPZ(ANDV, andv)
 570 DO_VPZ(EORV, eorv)
 571
 572 DO_VPZ(UADDV, uaddv)
 573 DO_VPZ(SMAXV, smaxv)
 574 DO_VPZ(UMAXV, umaxv)
 575 DO_VPZ(SMINV, sminv)
 576 DO_VPZ(UMINV, uminv)
 577
 578 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 579 {
 580     static gen_helper_gvec_reduc * const fns[4] = {
 581         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 582         gen_helper_sve_saddv_s, NULL
 583     };
 584     return do_vpz_ool(s, a, fns[a->esz]);
 585 }
 586
 587 #undef DO_VPZ
 588
 589 /*
 590  *** SVE Shift by Immediate - Predicated Group
 591  */
 592
 593 /* Store zero into every active element of Zd.  We will use this for two
 594  * and three-operand predicated instructions for which logic dictates a
 595  * zero result.
 596  */
 597 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 598 {
 599     static gen_helper_gvec_2 * const fns[4] = {
 600         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 601         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 602     };
 603     if (sve_access_check(s)) {
 604         unsigned vsz = vec_full_reg_size(s);
 605         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 606                            pred_full_reg_offset(s, pg),
 607                            vsz, vsz, 0, fns[esz]);
 608     }
 609     return true;
 610 }
 611
 612 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 613 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 614 {
 615     static gen_helper_gvec_3 * const fns[4] = {
 616         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 617         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 618     };
 619     unsigned vsz = vec_full_reg_size(s);
 620     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 621                        vec_full_reg_offset(s, rn),
 622                        pred_full_reg_offset(s, pg),
 623                        vsz, vsz, 0, fns[esz]);
 624 }
 625
 626 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 627                         gen_helper_gvec_3 *fn)
 628 {
 629     if (sve_access_check(s)) {
 630         unsigned vsz = vec_full_reg_size(s);
 631         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 632                            vec_full_reg_offset(s, a->rn),
 633                            pred_full_reg_offset(s, a->pg),
 634                            vsz, vsz, a->imm, fn);
 635     }
 636     return true;
 637 }
 638
 639 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 640 {
 641     static gen_helper_gvec_3 * const fns[4] = {
 642         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 643         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 644     };
 645     if (a->esz < 0) {
 646         /* Invalid tsz encoding -- see tszimm_esz. */
 647         return false;
 648     }
 649     /* Shift by element size is architecturally valid.  For
 650        arithmetic right-shift, it's the same as by one less. */
 651     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 652     return do_zpzi_ool(s, a, fns[a->esz]);
 653 }
 654
 655 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 656 {
 657     static gen_helper_gvec_3 * const fns[4] = {
 658         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 659         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 660     };
 661     if (a->esz < 0) {
 662         return false;
 663     }
 664     /* Shift by element size is architecturally valid.
 665        For logical shifts, it is a zeroing operation.  */
 666     if (a->imm >= (8 << a->esz)) {
 667         return do_clr_zp(s, a->rd, a->pg, a->esz);
 668     } else {
 669         return do_zpzi_ool(s, a, fns[a->esz]);
 670     }
 671 }
 672
 673 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 674 {
 675     static gen_helper_gvec_3 * const fns[4] = {
 676         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 677         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 678     };
 679     if (a->esz < 0) {
 680         return false;
 681     }
 682     /* Shift by element size is architecturally valid.
 683        For logical shifts, it is a zeroing operation.  */
 684     if (a->imm >= (8 << a->esz)) {
 685         return do_clr_zp(s, a->rd, a->pg, a->esz);
 686     } else {
 687         return do_zpzi_ool(s, a, fns[a->esz]);
 688     }
 689 }
 690
 691 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 692 {
 693     static gen_helper_gvec_3 * const fns[4] = {
 694         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 695         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 696     };
 697     if (a->esz < 0) {
 698         return false;
 699     }
 700     /* Shift by element size is architecturally valid.  For arithmetic
 701        right shift for division, it is a zeroing operation.  */
 702     if (a->imm >= (8 << a->esz)) {
 703         return do_clr_zp(s, a->rd, a->pg, a->esz);
 704     } else {
 705         return do_zpzi_ool(s, a, fns[a->esz]);
 706     }
 707 }
 708
 709 /*
 710  *** SVE Bitwise Shift - Predicated Group
 711  */
 712
 713 #define DO_ZPZW(NAME, name) \
 714 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
 715                                 uint32_t insn)                            \
 716 {                                                                         \
 717     static gen_helper_gvec_4 * const fns[3] = {                           \
 718         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 719         gen_helper_sve_##name##_zpzw_s,                                   \
 720     };                                                                    \
 721     if (a->esz < 0 || a->esz >= 3) {                                      \
 722         return false;                                                     \
 723     }                                                                     \
 724     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 725 }
 726
 727 DO_ZPZW(ASR, asr)
 728 DO_ZPZW(LSR, lsr)
 729 DO_ZPZW(LSL, lsl)
 730
 731 #undef DO_ZPZW
 732
 733 /*
 734  *** SVE Bitwise Shift - Unpredicated Group
 735  */
 736
 737 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 738                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 739                                          int64_t, uint32_t, uint32_t))
 740 {
 741     if (a->esz < 0) {
 742         /* Invalid tsz encoding -- see tszimm_esz. */
 743         return false;
 744     }
 745     if (sve_access_check(s)) {
 746         unsigned vsz = vec_full_reg_size(s);
 747         /* Shift by element size is architecturally valid.  For
 748            arithmetic right-shift, it's the same as by one less.
 749            Otherwise it is a zeroing operation.  */
 750         if (a->imm >= 8 << a->esz) {
 751             if (asr) {
 752                 a->imm = (8 << a->esz) - 1;
 753             } else {
 754                 do_dupi_z(s, a->rd, 0);
 755                 return true;
 756             }
 757         }
 758         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 759                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 760     }
 761     return true;
 762 }
 763
 764 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 765 {
 766     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 767 }
 768
 769 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 770 {
 771     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 772 }
 773
 774 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 775 {
 776     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 777 }
 778
 779 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 780 {
 781     if (fn == NULL) {
 782         return false;
 783     }
 784     if (sve_access_check(s)) {
 785         unsigned vsz = vec_full_reg_size(s);
 786         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 787                            vec_full_reg_offset(s, a->rn),
 788                            vec_full_reg_offset(s, a->rm),
 789                            vsz, vsz, 0, fn);
 790     }
 791     return true;
 792 }
 793
 794 #define DO_ZZW(NAME, name) \
 795 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
 796                                uint32_t insn)                             \
 797 {                                                                         \
 798     static gen_helper_gvec_3 * const fns[4] = {                           \
 799         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 800         gen_helper_sve_##name##_zzw_s, NULL                               \
 801     };                                                                    \
 802     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 803 }
 804
 805 DO_ZZW(ASR, asr)
 806 DO_ZZW(LSR, lsr)
 807 DO_ZZW(LSL, lsl)
 808
 809 #undef DO_ZZW
 810
 811 /*
 812  *** SVE Integer Multiply-Add Group
 813  */
 814
 815 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 816                          gen_helper_gvec_5 *fn)
 817 {
 818     if (sve_access_check(s)) {
 819         unsigned vsz = vec_full_reg_size(s);
 820         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 821                            vec_full_reg_offset(s, a->ra),
 822                            vec_full_reg_offset(s, a->rn),
 823                            vec_full_reg_offset(s, a->rm),
 824                            pred_full_reg_offset(s, a->pg),
 825                            vsz, vsz, 0, fn);
 826     }
 827     return true;
 828 }
 829
 830 #define DO_ZPZZZ(NAME, name) \
 831 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
 832 {                                                                    \
 833     static gen_helper_gvec_5 * const fns[4] = {                      \
 834         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 835         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 836     };                                                               \
 837     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 838 }
 839
 840 DO_ZPZZZ(MLA, mla)
 841 DO_ZPZZZ(MLS, mls)
 842
 843 #undef DO_ZPZZZ
 844
 845 /*
 846  *** SVE Index Generation Group
 847  */
 848
 849 static void do_index(DisasContext *s, int esz, int rd,
 850                      TCGv_i64 start, TCGv_i64 incr)
 851 {
 852     unsigned vsz = vec_full_reg_size(s);
 853     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 854     TCGv_ptr t_zd = tcg_temp_new_ptr();
 855
 856     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 857     if (esz == 3) {
 858         gen_helper_sve_index_d(t_zd, start, incr, desc);
 859     } else {
 860         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 861         static index_fn * const fns[3] = {
 862             gen_helper_sve_index_b,
 863             gen_helper_sve_index_h,
 864             gen_helper_sve_index_s,
 865         };
 866         TCGv_i32 s32 = tcg_temp_new_i32();
 867         TCGv_i32 i32 = tcg_temp_new_i32();
 868
 869         tcg_gen_extrl_i64_i32(s32, start);
 870         tcg_gen_extrl_i64_i32(i32, incr);
 871         fns[esz](t_zd, s32, i32, desc);
 872
 873         tcg_temp_free_i32(s32);
 874         tcg_temp_free_i32(i32);
 875     }
 876     tcg_temp_free_ptr(t_zd);
 877     tcg_temp_free_i32(desc);
 878 }
 879
 880 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
 881 {
 882     if (sve_access_check(s)) {
 883         TCGv_i64 start = tcg_const_i64(a->imm1);
 884         TCGv_i64 incr = tcg_const_i64(a->imm2);
 885         do_index(s, a->esz, a->rd, start, incr);
 886         tcg_temp_free_i64(start);
 887         tcg_temp_free_i64(incr);
 888     }
 889     return true;
 890 }
 891
 892 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
 893 {
 894     if (sve_access_check(s)) {
 895         TCGv_i64 start = tcg_const_i64(a->imm);
 896         TCGv_i64 incr = cpu_reg(s, a->rm);
 897         do_index(s, a->esz, a->rd, start, incr);
 898         tcg_temp_free_i64(start);
 899     }
 900     return true;
 901 }
 902
 903 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
 904 {
 905     if (sve_access_check(s)) {
 906         TCGv_i64 start = cpu_reg(s, a->rn);
 907         TCGv_i64 incr = tcg_const_i64(a->imm);
 908         do_index(s, a->esz, a->rd, start, incr);
 909         tcg_temp_free_i64(incr);
 910     }
 911     return true;
 912 }
 913
 914 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
 915 {
 916     if (sve_access_check(s)) {
 917         TCGv_i64 start = cpu_reg(s, a->rn);
 918         TCGv_i64 incr = cpu_reg(s, a->rm);
 919         do_index(s, a->esz, a->rd, start, incr);
 920     }
 921     return true;
 922 }
 923
 924 /*
 925  *** SVE Stack Allocation Group
 926  */
 927
 928 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
 929 {
 930     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 931     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 932     tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 933     return true;
 934 }
 935
 936 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
 937 {
 938     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 939     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 940     tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 941     return true;
 942 }
 943
 944 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
 945 {
 946     TCGv_i64 reg = cpu_reg(s, a->rd);
 947     tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 948     return true;
 949 }
 950
 951 /*
 952  *** SVE Compute Vector Address Group
 953  */
 954
 955 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 956 {
 957     if (sve_access_check(s)) {
 958         unsigned vsz = vec_full_reg_size(s);
 959         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 960                            vec_full_reg_offset(s, a->rn),
 961                            vec_full_reg_offset(s, a->rm),
 962                            vsz, vsz, a->imm, fn);
 963     }
 964     return true;
 965 }
 966
 967 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
 968 {
 969     return do_adr(s, a, gen_helper_sve_adr_p32);
 970 }
 971
 972 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
 973 {
 974     return do_adr(s, a, gen_helper_sve_adr_p64);
 975 }
 976
 977 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
 978 {
 979     return do_adr(s, a, gen_helper_sve_adr_s32);
 980 }
 981
 982 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
 983 {
 984     return do_adr(s, a, gen_helper_sve_adr_u32);
 985 }
 986
 987 /*
 988  *** SVE Integer Misc - Unpredicated Group
 989  */
 990
 991 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
 992 {
 993     static gen_helper_gvec_2 * const fns[4] = {
 994         NULL,
 995         gen_helper_sve_fexpa_h,
 996         gen_helper_sve_fexpa_s,
 997         gen_helper_sve_fexpa_d,
 998     };
 999     if (a->esz == 0) {
1000         return false;
1001     }
1002     if (sve_access_check(s)) {
1003         unsigned vsz = vec_full_reg_size(s);
1004         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1005                            vec_full_reg_offset(s, a->rn),
1006                            vsz, vsz, 0, fns[a->esz]);
1007     }
1008     return true;
1009 }
1010
1011 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1012 {
1013     static gen_helper_gvec_3 * const fns[4] = {
1014         NULL,
1015         gen_helper_sve_ftssel_h,
1016         gen_helper_sve_ftssel_s,
1017         gen_helper_sve_ftssel_d,
1018     };
1019     if (a->esz == 0) {
1020         return false;
1021     }
1022     if (sve_access_check(s)) {
1023         unsigned vsz = vec_full_reg_size(s);
1024         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1025                            vec_full_reg_offset(s, a->rn),
1026                            vec_full_reg_offset(s, a->rm),
1027                            vsz, vsz, 0, fns[a->esz]);
1028     }
1029     return true;
1030 }
1031
1032 /*
1033  *** SVE Predicate Logical Operations Group
1034  */
1035
1036 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1037                           const GVecGen4 *gvec_op)
1038 {
1039     if (!sve_access_check(s)) {
1040         return true;
1041     }
1042
1043     unsigned psz = pred_gvec_reg_size(s);
1044     int dofs = pred_full_reg_offset(s, a->rd);
1045     int nofs = pred_full_reg_offset(s, a->rn);
1046     int mofs = pred_full_reg_offset(s, a->rm);
1047     int gofs = pred_full_reg_offset(s, a->pg);
1048
1049     if (psz == 8) {
1050         /* Do the operation and the flags generation in temps.  */
1051         TCGv_i64 pd = tcg_temp_new_i64();
1052         TCGv_i64 pn = tcg_temp_new_i64();
1053         TCGv_i64 pm = tcg_temp_new_i64();
1054         TCGv_i64 pg = tcg_temp_new_i64();
1055
1056         tcg_gen_ld_i64(pn, cpu_env, nofs);
1057         tcg_gen_ld_i64(pm, cpu_env, mofs);
1058         tcg_gen_ld_i64(pg, cpu_env, gofs);
1059
1060         gvec_op->fni8(pd, pn, pm, pg);
1061         tcg_gen_st_i64(pd, cpu_env, dofs);
1062
1063         do_predtest1(pd, pg);
1064
1065         tcg_temp_free_i64(pd);
1066         tcg_temp_free_i64(pn);
1067         tcg_temp_free_i64(pm);
1068         tcg_temp_free_i64(pg);
1069     } else {
1070         /* The operation and flags generation is large.  The computation
1071          * of the flags depends on the original contents of the guarding
1072          * predicate.  If the destination overwrites the guarding predicate,
1073          * then the easiest way to get this right is to save a copy.
1074           */
1075         int tofs = gofs;
1076         if (a->rd == a->pg) {
1077             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1078             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1079         }
1080
1081         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1082         do_predtest(s, dofs, tofs, psz / 8);
1083     }
1084     return true;
1085 }
1086
1087 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1088 {
1089     tcg_gen_and_i64(pd, pn, pm);
1090     tcg_gen_and_i64(pd, pd, pg);
1091 }
1092
1093 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1094                            TCGv_vec pm, TCGv_vec pg)
1095 {
1096     tcg_gen_and_vec(vece, pd, pn, pm);
1097     tcg_gen_and_vec(vece, pd, pd, pg);
1098 }
1099
1100 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1101 {
1102     static const GVecGen4 op = {
1103         .fni8 = gen_and_pg_i64,
1104         .fniv = gen_and_pg_vec,
1105         .fno = gen_helper_sve_and_pppp,
1106         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1107     };
1108     if (a->s) {
1109         return do_pppp_flags(s, a, &op);
1110     } else if (a->rn == a->rm) {
1111         if (a->pg == a->rn) {
1112             return do_mov_p(s, a->rd, a->rn);
1113         } else {
1114             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1115         }
1116     } else if (a->pg == a->rn || a->pg == a->rm) {
1117         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1118     } else {
1119         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1120     }
1121 }
1122
1123 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1124 {
1125     tcg_gen_andc_i64(pd, pn, pm);
1126     tcg_gen_and_i64(pd, pd, pg);
1127 }
1128
1129 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1130                            TCGv_vec pm, TCGv_vec pg)
1131 {
1132     tcg_gen_andc_vec(vece, pd, pn, pm);
1133     tcg_gen_and_vec(vece, pd, pd, pg);
1134 }
1135
1136 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1137 {
1138     static const GVecGen4 op = {
1139         .fni8 = gen_bic_pg_i64,
1140         .fniv = gen_bic_pg_vec,
1141         .fno = gen_helper_sve_bic_pppp,
1142         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1143     };
1144     if (a->s) {
1145         return do_pppp_flags(s, a, &op);
1146     } else if (a->pg == a->rn) {
1147         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1148     } else {
1149         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1150     }
1151 }
1152
1153 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1154 {
1155     tcg_gen_xor_i64(pd, pn, pm);
1156     tcg_gen_and_i64(pd, pd, pg);
1157 }
1158
1159 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1160                            TCGv_vec pm, TCGv_vec pg)
1161 {
1162     tcg_gen_xor_vec(vece, pd, pn, pm);
1163     tcg_gen_and_vec(vece, pd, pd, pg);
1164 }
1165
1166 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1167 {
1168     static const GVecGen4 op = {
1169         .fni8 = gen_eor_pg_i64,
1170         .fniv = gen_eor_pg_vec,
1171         .fno = gen_helper_sve_eor_pppp,
1172         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1173     };
1174     if (a->s) {
1175         return do_pppp_flags(s, a, &op);
1176     } else {
1177         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1178     }
1179 }
1180
1181 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1182 {
1183     tcg_gen_and_i64(pn, pn, pg);
1184     tcg_gen_andc_i64(pm, pm, pg);
1185     tcg_gen_or_i64(pd, pn, pm);
1186 }
1187
1188 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1189                            TCGv_vec pm, TCGv_vec pg)
1190 {
1191     tcg_gen_and_vec(vece, pn, pn, pg);
1192     tcg_gen_andc_vec(vece, pm, pm, pg);
1193     tcg_gen_or_vec(vece, pd, pn, pm);
1194 }
1195
1196 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1197 {
1198     static const GVecGen4 op = {
1199         .fni8 = gen_sel_pg_i64,
1200         .fniv = gen_sel_pg_vec,
1201         .fno = gen_helper_sve_sel_pppp,
1202         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1203     };
1204     if (a->s) {
1205         return false;
1206     } else {
1207         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1208     }
1209 }
1210
1211 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1212 {
1213     tcg_gen_or_i64(pd, pn, pm);
1214     tcg_gen_and_i64(pd, pd, pg);
1215 }
1216
1217 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1218                            TCGv_vec pm, TCGv_vec pg)
1219 {
1220     tcg_gen_or_vec(vece, pd, pn, pm);
1221     tcg_gen_and_vec(vece, pd, pd, pg);
1222 }
1223
1224 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1225 {
1226     static const GVecGen4 op = {
1227         .fni8 = gen_orr_pg_i64,
1228         .fniv = gen_orr_pg_vec,
1229         .fno = gen_helper_sve_orr_pppp,
1230         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1231     };
1232     if (a->s) {
1233         return do_pppp_flags(s, a, &op);
1234     } else if (a->pg == a->rn && a->rn == a->rm) {
1235         return do_mov_p(s, a->rd, a->rn);
1236     } else {
1237         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1238     }
1239 }
1240
1241 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1242 {
1243     tcg_gen_orc_i64(pd, pn, pm);
1244     tcg_gen_and_i64(pd, pd, pg);
1245 }
1246
1247 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1248                            TCGv_vec pm, TCGv_vec pg)
1249 {
1250     tcg_gen_orc_vec(vece, pd, pn, pm);
1251     tcg_gen_and_vec(vece, pd, pd, pg);
1252 }
1253
1254 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1255 {
1256     static const GVecGen4 op = {
1257         .fni8 = gen_orn_pg_i64,
1258         .fniv = gen_orn_pg_vec,
1259         .fno = gen_helper_sve_orn_pppp,
1260         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1261     };
1262     if (a->s) {
1263         return do_pppp_flags(s, a, &op);
1264     } else {
1265         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1266     }
1267 }
1268
1269 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1270 {
1271     tcg_gen_or_i64(pd, pn, pm);
1272     tcg_gen_andc_i64(pd, pg, pd);
1273 }
1274
1275 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1276                            TCGv_vec pm, TCGv_vec pg)
1277 {
1278     tcg_gen_or_vec(vece, pd, pn, pm);
1279     tcg_gen_andc_vec(vece, pd, pg, pd);
1280 }
1281
1282 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1283 {
1284     static const GVecGen4 op = {
1285         .fni8 = gen_nor_pg_i64,
1286         .fniv = gen_nor_pg_vec,
1287         .fno = gen_helper_sve_nor_pppp,
1288         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1289     };
1290     if (a->s) {
1291         return do_pppp_flags(s, a, &op);
1292     } else {
1293         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1294     }
1295 }
1296
1297 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1298 {
1299     tcg_gen_and_i64(pd, pn, pm);
1300     tcg_gen_andc_i64(pd, pg, pd);
1301 }
1302
1303 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1304                            TCGv_vec pm, TCGv_vec pg)
1305 {
1306     tcg_gen_and_vec(vece, pd, pn, pm);
1307     tcg_gen_andc_vec(vece, pd, pg, pd);
1308 }
1309
1310 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1311 {
1312     static const GVecGen4 op = {
1313         .fni8 = gen_nand_pg_i64,
1314         .fniv = gen_nand_pg_vec,
1315         .fno = gen_helper_sve_nand_pppp,
1316         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1317     };
1318     if (a->s) {
1319         return do_pppp_flags(s, a, &op);
1320     } else {
1321         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1322     }
1323 }
1324
1325 /*
1326  *** SVE Predicate Misc Group
1327  */
1328
1329 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1330 {
1331     if (sve_access_check(s)) {
1332         int nofs = pred_full_reg_offset(s, a->rn);
1333         int gofs = pred_full_reg_offset(s, a->pg);
1334         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1335
1336         if (words == 1) {
1337             TCGv_i64 pn = tcg_temp_new_i64();
1338             TCGv_i64 pg = tcg_temp_new_i64();
1339
1340             tcg_gen_ld_i64(pn, cpu_env, nofs);
1341             tcg_gen_ld_i64(pg, cpu_env, gofs);
1342             do_predtest1(pn, pg);
1343
1344             tcg_temp_free_i64(pn);
1345             tcg_temp_free_i64(pg);
1346         } else {
1347             do_predtest(s, nofs, gofs, words);
1348         }
1349     }
1350     return true;
1351 }
1352
1353 /* See the ARM pseudocode DecodePredCount.  */
1354 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1355 {
1356     unsigned elements = fullsz >> esz;
1357     unsigned bound;
1358
1359     switch (pattern) {
1360     case 0x0: /* POW2 */
1361         return pow2floor(elements);
1362     case 0x1: /* VL1 */
1363     case 0x2: /* VL2 */
1364     case 0x3: /* VL3 */
1365     case 0x4: /* VL4 */
1366     case 0x5: /* VL5 */
1367     case 0x6: /* VL6 */
1368     case 0x7: /* VL7 */
1369     case 0x8: /* VL8 */
1370         bound = pattern;
1371         break;
1372     case 0x9: /* VL16 */
1373     case 0xa: /* VL32 */
1374     case 0xb: /* VL64 */
1375     case 0xc: /* VL128 */
1376     case 0xd: /* VL256 */
1377         bound = 16 << (pattern - 9);
1378         break;
1379     case 0x1d: /* MUL4 */
1380         return elements - elements % 4;
1381     case 0x1e: /* MUL3 */
1382         return elements - elements % 3;
1383     case 0x1f: /* ALL */
1384         return elements;
1385     default:   /* #uimm5 */
1386         return 0;
1387     }
1388     return elements >= bound ? bound : 0;
1389 }
1390
1391 /* This handles all of the predicate initialization instructions,
1392  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1393  * so that decode_pred_count returns 0.  For SETFFR, we will have
1394  * set RD == 16 == FFR.
1395  */
1396 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1397 {
1398     if (!sve_access_check(s)) {
1399         return true;
1400     }
1401
1402     unsigned fullsz = vec_full_reg_size(s);
1403     unsigned ofs = pred_full_reg_offset(s, rd);
1404     unsigned numelem, setsz, i;
1405     uint64_t word, lastword;
1406     TCGv_i64 t;
1407
1408     numelem = decode_pred_count(fullsz, pat, esz);
1409
1410     /* Determine what we must store into each bit, and how many.  */
1411     if (numelem == 0) {
1412         lastword = word = 0;
1413         setsz = fullsz;
1414     } else {
1415         setsz = numelem << esz;
1416         lastword = word = pred_esz_masks[esz];
1417         if (setsz % 64) {
1418             lastword &= ~(-1ull << (setsz % 64));
1419         }
1420     }
1421
1422     t = tcg_temp_new_i64();
1423     if (fullsz <= 64) {
1424         tcg_gen_movi_i64(t, lastword);
1425         tcg_gen_st_i64(t, cpu_env, ofs);
1426         goto done;
1427     }
1428
1429     if (word == lastword) {
1430         unsigned maxsz = size_for_gvec(fullsz / 8);
1431         unsigned oprsz = size_for_gvec(setsz / 8);
1432
1433         if (oprsz * 8 == setsz) {
1434             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1435             goto done;
1436         }
1437         if (oprsz * 8 == setsz + 8) {
1438             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1439             tcg_gen_movi_i64(t, 0);
1440             tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1441             goto done;
1442         }
1443     }
1444
1445     setsz /= 8;
1446     fullsz /= 8;
1447
1448     tcg_gen_movi_i64(t, word);
1449     for (i = 0; i < setsz; i += 8) {
1450         tcg_gen_st_i64(t, cpu_env, ofs + i);
1451     }
1452     if (lastword != word) {
1453         tcg_gen_movi_i64(t, lastword);
1454         tcg_gen_st_i64(t, cpu_env, ofs + i);
1455         i += 8;
1456     }
1457     if (i < fullsz) {
1458         tcg_gen_movi_i64(t, 0);
1459         for (; i < fullsz; i += 8) {
1460             tcg_gen_st_i64(t, cpu_env, ofs + i);
1461         }
1462     }
1463
1464  done:
1465     tcg_temp_free_i64(t);
1466
1467     /* PTRUES */
1468     if (setflag) {
1469         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1470         tcg_gen_movi_i32(cpu_CF, word == 0);
1471         tcg_gen_movi_i32(cpu_VF, 0);
1472         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1473     }
1474     return true;
1475 }
1476
1477 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1478 {
1479     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1480 }
1481
1482 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1483 {
1484     /* Note pat == 31 is #all, to set all elements.  */
1485     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1486 }
1487
1488 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1489 {
1490     /* Note pat == 32 is #unimp, to set no elements.  */
1491     return do_predset(s, 0, a->rd, 32, false);
1492 }
1493
1494 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1495 {
1496     /* The path through do_pppp_flags is complicated enough to want to avoid
1497      * duplication.  Frob the arguments into the form of a predicated AND.
1498      */
1499     arg_rprr_s alt_a = {
1500         .rd = a->rd, .pg = a->pg, .s = a->s,
1501         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1502     };
1503     return trans_AND_pppp(s, &alt_a, insn);
1504 }
1505
1506 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1507 {
1508     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1509 }
1510
1511 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1512 {
1513     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1514 }
1515
1516 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1517                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1518                                            TCGv_ptr, TCGv_i32))
1519 {
1520     if (!sve_access_check(s)) {
1521         return true;
1522     }
1523
1524     TCGv_ptr t_pd = tcg_temp_new_ptr();
1525     TCGv_ptr t_pg = tcg_temp_new_ptr();
1526     TCGv_i32 t;
1527     unsigned desc;
1528
1529     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1530     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1531
1532     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1533     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1534     t = tcg_const_i32(desc);
1535
1536     gen_fn(t, t_pd, t_pg, t);
1537     tcg_temp_free_ptr(t_pd);
1538     tcg_temp_free_ptr(t_pg);
1539
1540     do_pred_flags(t);
1541     tcg_temp_free_i32(t);
1542     return true;
1543 }
1544
1545 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1546 {
1547     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1548 }
1549
1550 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1551 {
1552     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1553 }
1554
1555 /*
1556  *** SVE Element Count Group
1557  */
1558
1559 /* Perform an inline saturating addition of a 32-bit value within
1560  * a 64-bit register.  The second operand is known to be positive,
1561  * which halves the comparisions we must perform to bound the result.
1562  */
1563 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1564 {
1565     int64_t ibound;
1566     TCGv_i64 bound;
1567     TCGCond cond;
1568
1569     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1570     if (u) {
1571         tcg_gen_ext32u_i64(reg, reg);
1572     } else {
1573         tcg_gen_ext32s_i64(reg, reg);
1574     }
1575     if (d) {
1576         tcg_gen_sub_i64(reg, reg, val);
1577         ibound = (u ? 0 : INT32_MIN);
1578         cond = TCG_COND_LT;
1579     } else {
1580         tcg_gen_add_i64(reg, reg, val);
1581         ibound = (u ? UINT32_MAX : INT32_MAX);
1582         cond = TCG_COND_GT;
1583     }
1584     bound = tcg_const_i64(ibound);
1585     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1586     tcg_temp_free_i64(bound);
1587 }
1588
1589 /* Similarly with 64-bit values.  */
1590 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1591 {
1592     TCGv_i64 t0 = tcg_temp_new_i64();
1593     TCGv_i64 t1 = tcg_temp_new_i64();
1594     TCGv_i64 t2;
1595
1596     if (u) {
1597         if (d) {
1598             tcg_gen_sub_i64(t0, reg, val);
1599             tcg_gen_movi_i64(t1, 0);
1600             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1601         } else {
1602             tcg_gen_add_i64(t0, reg, val);
1603             tcg_gen_movi_i64(t1, -1);
1604             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1605         }
1606     } else {
1607         if (d) {
1608             /* Detect signed overflow for subtraction.  */
1609             tcg_gen_xor_i64(t0, reg, val);
1610             tcg_gen_sub_i64(t1, reg, val);
1611             tcg_gen_xor_i64(reg, reg, t0);
1612             tcg_gen_and_i64(t0, t0, reg);
1613
1614             /* Bound the result.  */
1615             tcg_gen_movi_i64(reg, INT64_MIN);
1616             t2 = tcg_const_i64(0);
1617             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1618         } else {
1619             /* Detect signed overflow for addition.  */
1620             tcg_gen_xor_i64(t0, reg, val);
1621             tcg_gen_add_i64(reg, reg, val);
1622             tcg_gen_xor_i64(t1, reg, val);
1623             tcg_gen_andc_i64(t0, t1, t0);
1624
1625             /* Bound the result.  */
1626             tcg_gen_movi_i64(t1, INT64_MAX);
1627             t2 = tcg_const_i64(0);
1628             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1629         }
1630         tcg_temp_free_i64(t2);
1631     }
1632     tcg_temp_free_i64(t0);
1633     tcg_temp_free_i64(t1);
1634 }
1635
1636 /* Similarly with a vector and a scalar operand.  */
1637 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1638                               TCGv_i64 val, bool u, bool d)
1639 {
1640     unsigned vsz = vec_full_reg_size(s);
1641     TCGv_ptr dptr, nptr;
1642     TCGv_i32 t32, desc;
1643     TCGv_i64 t64;
1644
1645     dptr = tcg_temp_new_ptr();
1646     nptr = tcg_temp_new_ptr();
1647     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1648     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1649     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1650
1651     switch (esz) {
1652     case MO_8:
1653         t32 = tcg_temp_new_i32();
1654         tcg_gen_extrl_i64_i32(t32, val);
1655         if (d) {
1656             tcg_gen_neg_i32(t32, t32);
1657         }
1658         if (u) {
1659             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1660         } else {
1661             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1662         }
1663         tcg_temp_free_i32(t32);
1664         break;
1665
1666     case MO_16:
1667         t32 = tcg_temp_new_i32();
1668         tcg_gen_extrl_i64_i32(t32, val);
1669         if (d) {
1670             tcg_gen_neg_i32(t32, t32);
1671         }
1672         if (u) {
1673             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1674         } else {
1675             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1676         }
1677         tcg_temp_free_i32(t32);
1678         break;
1679
1680     case MO_32:
1681         t64 = tcg_temp_new_i64();
1682         if (d) {
1683             tcg_gen_neg_i64(t64, val);
1684         } else {
1685             tcg_gen_mov_i64(t64, val);
1686         }
1687         if (u) {
1688             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1689         } else {
1690             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1691         }
1692         tcg_temp_free_i64(t64);
1693         break;
1694
1695     case MO_64:
1696         if (u) {
1697             if (d) {
1698                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1699             } else {
1700                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1701             }
1702         } else if (d) {
1703             t64 = tcg_temp_new_i64();
1704             tcg_gen_neg_i64(t64, val);
1705             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1706             tcg_temp_free_i64(t64);
1707         } else {
1708             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1709         }
1710         break;
1711
1712     default:
1713         g_assert_not_reached();
1714     }
1715
1716     tcg_temp_free_ptr(dptr);
1717     tcg_temp_free_ptr(nptr);
1718     tcg_temp_free_i32(desc);
1719 }
1720
1721 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1722 {
1723     if (sve_access_check(s)) {
1724         unsigned fullsz = vec_full_reg_size(s);
1725         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1726         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1727     }
1728     return true;
1729 }
1730
1731 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1732 {
1733     if (sve_access_check(s)) {
1734         unsigned fullsz = vec_full_reg_size(s);
1735         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1736         int inc = numelem * a->imm * (a->d ? -1 : 1);
1737         TCGv_i64 reg = cpu_reg(s, a->rd);
1738
1739         tcg_gen_addi_i64(reg, reg, inc);
1740     }
1741     return true;
1742 }
1743
1744 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1745                                uint32_t insn)
1746 {
1747     if (!sve_access_check(s)) {
1748         return true;
1749     }
1750
1751     unsigned fullsz = vec_full_reg_size(s);
1752     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1753     int inc = numelem * a->imm;
1754     TCGv_i64 reg = cpu_reg(s, a->rd);
1755
1756     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1757     if (inc == 0) {
1758         if (a->u) {
1759             tcg_gen_ext32u_i64(reg, reg);
1760         } else {
1761             tcg_gen_ext32s_i64(reg, reg);
1762         }
1763     } else {
1764         TCGv_i64 t = tcg_const_i64(inc);
1765         do_sat_addsub_32(reg, t, a->u, a->d);
1766         tcg_temp_free_i64(t);
1767     }
1768     return true;
1769 }
1770
1771 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1772                                uint32_t insn)
1773 {
1774     if (!sve_access_check(s)) {
1775         return true;
1776     }
1777
1778     unsigned fullsz = vec_full_reg_size(s);
1779     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1780     int inc = numelem * a->imm;
1781     TCGv_i64 reg = cpu_reg(s, a->rd);
1782
1783     if (inc != 0) {
1784         TCGv_i64 t = tcg_const_i64(inc);
1785         do_sat_addsub_64(reg, t, a->u, a->d);
1786         tcg_temp_free_i64(t);
1787     }
1788     return true;
1789 }
1790
1791 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1792 {
1793     if (a->esz == 0) {
1794         return false;
1795     }
1796
1797     unsigned fullsz = vec_full_reg_size(s);
1798     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1799     int inc = numelem * a->imm;
1800
1801     if (inc != 0) {
1802         if (sve_access_check(s)) {
1803             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1804             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1805                               vec_full_reg_offset(s, a->rn),
1806                               t, fullsz, fullsz);
1807             tcg_temp_free_i64(t);
1808         }
1809     } else {
1810         do_mov_z(s, a->rd, a->rn);
1811     }
1812     return true;
1813 }
1814
1815 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1816                             uint32_t insn)
1817 {
1818     if (a->esz == 0) {
1819         return false;
1820     }
1821
1822     unsigned fullsz = vec_full_reg_size(s);
1823     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1824     int inc = numelem * a->imm;
1825
1826     if (inc != 0) {
1827         if (sve_access_check(s)) {
1828             TCGv_i64 t = tcg_const_i64(inc);
1829             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1830             tcg_temp_free_i64(t);
1831         }
1832     } else {
1833         do_mov_z(s, a->rd, a->rn);
1834     }
1835     return true;
1836 }
1837
1838 /*
1839  *** SVE Bitwise Immediate Group
1840  */
1841
1842 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1843 {
1844     uint64_t imm;
1845     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1846                                 extract32(a->dbm, 0, 6),
1847                                 extract32(a->dbm, 6, 6))) {
1848         return false;
1849     }
1850     if (sve_access_check(s)) {
1851         unsigned vsz = vec_full_reg_size(s);
1852         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1853                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1854     }
1855     return true;
1856 }
1857
1858 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1859 {
1860     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1861 }
1862
1863 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1864 {
1865     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1866 }
1867
1868 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1869 {
1870     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1871 }
1872
1873 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1874 {
1875     uint64_t imm;
1876     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1877                                 extract32(a->dbm, 0, 6),
1878                                 extract32(a->dbm, 6, 6))) {
1879         return false;
1880     }
1881     if (sve_access_check(s)) {
1882         do_dupi_z(s, a->rd, imm);
1883     }
1884     return true;
1885 }
1886
1887 /*
1888  *** SVE Integer Wide Immediate - Predicated Group
1889  */
1890
1891 /* Implement all merging copies.  This is used for CPY (immediate),
1892  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1893  */
1894 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1895                      TCGv_i64 val)
1896 {
1897     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1898     static gen_cpy * const fns[4] = {
1899         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1900         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1901     };
1902     unsigned vsz = vec_full_reg_size(s);
1903     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1904     TCGv_ptr t_zd = tcg_temp_new_ptr();
1905     TCGv_ptr t_zn = tcg_temp_new_ptr();
1906     TCGv_ptr t_pg = tcg_temp_new_ptr();
1907
1908     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1909     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1910     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1911
1912     fns[esz](t_zd, t_zn, t_pg, val, desc);
1913
1914     tcg_temp_free_ptr(t_zd);
1915     tcg_temp_free_ptr(t_zn);
1916     tcg_temp_free_ptr(t_pg);
1917     tcg_temp_free_i32(desc);
1918 }
1919
1920 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1921 {
1922     if (a->esz == 0) {
1923         return false;
1924     }
1925     if (sve_access_check(s)) {
1926         /* Decode the VFP immediate.  */
1927         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1928         TCGv_i64 t_imm = tcg_const_i64(imm);
1929         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1930         tcg_temp_free_i64(t_imm);
1931     }
1932     return true;
1933 }
1934
1935 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1936 {
1937     if (a->esz == 0 && extract32(insn, 13, 1)) {
1938         return false;
1939     }
1940     if (sve_access_check(s)) {
1941         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1942         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943         tcg_temp_free_i64(t_imm);
1944     }
1945     return true;
1946 }
1947
1948 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1949 {
1950     static gen_helper_gvec_2i * const fns[4] = {
1951         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1952         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1953     };
1954
1955     if (a->esz == 0 && extract32(insn, 13, 1)) {
1956         return false;
1957     }
1958     if (sve_access_check(s)) {
1959         unsigned vsz = vec_full_reg_size(s);
1960         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1961         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1962                             pred_full_reg_offset(s, a->pg),
1963                             t_imm, vsz, vsz, 0, fns[a->esz]);
1964         tcg_temp_free_i64(t_imm);
1965     }
1966     return true;
1967 }
1968
1969 /*
1970  *** SVE Permute Extract Group
1971  */
1972
1973 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1974 {
1975     if (!sve_access_check(s)) {
1976         return true;
1977     }
1978
1979     unsigned vsz = vec_full_reg_size(s);
1980     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1981     unsigned n_siz = vsz - n_ofs;
1982     unsigned d = vec_full_reg_offset(s, a->rd);
1983     unsigned n = vec_full_reg_offset(s, a->rn);
1984     unsigned m = vec_full_reg_offset(s, a->rm);
1985
1986     /* Use host vector move insns if we have appropriate sizes
1987      * and no unfortunate overlap.
1988      */
1989     if (m != d
1990         && n_ofs == size_for_gvec(n_ofs)
1991         && n_siz == size_for_gvec(n_siz)
1992         && (d != n || n_siz <= n_ofs)) {
1993         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1994         if (n_ofs != 0) {
1995             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1996         }
1997     } else {
1998         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1999     }
2000     return true;
2001 }
2002
2003 /*
2004  *** SVE Permute - Unpredicated Group
2005  */
2006
2007 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2008 {
2009     if (sve_access_check(s)) {
2010         unsigned vsz = vec_full_reg_size(s);
2011         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2012                              vsz, vsz, cpu_reg_sp(s, a->rn));
2013     }
2014     return true;
2015 }
2016
2017 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2018 {
2019     if ((a->imm & 0x1f) == 0) {
2020         return false;
2021     }
2022     if (sve_access_check(s)) {
2023         unsigned vsz = vec_full_reg_size(s);
2024         unsigned dofs = vec_full_reg_offset(s, a->rd);
2025         unsigned esz, index;
2026
2027         esz = ctz32(a->imm);
2028         index = a->imm >> (esz + 1);
2029
2030         if ((index << esz) < vsz) {
2031             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2032             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2033         } else {
2034             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2035         }
2036     }
2037     return true;
2038 }
2039
2040 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2041 {
2042     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2043     static gen_insr * const fns[4] = {
2044         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2045         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2046     };
2047     unsigned vsz = vec_full_reg_size(s);
2048     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2049     TCGv_ptr t_zd = tcg_temp_new_ptr();
2050     TCGv_ptr t_zn = tcg_temp_new_ptr();
2051
2052     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2053     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2054
2055     fns[a->esz](t_zd, t_zn, val, desc);
2056
2057     tcg_temp_free_ptr(t_zd);
2058     tcg_temp_free_ptr(t_zn);
2059     tcg_temp_free_i32(desc);
2060 }
2061
2062 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2063 {
2064     if (sve_access_check(s)) {
2065         TCGv_i64 t = tcg_temp_new_i64();
2066         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2067         do_insr_i64(s, a, t);
2068         tcg_temp_free_i64(t);
2069     }
2070     return true;
2071 }
2072
2073 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2074 {
2075     if (sve_access_check(s)) {
2076         do_insr_i64(s, a, cpu_reg(s, a->rm));
2077     }
2078     return true;
2079 }
2080
2081 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2082 {
2083     static gen_helper_gvec_2 * const fns[4] = {
2084         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2085         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2086     };
2087
2088     if (sve_access_check(s)) {
2089         unsigned vsz = vec_full_reg_size(s);
2090         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2091                            vec_full_reg_offset(s, a->rn),
2092                            vsz, vsz, 0, fns[a->esz]);
2093     }
2094     return true;
2095 }
2096
2097 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2098 {
2099     static gen_helper_gvec_3 * const fns[4] = {
2100         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2101         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2102     };
2103
2104     if (sve_access_check(s)) {
2105         unsigned vsz = vec_full_reg_size(s);
2106         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2107                            vec_full_reg_offset(s, a->rn),
2108                            vec_full_reg_offset(s, a->rm),
2109                            vsz, vsz, 0, fns[a->esz]);
2110     }
2111     return true;
2112 }
2113
2114 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2115 {
2116     static gen_helper_gvec_2 * const fns[4][2] = {
2117         { NULL, NULL },
2118         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2119         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2120         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2121     };
2122
2123     if (a->esz == 0) {
2124         return false;
2125     }
2126     if (sve_access_check(s)) {
2127         unsigned vsz = vec_full_reg_size(s);
2128         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2129                            vec_full_reg_offset(s, a->rn)
2130                            + (a->h ? vsz / 2 : 0),
2131                            vsz, vsz, 0, fns[a->esz][a->u]);
2132     }
2133     return true;
2134 }
2135
2136 /*
2137  *** SVE Permute - Predicates Group
2138  */
2139
2140 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2141                           gen_helper_gvec_3 *fn)
2142 {
2143     if (!sve_access_check(s)) {
2144         return true;
2145     }
2146
2147     unsigned vsz = pred_full_reg_size(s);
2148
2149     /* Predicate sizes may be smaller and cannot use simd_desc.
2150        We cannot round up, as we do elsewhere, because we need
2151        the exact size for ZIP2 and REV.  We retain the style for
2152        the other helpers for consistency.  */
2153     TCGv_ptr t_d = tcg_temp_new_ptr();
2154     TCGv_ptr t_n = tcg_temp_new_ptr();
2155     TCGv_ptr t_m = tcg_temp_new_ptr();
2156     TCGv_i32 t_desc;
2157     int desc;
2158
2159     desc = vsz - 2;
2160     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2161     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2162
2163     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2164     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2165     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2166     t_desc = tcg_const_i32(desc);
2167
2168     fn(t_d, t_n, t_m, t_desc);
2169
2170     tcg_temp_free_ptr(t_d);
2171     tcg_temp_free_ptr(t_n);
2172     tcg_temp_free_ptr(t_m);
2173     tcg_temp_free_i32(t_desc);
2174     return true;
2175 }
2176
2177 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2178                           gen_helper_gvec_2 *fn)
2179 {
2180     if (!sve_access_check(s)) {
2181         return true;
2182     }
2183
2184     unsigned vsz = pred_full_reg_size(s);
2185     TCGv_ptr t_d = tcg_temp_new_ptr();
2186     TCGv_ptr t_n = tcg_temp_new_ptr();
2187     TCGv_i32 t_desc;
2188     int desc;
2189
2190     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2191     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2192
2193     /* Predicate sizes may be smaller and cannot use simd_desc.
2194        We cannot round up, as we do elsewhere, because we need
2195        the exact size for ZIP2 and REV.  We retain the style for
2196        the other helpers for consistency.  */
2197
2198     desc = vsz - 2;
2199     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2200     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2201     t_desc = tcg_const_i32(desc);
2202
2203     fn(t_d, t_n, t_desc);
2204
2205     tcg_temp_free_i32(t_desc);
2206     tcg_temp_free_ptr(t_d);
2207     tcg_temp_free_ptr(t_n);
2208     return true;
2209 }
2210
2211 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2212 {
2213     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2214 }
2215
2216 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2217 {
2218     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2219 }
2220
2221 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2222 {
2223     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2224 }
2225
2226 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2227 {
2228     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2229 }
2230
2231 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2232 {
2233     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2234 }
2235
2236 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2237 {
2238     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2239 }
2240
2241 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2242 {
2243     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2244 }
2245
2246 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2247 {
2248     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2249 }
2250
2251 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2252 {
2253     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2254 }
2255
2256 /*
2257  *** SVE Permute - Interleaving Group
2258  */
2259
2260 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2261 {
2262     static gen_helper_gvec_3 * const fns[4] = {
2263         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2264         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2265     };
2266
2267     if (sve_access_check(s)) {
2268         unsigned vsz = vec_full_reg_size(s);
2269         unsigned high_ofs = high ? vsz / 2 : 0;
2270         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2271                            vec_full_reg_offset(s, a->rn) + high_ofs,
2272                            vec_full_reg_offset(s, a->rm) + high_ofs,
2273                            vsz, vsz, 0, fns[a->esz]);
2274     }
2275     return true;
2276 }
2277
2278 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2279                             gen_helper_gvec_3 *fn)
2280 {
2281     if (sve_access_check(s)) {
2282         unsigned vsz = vec_full_reg_size(s);
2283         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2284                            vec_full_reg_offset(s, a->rn),
2285                            vec_full_reg_offset(s, a->rm),
2286                            vsz, vsz, data, fn);
2287     }
2288     return true;
2289 }
2290
2291 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2292 {
2293     return do_zip(s, a, false);
2294 }
2295
2296 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2297 {
2298     return do_zip(s, a, true);
2299 }
2300
2301 static gen_helper_gvec_3 * const uzp_fns[4] = {
2302     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2303     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2304 };
2305
2306 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2307 {
2308     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2309 }
2310
2311 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2312 {
2313     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2314 }
2315
2316 static gen_helper_gvec_3 * const trn_fns[4] = {
2317     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2318     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2319 };
2320
2321 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2322 {
2323     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2324 }
2325
2326 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2327 {
2328     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2329 }
2330
2331 /*
2332  *** SVE Permute Vector - Predicated Group
2333  */
2334
2335 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2336 {
2337     static gen_helper_gvec_3 * const fns[4] = {
2338         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2339     };
2340     return do_zpz_ool(s, a, fns[a->esz]);
2341 }
2342
2343 /* Call the helper that computes the ARM LastActiveElement pseudocode
2344  * function, scaled by the element size.  This includes the not found
2345  * indication; e.g. not found for esz=3 is -8.
2346  */
2347 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2348 {
2349     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2350      * round up, as we do elsewhere, because we need the exact size.
2351      */
2352     TCGv_ptr t_p = tcg_temp_new_ptr();
2353     TCGv_i32 t_desc;
2354     unsigned vsz = pred_full_reg_size(s);
2355     unsigned desc;
2356
2357     desc = vsz - 2;
2358     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2359
2360     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2361     t_desc = tcg_const_i32(desc);
2362
2363     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2364
2365     tcg_temp_free_i32(t_desc);
2366     tcg_temp_free_ptr(t_p);
2367 }
2368
2369 /* Increment LAST to the offset of the next element in the vector,
2370  * wrapping around to 0.
2371  */
2372 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2373 {
2374     unsigned vsz = vec_full_reg_size(s);
2375
2376     tcg_gen_addi_i32(last, last, 1 << esz);
2377     if (is_power_of_2(vsz)) {
2378         tcg_gen_andi_i32(last, last, vsz - 1);
2379     } else {
2380         TCGv_i32 max = tcg_const_i32(vsz);
2381         TCGv_i32 zero = tcg_const_i32(0);
2382         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2383         tcg_temp_free_i32(max);
2384         tcg_temp_free_i32(zero);
2385     }
2386 }
2387
2388 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2389 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 {
2391     unsigned vsz = vec_full_reg_size(s);
2392
2393     if (is_power_of_2(vsz)) {
2394         tcg_gen_andi_i32(last, last, vsz - 1);
2395     } else {
2396         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2397         TCGv_i32 zero = tcg_const_i32(0);
2398         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2399         tcg_temp_free_i32(max);
2400         tcg_temp_free_i32(zero);
2401     }
2402 }
2403
2404 /* Load an unsigned element of ESZ from BASE+OFS.  */
2405 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2406 {
2407     TCGv_i64 r = tcg_temp_new_i64();
2408
2409     switch (esz) {
2410     case 0:
2411         tcg_gen_ld8u_i64(r, base, ofs);
2412         break;
2413     case 1:
2414         tcg_gen_ld16u_i64(r, base, ofs);
2415         break;
2416     case 2:
2417         tcg_gen_ld32u_i64(r, base, ofs);
2418         break;
2419     case 3:
2420         tcg_gen_ld_i64(r, base, ofs);
2421         break;
2422     default:
2423         g_assert_not_reached();
2424     }
2425     return r;
2426 }
2427
2428 /* Load an unsigned element of ESZ from RM[LAST].  */
2429 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2430                                  int rm, int esz)
2431 {
2432     TCGv_ptr p = tcg_temp_new_ptr();
2433     TCGv_i64 r;
2434
2435     /* Convert offset into vector into offset into ENV.
2436      * The final adjustment for the vector register base
2437      * is added via constant offset to the load.
2438      */
2439 #ifdef HOST_WORDS_BIGENDIAN
2440     /* Adjust for element ordering.  See vec_reg_offset.  */
2441     if (esz < 3) {
2442         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2443     }
2444 #endif
2445     tcg_gen_ext_i32_ptr(p, last);
2446     tcg_gen_add_ptr(p, p, cpu_env);
2447
2448     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2449     tcg_temp_free_ptr(p);
2450
2451     return r;
2452 }
2453
2454 /* Compute CLAST for a Zreg.  */
2455 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2456 {
2457     TCGv_i32 last;
2458     TCGLabel *over;
2459     TCGv_i64 ele;
2460     unsigned vsz, esz = a->esz;
2461
2462     if (!sve_access_check(s)) {
2463         return true;
2464     }
2465
2466     last = tcg_temp_local_new_i32();
2467     over = gen_new_label();
2468
2469     find_last_active(s, last, esz, a->pg);
2470
2471     /* There is of course no movcond for a 2048-bit vector,
2472      * so we must branch over the actual store.
2473      */
2474     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2475
2476     if (!before) {
2477         incr_last_active(s, last, esz);
2478     }
2479
2480     ele = load_last_active(s, last, a->rm, esz);
2481     tcg_temp_free_i32(last);
2482
2483     vsz = vec_full_reg_size(s);
2484     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2485     tcg_temp_free_i64(ele);
2486
2487     /* If this insn used MOVPRFX, we may need a second move.  */
2488     if (a->rd != a->rn) {
2489         TCGLabel *done = gen_new_label();
2490         tcg_gen_br(done);
2491
2492         gen_set_label(over);
2493         do_mov_z(s, a->rd, a->rn);
2494
2495         gen_set_label(done);
2496     } else {
2497         gen_set_label(over);
2498     }
2499     return true;
2500 }
2501
2502 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2503 {
2504     return do_clast_vector(s, a, false);
2505 }
2506
2507 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2508 {
2509     return do_clast_vector(s, a, true);
2510 }
2511
2512 /* Compute CLAST for a scalar.  */
2513 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2514                             bool before, TCGv_i64 reg_val)
2515 {
2516     TCGv_i32 last = tcg_temp_new_i32();
2517     TCGv_i64 ele, cmp, zero;
2518
2519     find_last_active(s, last, esz, pg);
2520
2521     /* Extend the original value of last prior to incrementing.  */
2522     cmp = tcg_temp_new_i64();
2523     tcg_gen_ext_i32_i64(cmp, last);
2524
2525     if (!before) {
2526         incr_last_active(s, last, esz);
2527     }
2528
2529     /* The conceit here is that while last < 0 indicates not found, after
2530      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2531      * from which we can load garbage.  We then discard the garbage with
2532      * a conditional move.
2533      */
2534     ele = load_last_active(s, last, rm, esz);
2535     tcg_temp_free_i32(last);
2536
2537     zero = tcg_const_i64(0);
2538     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2539
2540     tcg_temp_free_i64(zero);
2541     tcg_temp_free_i64(cmp);
2542     tcg_temp_free_i64(ele);
2543 }
2544
2545 /* Compute CLAST for a Vreg.  */
2546 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2547 {
2548     if (sve_access_check(s)) {
2549         int esz = a->esz;
2550         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2551         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2552
2553         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2554         write_fp_dreg(s, a->rd, reg);
2555         tcg_temp_free_i64(reg);
2556     }
2557     return true;
2558 }
2559
2560 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2561 {
2562     return do_clast_fp(s, a, false);
2563 }
2564
2565 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2566 {
2567     return do_clast_fp(s, a, true);
2568 }
2569
2570 /* Compute CLAST for a Xreg.  */
2571 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2572 {
2573     TCGv_i64 reg;
2574
2575     if (!sve_access_check(s)) {
2576         return true;
2577     }
2578
2579     reg = cpu_reg(s, a->rd);
2580     switch (a->esz) {
2581     case 0:
2582         tcg_gen_ext8u_i64(reg, reg);
2583         break;
2584     case 1:
2585         tcg_gen_ext16u_i64(reg, reg);
2586         break;
2587     case 2:
2588         tcg_gen_ext32u_i64(reg, reg);
2589         break;
2590     case 3:
2591         break;
2592     default:
2593         g_assert_not_reached();
2594     }
2595
2596     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2597     return true;
2598 }
2599
2600 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2601 {
2602     return do_clast_general(s, a, false);
2603 }
2604
2605 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2606 {
2607     return do_clast_general(s, a, true);
2608 }
2609
2610 /* Compute LAST for a scalar.  */
2611 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2612                                int pg, int rm, bool before)
2613 {
2614     TCGv_i32 last = tcg_temp_new_i32();
2615     TCGv_i64 ret;
2616
2617     find_last_active(s, last, esz, pg);
2618     if (before) {
2619         wrap_last_active(s, last, esz);
2620     } else {
2621         incr_last_active(s, last, esz);
2622     }
2623
2624     ret = load_last_active(s, last, rm, esz);
2625     tcg_temp_free_i32(last);
2626     return ret;
2627 }
2628
2629 /* Compute LAST for a Vreg.  */
2630 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2631 {
2632     if (sve_access_check(s)) {
2633         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2634         write_fp_dreg(s, a->rd, val);
2635         tcg_temp_free_i64(val);
2636     }
2637     return true;
2638 }
2639
2640 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2641 {
2642     return do_last_fp(s, a, false);
2643 }
2644
2645 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2646 {
2647     return do_last_fp(s, a, true);
2648 }
2649
2650 /* Compute LAST for a Xreg.  */
2651 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2652 {
2653     if (sve_access_check(s)) {
2654         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2655         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2656         tcg_temp_free_i64(val);
2657     }
2658     return true;
2659 }
2660
2661 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2662 {
2663     return do_last_general(s, a, false);
2664 }
2665
2666 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2667 {
2668     return do_last_general(s, a, true);
2669 }
2670
2671 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2672 {
2673     if (sve_access_check(s)) {
2674         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2675     }
2676     return true;
2677 }
2678
2679 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2680 {
2681     if (sve_access_check(s)) {
2682         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2683         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2684         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2685         tcg_temp_free_i64(t);
2686     }
2687     return true;
2688 }
2689
2690 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2691 {
2692     static gen_helper_gvec_3 * const fns[4] = {
2693         NULL,
2694         gen_helper_sve_revb_h,
2695         gen_helper_sve_revb_s,
2696         gen_helper_sve_revb_d,
2697     };
2698     return do_zpz_ool(s, a, fns[a->esz]);
2699 }
2700
2701 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2702 {
2703     static gen_helper_gvec_3 * const fns[4] = {
2704         NULL,
2705         NULL,
2706         gen_helper_sve_revh_s,
2707         gen_helper_sve_revh_d,
2708     };
2709     return do_zpz_ool(s, a, fns[a->esz]);
2710 }
2711
2712 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2713 {
2714     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2715 }
2716
2717 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2718 {
2719     static gen_helper_gvec_3 * const fns[4] = {
2720         gen_helper_sve_rbit_b,
2721         gen_helper_sve_rbit_h,
2722         gen_helper_sve_rbit_s,
2723         gen_helper_sve_rbit_d,
2724     };
2725     return do_zpz_ool(s, a, fns[a->esz]);
2726 }
2727
2728 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2729 {
2730     if (sve_access_check(s)) {
2731         unsigned vsz = vec_full_reg_size(s);
2732         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2733                            vec_full_reg_offset(s, a->rn),
2734                            vec_full_reg_offset(s, a->rm),
2735                            pred_full_reg_offset(s, a->pg),
2736                            vsz, vsz, a->esz, gen_helper_sve_splice);
2737     }
2738     return true;
2739 }
2740
2741 /*
2742  *** SVE Integer Compare - Vectors Group
2743  */
2744
2745 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2746                           gen_helper_gvec_flags_4 *gen_fn)
2747 {
2748     TCGv_ptr pd, zn, zm, pg;
2749     unsigned vsz;
2750     TCGv_i32 t;
2751
2752     if (gen_fn == NULL) {
2753         return false;
2754     }
2755     if (!sve_access_check(s)) {
2756         return true;
2757     }
2758
2759     vsz = vec_full_reg_size(s);
2760     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2761     pd = tcg_temp_new_ptr();
2762     zn = tcg_temp_new_ptr();
2763     zm = tcg_temp_new_ptr();
2764     pg = tcg_temp_new_ptr();
2765
2766     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2767     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2768     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2769     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2770
2771     gen_fn(t, pd, zn, zm, pg, t);
2772
2773     tcg_temp_free_ptr(pd);
2774     tcg_temp_free_ptr(zn);
2775     tcg_temp_free_ptr(zm);
2776     tcg_temp_free_ptr(pg);
2777
2778     do_pred_flags(t);
2779
2780     tcg_temp_free_i32(t);
2781     return true;
2782 }
2783
2784 #define DO_PPZZ(NAME, name) \
2785 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
2786                                 uint32_t insn)                            \
2787 {                                                                         \
2788     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2789         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2790         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2791     };                                                                    \
2792     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2793 }
2794
2795 DO_PPZZ(CMPEQ, cmpeq)
2796 DO_PPZZ(CMPNE, cmpne)
2797 DO_PPZZ(CMPGT, cmpgt)
2798 DO_PPZZ(CMPGE, cmpge)
2799 DO_PPZZ(CMPHI, cmphi)
2800 DO_PPZZ(CMPHS, cmphs)
2801
2802 #undef DO_PPZZ
2803
2804 #define DO_PPZW(NAME, name) \
2805 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
2806                                 uint32_t insn)                            \
2807 {                                                                         \
2808     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2809         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2810         gen_helper_sve_##name##_ppzw_s, NULL                              \
2811     };                                                                    \
2812     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2813 }
2814
2815 DO_PPZW(CMPEQ, cmpeq)
2816 DO_PPZW(CMPNE, cmpne)
2817 DO_PPZW(CMPGT, cmpgt)
2818 DO_PPZW(CMPGE, cmpge)
2819 DO_PPZW(CMPHI, cmphi)
2820 DO_PPZW(CMPHS, cmphs)
2821 DO_PPZW(CMPLT, cmplt)
2822 DO_PPZW(CMPLE, cmple)
2823 DO_PPZW(CMPLO, cmplo)
2824 DO_PPZW(CMPLS, cmpls)
2825
2826 #undef DO_PPZW
2827
2828 /*
2829  *** SVE Integer Compare - Immediate Groups
2830  */
2831
2832 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2833                           gen_helper_gvec_flags_3 *gen_fn)
2834 {
2835     TCGv_ptr pd, zn, pg;
2836     unsigned vsz;
2837     TCGv_i32 t;
2838
2839     if (gen_fn == NULL) {
2840         return false;
2841     }
2842     if (!sve_access_check(s)) {
2843         return true;
2844     }
2845
2846     vsz = vec_full_reg_size(s);
2847     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2848     pd = tcg_temp_new_ptr();
2849     zn = tcg_temp_new_ptr();
2850     pg = tcg_temp_new_ptr();
2851
2852     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2853     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2854     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2855
2856     gen_fn(t, pd, zn, pg, t);
2857
2858     tcg_temp_free_ptr(pd);
2859     tcg_temp_free_ptr(zn);
2860     tcg_temp_free_ptr(pg);
2861
2862     do_pred_flags(t);
2863
2864     tcg_temp_free_i32(t);
2865     return true;
2866 }
2867
2868 #define DO_PPZI(NAME, name) \
2869 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
2870                                 uint32_t insn)                            \
2871 {                                                                         \
2872     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2873         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2874         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2875     };                                                                    \
2876     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2877 }
2878
2879 DO_PPZI(CMPEQ, cmpeq)
2880 DO_PPZI(CMPNE, cmpne)
2881 DO_PPZI(CMPGT, cmpgt)
2882 DO_PPZI(CMPGE, cmpge)
2883 DO_PPZI(CMPHI, cmphi)
2884 DO_PPZI(CMPHS, cmphs)
2885 DO_PPZI(CMPLT, cmplt)
2886 DO_PPZI(CMPLE, cmple)
2887 DO_PPZI(CMPLO, cmplo)
2888 DO_PPZI(CMPLS, cmpls)
2889
2890 #undef DO_PPZI
2891
2892 /*
2893  *** SVE Partition Break Group
2894  */
2895
2896 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2897                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2898 {
2899     if (!sve_access_check(s)) {
2900         return true;
2901     }
2902
2903     unsigned vsz = pred_full_reg_size(s);
2904
2905     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2906     TCGv_ptr d = tcg_temp_new_ptr();
2907     TCGv_ptr n = tcg_temp_new_ptr();
2908     TCGv_ptr m = tcg_temp_new_ptr();
2909     TCGv_ptr g = tcg_temp_new_ptr();
2910     TCGv_i32 t = tcg_const_i32(vsz - 2);
2911
2912     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2913     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2914     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2915     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2916
2917     if (a->s) {
2918         fn_s(t, d, n, m, g, t);
2919         do_pred_flags(t);
2920     } else {
2921         fn(d, n, m, g, t);
2922     }
2923     tcg_temp_free_ptr(d);
2924     tcg_temp_free_ptr(n);
2925     tcg_temp_free_ptr(m);
2926     tcg_temp_free_ptr(g);
2927     tcg_temp_free_i32(t);
2928     return true;
2929 }
2930
2931 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2932                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2933 {
2934     if (!sve_access_check(s)) {
2935         return true;
2936     }
2937
2938     unsigned vsz = pred_full_reg_size(s);
2939
2940     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2941     TCGv_ptr d = tcg_temp_new_ptr();
2942     TCGv_ptr n = tcg_temp_new_ptr();
2943     TCGv_ptr g = tcg_temp_new_ptr();
2944     TCGv_i32 t = tcg_const_i32(vsz - 2);
2945
2946     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2947     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2948     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2949
2950     if (a->s) {
2951         fn_s(t, d, n, g, t);
2952         do_pred_flags(t);
2953     } else {
2954         fn(d, n, g, t);
2955     }
2956     tcg_temp_free_ptr(d);
2957     tcg_temp_free_ptr(n);
2958     tcg_temp_free_ptr(g);
2959     tcg_temp_free_i32(t);
2960     return true;
2961 }
2962
2963 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2964 {
2965     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2966 }
2967
2968 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2969 {
2970     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2971 }
2972
2973 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2974 {
2975     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2976 }
2977
2978 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2979 {
2980     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2981 }
2982
2983 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2984 {
2985     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2986 }
2987
2988 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2989 {
2990     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2991 }
2992
2993 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2994 {
2995     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2996 }
2997
2998 /*
2999  *** SVE Predicate Count Group
3000  */
3001
3002 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3003 {
3004     unsigned psz = pred_full_reg_size(s);
3005
3006     if (psz <= 8) {
3007         uint64_t psz_mask;
3008
3009         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3010         if (pn != pg) {
3011             TCGv_i64 g = tcg_temp_new_i64();
3012             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3013             tcg_gen_and_i64(val, val, g);
3014             tcg_temp_free_i64(g);
3015         }
3016
3017         /* Reduce the pred_esz_masks value simply to reduce the
3018          * size of the code generated here.
3019          */
3020         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3021         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3022
3023         tcg_gen_ctpop_i64(val, val);
3024     } else {
3025         TCGv_ptr t_pn = tcg_temp_new_ptr();
3026         TCGv_ptr t_pg = tcg_temp_new_ptr();
3027         unsigned desc;
3028         TCGv_i32 t_desc;
3029
3030         desc = psz - 2;
3031         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3032
3033         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3034         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3035         t_desc = tcg_const_i32(desc);
3036
3037         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3038         tcg_temp_free_ptr(t_pn);
3039         tcg_temp_free_ptr(t_pg);
3040         tcg_temp_free_i32(t_desc);
3041     }
3042 }
3043
3044 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3045 {
3046     if (sve_access_check(s)) {
3047         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3048     }
3049     return true;
3050 }
3051
3052 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3053                             uint32_t insn)
3054 {
3055     if (sve_access_check(s)) {
3056         TCGv_i64 reg = cpu_reg(s, a->rd);
3057         TCGv_i64 val = tcg_temp_new_i64();
3058
3059         do_cntp(s, val, a->esz, a->pg, a->pg);
3060         if (a->d) {
3061             tcg_gen_sub_i64(reg, reg, val);
3062         } else {
3063             tcg_gen_add_i64(reg, reg, val);
3064         }
3065         tcg_temp_free_i64(val);
3066     }
3067     return true;
3068 }
3069
3070 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3071                             uint32_t insn)
3072 {
3073     if (a->esz == 0) {
3074         return false;
3075     }
3076     if (sve_access_check(s)) {
3077         unsigned vsz = vec_full_reg_size(s);
3078         TCGv_i64 val = tcg_temp_new_i64();
3079         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3080
3081         do_cntp(s, val, a->esz, a->pg, a->pg);
3082         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3083                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3084     }
3085     return true;
3086 }
3087
3088 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3089                                 uint32_t insn)
3090 {
3091     if (sve_access_check(s)) {
3092         TCGv_i64 reg = cpu_reg(s, a->rd);
3093         TCGv_i64 val = tcg_temp_new_i64();
3094
3095         do_cntp(s, val, a->esz, a->pg, a->pg);
3096         do_sat_addsub_32(reg, val, a->u, a->d);
3097     }
3098     return true;
3099 }
3100
3101 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3102                                 uint32_t insn)
3103 {
3104     if (sve_access_check(s)) {
3105         TCGv_i64 reg = cpu_reg(s, a->rd);
3106         TCGv_i64 val = tcg_temp_new_i64();
3107
3108         do_cntp(s, val, a->esz, a->pg, a->pg);
3109         do_sat_addsub_64(reg, val, a->u, a->d);
3110     }
3111     return true;
3112 }
3113
3114 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3115                              uint32_t insn)
3116 {
3117     if (a->esz == 0) {
3118         return false;
3119     }
3120     if (sve_access_check(s)) {
3121         TCGv_i64 val = tcg_temp_new_i64();
3122         do_cntp(s, val, a->esz, a->pg, a->pg);
3123         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3124     }
3125     return true;
3126 }
3127
3128 /*
3129  *** SVE Integer Compare Scalars Group
3130  */
3131
3132 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3133 {
3134     if (!sve_access_check(s)) {
3135         return true;
3136     }
3137
3138     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3139     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3140     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3141     TCGv_i64 cmp = tcg_temp_new_i64();
3142
3143     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3144     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3145     tcg_temp_free_i64(cmp);
3146
3147     /* VF = !NF & !CF.  */
3148     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3149     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3150
3151     /* Both NF and VF actually look at bit 31.  */
3152     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3153     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3154     return true;
3155 }
3156
3157 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3158 {
3159     if (!sve_access_check(s)) {
3160         return true;
3161     }
3162
3163     TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3164     TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3165     TCGv_i64 t0 = tcg_temp_new_i64();
3166     TCGv_i64 t1 = tcg_temp_new_i64();
3167     TCGv_i32 t2, t3;
3168     TCGv_ptr ptr;
3169     unsigned desc, vsz = vec_full_reg_size(s);
3170     TCGCond cond;
3171
3172     if (!a->sf) {
3173         if (a->u) {
3174             tcg_gen_ext32u_i64(op0, op0);
3175             tcg_gen_ext32u_i64(op1, op1);
3176         } else {
3177             tcg_gen_ext32s_i64(op0, op0);
3178             tcg_gen_ext32s_i64(op1, op1);
3179         }
3180     }
3181
3182     /* For the helper, compress the different conditions into a computation
3183      * of how many iterations for which the condition is true.
3184      *
3185      * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3186      * 2**64 iterations, overflowing to 0.  Of course, predicate registers
3187      * aren't that large, so any value >= predicate size is sufficient.
3188      */
3189     tcg_gen_sub_i64(t0, op1, op0);
3190
3191     /* t0 = MIN(op1 - op0, vsz).  */
3192     tcg_gen_movi_i64(t1, vsz);
3193     tcg_gen_umin_i64(t0, t0, t1);
3194     if (a->eq) {
3195         /* Equality means one more iteration.  */
3196         tcg_gen_addi_i64(t0, t0, 1);
3197     }
3198
3199     /* t0 = (condition true ? t0 : 0).  */
3200     cond = (a->u
3201             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3202             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3203     tcg_gen_movi_i64(t1, 0);
3204     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3205
3206     t2 = tcg_temp_new_i32();
3207     tcg_gen_extrl_i64_i32(t2, t0);
3208     tcg_temp_free_i64(t0);
3209     tcg_temp_free_i64(t1);
3210
3211     desc = (vsz / 8) - 2;
3212     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3213     t3 = tcg_const_i32(desc);
3214
3215     ptr = tcg_temp_new_ptr();
3216     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3217
3218     gen_helper_sve_while(t2, ptr, t2, t3);
3219     do_pred_flags(t2);
3220
3221     tcg_temp_free_ptr(ptr);
3222     tcg_temp_free_i32(t2);
3223     tcg_temp_free_i32(t3);
3224     return true;
3225 }
3226
3227 /*
3228  *** SVE Integer Wide Immediate - Unpredicated Group
3229  */
3230
3231 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3232 {
3233     if (a->esz == 0) {
3234         return false;
3235     }
3236     if (sve_access_check(s)) {
3237         unsigned vsz = vec_full_reg_size(s);
3238         int dofs = vec_full_reg_offset(s, a->rd);
3239         uint64_t imm;
3240
3241         /* Decode the VFP immediate.  */
3242         imm = vfp_expand_imm(a->esz, a->imm);
3243         imm = dup_const(a->esz, imm);
3244
3245         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3246     }
3247     return true;
3248 }
3249
3250 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3251 {
3252     if (a->esz == 0 && extract32(insn, 13, 1)) {
3253         return false;
3254     }
3255     if (sve_access_check(s)) {
3256         unsigned vsz = vec_full_reg_size(s);
3257         int dofs = vec_full_reg_offset(s, a->rd);
3258
3259         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3260     }
3261     return true;
3262 }
3263
3264 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3265 {
3266     if (a->esz == 0 && extract32(insn, 13, 1)) {
3267         return false;
3268     }
3269     if (sve_access_check(s)) {
3270         unsigned vsz = vec_full_reg_size(s);
3271         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3272                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3273     }
3274     return true;
3275 }
3276
3277 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3278 {
3279     a->imm = -a->imm;
3280     return trans_ADD_zzi(s, a, insn);
3281 }
3282
3283 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3284 {
3285     static const GVecGen2s op[4] = {
3286         { .fni8 = tcg_gen_vec_sub8_i64,
3287           .fniv = tcg_gen_sub_vec,
3288           .fno = gen_helper_sve_subri_b,
3289           .opc = INDEX_op_sub_vec,
3290           .vece = MO_8,
3291           .scalar_first = true },
3292         { .fni8 = tcg_gen_vec_sub16_i64,
3293           .fniv = tcg_gen_sub_vec,
3294           .fno = gen_helper_sve_subri_h,
3295           .opc = INDEX_op_sub_vec,
3296           .vece = MO_16,
3297           .scalar_first = true },
3298         { .fni4 = tcg_gen_sub_i32,
3299           .fniv = tcg_gen_sub_vec,
3300           .fno = gen_helper_sve_subri_s,
3301           .opc = INDEX_op_sub_vec,
3302           .vece = MO_32,
3303           .scalar_first = true },
3304         { .fni8 = tcg_gen_sub_i64,
3305           .fniv = tcg_gen_sub_vec,
3306           .fno = gen_helper_sve_subri_d,
3307           .opc = INDEX_op_sub_vec,
3308           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3309           .vece = MO_64,
3310           .scalar_first = true }
3311     };
3312
3313     if (a->esz == 0 && extract32(insn, 13, 1)) {
3314         return false;
3315     }
3316     if (sve_access_check(s)) {
3317         unsigned vsz = vec_full_reg_size(s);
3318         TCGv_i64 c = tcg_const_i64(a->imm);
3319         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3320                         vec_full_reg_offset(s, a->rn),
3321                         vsz, vsz, c, &op[a->esz]);
3322         tcg_temp_free_i64(c);
3323     }
3324     return true;
3325 }
3326
3327 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3328 {
3329     if (sve_access_check(s)) {
3330         unsigned vsz = vec_full_reg_size(s);
3331         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3332                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3333     }
3334     return true;
3335 }
3336
3337 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3338                        bool u, bool d)
3339 {
3340     if (a->esz == 0 && extract32(insn, 13, 1)) {
3341         return false;
3342     }
3343     if (sve_access_check(s)) {
3344         TCGv_i64 val = tcg_const_i64(a->imm);
3345         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3346         tcg_temp_free_i64(val);
3347     }
3348     return true;
3349 }
3350
3351 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3352 {
3353     return do_zzi_sat(s, a, insn, false, false);
3354 }
3355
3356 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3357 {
3358     return do_zzi_sat(s, a, insn, true, false);
3359 }
3360
3361 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3362 {
3363     return do_zzi_sat(s, a, insn, false, true);
3364 }
3365
3366 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3367 {
3368     return do_zzi_sat(s, a, insn, true, true);
3369 }
3370
3371 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3372 {
3373     if (sve_access_check(s)) {
3374         unsigned vsz = vec_full_reg_size(s);
3375         TCGv_i64 c = tcg_const_i64(a->imm);
3376
3377         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3378                             vec_full_reg_offset(s, a->rn),
3379                             c, vsz, vsz, 0, fn);
3380         tcg_temp_free_i64(c);
3381     }
3382     return true;
3383 }
3384
3385 #define DO_ZZI(NAME, name) \
3386 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
3387                                uint32_t insn)                           \
3388 {                                                                       \
3389     static gen_helper_gvec_2i * const fns[4] = {                        \
3390         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3391         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3392     };                                                                  \
3393     return do_zzi_ool(s, a, fns[a->esz]);                               \
3394 }
3395
3396 DO_ZZI(SMAX, smax)
3397 DO_ZZI(UMAX, umax)
3398 DO_ZZI(SMIN, smin)
3399 DO_ZZI(UMIN, umin)
3400
3401 #undef DO_ZZI
3402
3403 /*
3404  *** SVE Floating Point Multiply-Add Indexed Group
3405  */
3406
3407 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3408 {
3409     static gen_helper_gvec_4_ptr * const fns[3] = {
3410         gen_helper_gvec_fmla_idx_h,
3411         gen_helper_gvec_fmla_idx_s,
3412         gen_helper_gvec_fmla_idx_d,
3413     };
3414
3415     if (sve_access_check(s)) {
3416         unsigned vsz = vec_full_reg_size(s);
3417         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3418         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3419                            vec_full_reg_offset(s, a->rn),
3420                            vec_full_reg_offset(s, a->rm),
3421                            vec_full_reg_offset(s, a->ra),
3422                            status, vsz, vsz, (a->index << 1) | a->sub,
3423                            fns[a->esz - 1]);
3424         tcg_temp_free_ptr(status);
3425     }
3426     return true;
3427 }
3428
3429 /*
3430  *** SVE Floating Point Multiply Indexed Group
3431  */
3432
3433 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3434 {
3435     static gen_helper_gvec_3_ptr * const fns[3] = {
3436         gen_helper_gvec_fmul_idx_h,
3437         gen_helper_gvec_fmul_idx_s,
3438         gen_helper_gvec_fmul_idx_d,
3439     };
3440
3441     if (sve_access_check(s)) {
3442         unsigned vsz = vec_full_reg_size(s);
3443         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3444         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3445                            vec_full_reg_offset(s, a->rn),
3446                            vec_full_reg_offset(s, a->rm),
3447                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3448         tcg_temp_free_ptr(status);
3449     }
3450     return true;
3451 }
3452
3453 /*
3454  *** SVE Floating Point Fast Reduction Group
3455  */
3456
3457 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3458                                   TCGv_ptr, TCGv_i32);
3459
3460 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3461                       gen_helper_fp_reduce *fn)
3462 {
3463     unsigned vsz = vec_full_reg_size(s);
3464     unsigned p2vsz = pow2ceil(vsz);
3465     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3466     TCGv_ptr t_zn, t_pg, status;
3467     TCGv_i64 temp;
3468
3469     temp = tcg_temp_new_i64();
3470     t_zn = tcg_temp_new_ptr();
3471     t_pg = tcg_temp_new_ptr();
3472
3473     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3474     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3475     status = get_fpstatus_ptr(a->esz == MO_16);
3476
3477     fn(temp, t_zn, t_pg, status, t_desc);
3478     tcg_temp_free_ptr(t_zn);
3479     tcg_temp_free_ptr(t_pg);
3480     tcg_temp_free_ptr(status);
3481     tcg_temp_free_i32(t_desc);
3482
3483     write_fp_dreg(s, a->rd, temp);
3484     tcg_temp_free_i64(temp);
3485 }
3486
3487 #define DO_VPZ(NAME, name) \
3488 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3489 {                                                                        \
3490     static gen_helper_fp_reduce * const fns[3] = {                       \
3491         gen_helper_sve_##name##_h,                                       \
3492         gen_helper_sve_##name##_s,                                       \
3493         gen_helper_sve_##name##_d,                                       \
3494     };                                                                   \
3495     if (a->esz == 0) {                                                   \
3496         return false;                                                    \
3497     }                                                                    \
3498     if (sve_access_check(s)) {                                           \
3499         do_reduce(s, a, fns[a->esz - 1]);                                \
3500     }                                                                    \
3501     return true;                                                         \
3502 }
3503
3504 DO_VPZ(FADDV, faddv)
3505 DO_VPZ(FMINNMV, fminnmv)
3506 DO_VPZ(FMAXNMV, fmaxnmv)
3507 DO_VPZ(FMINV, fminv)
3508 DO_VPZ(FMAXV, fmaxv)
3509
3510 /*
3511  *** SVE Floating Point Unary Operations - Unpredicated Group
3512  */
3513
3514 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3515 {
3516     unsigned vsz = vec_full_reg_size(s);
3517     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3518
3519     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3520                        vec_full_reg_offset(s, a->rn),
3521                        status, vsz, vsz, 0, fn);
3522     tcg_temp_free_ptr(status);
3523 }
3524
3525 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3526 {
3527     static gen_helper_gvec_2_ptr * const fns[3] = {
3528         gen_helper_gvec_frecpe_h,
3529         gen_helper_gvec_frecpe_s,
3530         gen_helper_gvec_frecpe_d,
3531     };
3532     if (a->esz == 0) {
3533         return false;
3534     }
3535     if (sve_access_check(s)) {
3536         do_zz_fp(s, a, fns[a->esz - 1]);
3537     }
3538     return true;
3539 }
3540
3541 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3542 {
3543     static gen_helper_gvec_2_ptr * const fns[3] = {
3544         gen_helper_gvec_frsqrte_h,
3545         gen_helper_gvec_frsqrte_s,
3546         gen_helper_gvec_frsqrte_d,
3547     };
3548     if (a->esz == 0) {
3549         return false;
3550     }
3551     if (sve_access_check(s)) {
3552         do_zz_fp(s, a, fns[a->esz - 1]);
3553     }
3554     return true;
3555 }
3556
3557 /*
3558  *** SVE Floating Point Compare with Zero Group
3559  */
3560
3561 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3562                       gen_helper_gvec_3_ptr *fn)
3563 {
3564     unsigned vsz = vec_full_reg_size(s);
3565     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3566
3567     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3568                        vec_full_reg_offset(s, a->rn),
3569                        pred_full_reg_offset(s, a->pg),
3570                        status, vsz, vsz, 0, fn);
3571     tcg_temp_free_ptr(status);
3572 }
3573
3574 #define DO_PPZ(NAME, name) \
3575 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3576 {                                                                 \
3577     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3578         gen_helper_sve_##name##_h,                                \
3579         gen_helper_sve_##name##_s,                                \
3580         gen_helper_sve_##name##_d,                                \
3581     };                                                            \
3582     if (a->esz == 0) {                                            \
3583         return false;                                             \
3584     }                                                             \
3585     if (sve_access_check(s)) {                                    \
3586         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3587     }                                                             \
3588     return true;                                                  \
3589 }
3590
3591 DO_PPZ(FCMGE_ppz0, fcmge0)
3592 DO_PPZ(FCMGT_ppz0, fcmgt0)
3593 DO_PPZ(FCMLE_ppz0, fcmle0)
3594 DO_PPZ(FCMLT_ppz0, fcmlt0)
3595 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3596 DO_PPZ(FCMNE_ppz0, fcmne0)
3597
3598 #undef DO_PPZ
3599
3600 /*
3601  *** SVE floating-point trig multiply-add coefficient
3602  */
3603
3604 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
3605 {
3606     static gen_helper_gvec_3_ptr * const fns[3] = {
3607         gen_helper_sve_ftmad_h,
3608         gen_helper_sve_ftmad_s,
3609         gen_helper_sve_ftmad_d,
3610     };
3611
3612     if (a->esz == 0) {
3613         return false;
3614     }
3615     if (sve_access_check(s)) {
3616         unsigned vsz = vec_full_reg_size(s);
3617         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3618         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3619                            vec_full_reg_offset(s, a->rn),
3620                            vec_full_reg_offset(s, a->rm),
3621                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3622         tcg_temp_free_ptr(status);
3623     }
3624     return true;
3625 }
3626
3627 /*
3628  *** SVE Floating Point Accumulating Reduction Group
3629  */
3630
3631 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3632 {
3633     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3634                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3635     static fadda_fn * const fns[3] = {
3636         gen_helper_sve_fadda_h,
3637         gen_helper_sve_fadda_s,
3638         gen_helper_sve_fadda_d,
3639     };
3640     unsigned vsz = vec_full_reg_size(s);
3641     TCGv_ptr t_rm, t_pg, t_fpst;
3642     TCGv_i64 t_val;
3643     TCGv_i32 t_desc;
3644
3645     if (a->esz == 0) {
3646         return false;
3647     }
3648     if (!sve_access_check(s)) {
3649         return true;
3650     }
3651
3652     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3653     t_rm = tcg_temp_new_ptr();
3654     t_pg = tcg_temp_new_ptr();
3655     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3656     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3657     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3658     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3659
3660     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3661
3662     tcg_temp_free_i32(t_desc);
3663     tcg_temp_free_ptr(t_fpst);
3664     tcg_temp_free_ptr(t_pg);
3665     tcg_temp_free_ptr(t_rm);
3666
3667     write_fp_dreg(s, a->rd, t_val);
3668     tcg_temp_free_i64(t_val);
3669     return true;
3670 }
3671
3672 /*
3673  *** SVE Floating Point Arithmetic - Unpredicated Group
3674  */
3675
3676 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3677                       gen_helper_gvec_3_ptr *fn)
3678 {
3679     if (fn == NULL) {
3680         return false;
3681     }
3682     if (sve_access_check(s)) {
3683         unsigned vsz = vec_full_reg_size(s);
3684         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3685         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3686                            vec_full_reg_offset(s, a->rn),
3687                            vec_full_reg_offset(s, a->rm),
3688                            status, vsz, vsz, 0, fn);
3689         tcg_temp_free_ptr(status);
3690     }
3691     return true;
3692 }
3693
3694
3695 #define DO_FP3(NAME, name) \
3696 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3697 {                                                                   \
3698     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3699         NULL, gen_helper_gvec_##name##_h,                           \
3700         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3701     };                                                              \
3702     return do_zzz_fp(s, a, fns[a->esz]);                            \
3703 }
3704
3705 DO_FP3(FADD_zzz, fadd)
3706 DO_FP3(FSUB_zzz, fsub)
3707 DO_FP3(FMUL_zzz, fmul)
3708 DO_FP3(FTSMUL, ftsmul)
3709 DO_FP3(FRECPS, recps)
3710 DO_FP3(FRSQRTS, rsqrts)
3711
3712 #undef DO_FP3
3713
3714 /*
3715  *** SVE Floating Point Arithmetic - Predicated Group
3716  */
3717
3718 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3719                        gen_helper_gvec_4_ptr *fn)
3720 {
3721     if (fn == NULL) {
3722         return false;
3723     }
3724     if (sve_access_check(s)) {
3725         unsigned vsz = vec_full_reg_size(s);
3726         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3727         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3728                            vec_full_reg_offset(s, a->rn),
3729                            vec_full_reg_offset(s, a->rm),
3730                            pred_full_reg_offset(s, a->pg),
3731                            status, vsz, vsz, 0, fn);
3732         tcg_temp_free_ptr(status);
3733     }
3734     return true;
3735 }
3736
3737 #define DO_FP3(NAME, name) \
3738 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3739 {                                                                   \
3740     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3741         NULL, gen_helper_sve_##name##_h,                            \
3742         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3743     };                                                              \
3744     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3745 }
3746
3747 DO_FP3(FADD_zpzz, fadd)
3748 DO_FP3(FSUB_zpzz, fsub)
3749 DO_FP3(FMUL_zpzz, fmul)
3750 DO_FP3(FMIN_zpzz, fmin)
3751 DO_FP3(FMAX_zpzz, fmax)
3752 DO_FP3(FMINNM_zpzz, fminnum)
3753 DO_FP3(FMAXNM_zpzz, fmaxnum)
3754 DO_FP3(FABD, fabd)
3755 DO_FP3(FSCALE, fscalbn)
3756 DO_FP3(FDIV, fdiv)
3757 DO_FP3(FMULX, fmulx)
3758
3759 #undef DO_FP3
3760
3761 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3762                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3763
3764 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3765                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3766 {
3767     unsigned vsz = vec_full_reg_size(s);
3768     TCGv_ptr t_zd, t_zn, t_pg, status;
3769     TCGv_i32 desc;
3770
3771     t_zd = tcg_temp_new_ptr();
3772     t_zn = tcg_temp_new_ptr();
3773     t_pg = tcg_temp_new_ptr();
3774     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3775     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3776     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3777
3778     status = get_fpstatus_ptr(is_fp16);
3779     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3780     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3781
3782     tcg_temp_free_i32(desc);
3783     tcg_temp_free_ptr(status);
3784     tcg_temp_free_ptr(t_pg);
3785     tcg_temp_free_ptr(t_zn);
3786     tcg_temp_free_ptr(t_zd);
3787 }
3788
3789 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3790                       gen_helper_sve_fp2scalar *fn)
3791 {
3792     TCGv_i64 temp = tcg_const_i64(imm);
3793     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3794     tcg_temp_free_i64(temp);
3795 }
3796
3797 #define DO_FP_IMM(NAME, name, const0, const1) \
3798 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a,         \
3799                                 uint32_t insn)                            \
3800 {                                                                         \
3801     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3802         gen_helper_sve_##name##_h,                                        \
3803         gen_helper_sve_##name##_s,                                        \
3804         gen_helper_sve_##name##_d                                         \
3805     };                                                                    \
3806     static uint64_t const val[3][2] = {                                   \
3807         { float16_##const0, float16_##const1 },                           \
3808         { float32_##const0, float32_##const1 },                           \
3809         { float64_##const0, float64_##const1 },                           \
3810     };                                                                    \
3811     if (a->esz == 0) {                                                    \
3812         return false;                                                     \
3813     }                                                                     \
3814     if (sve_access_check(s)) {                                            \
3815         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3816     }                                                                     \
3817     return true;                                                          \
3818 }
3819
3820 #define float16_two  make_float16(0x4000)
3821 #define float32_two  make_float32(0x40000000)
3822 #define float64_two  make_float64(0x4000000000000000ULL)
3823
3824 DO_FP_IMM(FADD, fadds, half, one)
3825 DO_FP_IMM(FSUB, fsubs, half, one)
3826 DO_FP_IMM(FMUL, fmuls, half, two)
3827 DO_FP_IMM(FSUBR, fsubrs, half, one)
3828 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3829 DO_FP_IMM(FMINNM, fminnms, zero, one)
3830 DO_FP_IMM(FMAX, fmaxs, zero, one)
3831 DO_FP_IMM(FMIN, fmins, zero, one)
3832
3833 #undef DO_FP_IMM
3834
3835 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3836                       gen_helper_gvec_4_ptr *fn)
3837 {
3838     if (fn == NULL) {
3839         return false;
3840     }
3841     if (sve_access_check(s)) {
3842         unsigned vsz = vec_full_reg_size(s);
3843         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3844         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3845                            vec_full_reg_offset(s, a->rn),
3846                            vec_full_reg_offset(s, a->rm),
3847                            pred_full_reg_offset(s, a->pg),
3848                            status, vsz, vsz, 0, fn);
3849         tcg_temp_free_ptr(status);
3850     }
3851     return true;
3852 }
3853
3854 #define DO_FPCMP(NAME, name) \
3855 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,     \
3856                                 uint32_t insn)                        \
3857 {                                                                     \
3858     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3859         NULL, gen_helper_sve_##name##_h,                              \
3860         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3861     };                                                                \
3862     return do_fp_cmp(s, a, fns[a->esz]);                              \
3863 }
3864
3865 DO_FPCMP(FCMGE, fcmge)
3866 DO_FPCMP(FCMGT, fcmgt)
3867 DO_FPCMP(FCMEQ, fcmeq)
3868 DO_FPCMP(FCMNE, fcmne)
3869 DO_FPCMP(FCMUO, fcmuo)
3870 DO_FPCMP(FACGE, facge)
3871 DO_FPCMP(FACGT, facgt)
3872
3873 #undef DO_FPCMP
3874
3875 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3876
3877 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3878 {
3879     if (fn == NULL) {
3880         return false;
3881     }
3882     if (!sve_access_check(s)) {
3883         return true;
3884     }
3885
3886     unsigned vsz = vec_full_reg_size(s);
3887     unsigned desc;
3888     TCGv_i32 t_desc;
3889     TCGv_ptr pg = tcg_temp_new_ptr();
3890
3891     /* We would need 7 operands to pass these arguments "properly".
3892      * So we encode all the register numbers into the descriptor.
3893      */
3894     desc = deposit32(a->rd, 5, 5, a->rn);
3895     desc = deposit32(desc, 10, 5, a->rm);
3896     desc = deposit32(desc, 15, 5, a->ra);
3897     desc = simd_desc(vsz, vsz, desc);
3898
3899     t_desc = tcg_const_i32(desc);
3900     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3901     fn(cpu_env, pg, t_desc);
3902     tcg_temp_free_i32(t_desc);
3903     tcg_temp_free_ptr(pg);
3904     return true;
3905 }
3906
3907 #define DO_FMLA(NAME, name) \
3908 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3909 {                                                                    \
3910     static gen_helper_sve_fmla * const fns[4] = {                    \
3911         NULL, gen_helper_sve_##name##_h,                             \
3912         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3913     };                                                               \
3914     return do_fmla(s, a, fns[a->esz]);                               \
3915 }
3916
3917 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3918 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3919 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3920 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3921
3922 #undef DO_FMLA
3923
3924 /*
3925  *** SVE Floating Point Unary Operations Predicated Group
3926  */
3927
3928 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3929                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
3930 {
3931     if (sve_access_check(s)) {
3932         unsigned vsz = vec_full_reg_size(s);
3933         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3934         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3935                            vec_full_reg_offset(s, rn),
3936                            pred_full_reg_offset(s, pg),
3937                            status, vsz, vsz, 0, fn);
3938         tcg_temp_free_ptr(status);
3939     }
3940     return true;
3941 }
3942
3943 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3944 {
3945     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3946 }
3947
3948 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3949 {
3950     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3951 }
3952
3953 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3954 {
3955     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3956 }
3957
3958 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3959 {
3960     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3961 }
3962
3963 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3964 {
3965     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3966 }
3967
3968 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3969 {
3970     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3971 }
3972
3973 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3974 {
3975     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3976 }
3977
3978 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3979 {
3980     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3981 }
3982
3983 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3984 {
3985     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3986 }
3987
3988 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3989 {
3990     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3991 }
3992
3993 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3994 {
3995     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3996 }
3997
3998 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3999 {
4000     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4001 }
4002
4003 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4004 {
4005     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4006 }
4007
4008 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4009 {
4010     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4011 }
4012
4013 /*
4014  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4015  */
4016
4017 /* Subroutine loading a vector register at VOFS of LEN bytes.
4018  * The load should begin at the address Rn + IMM.
4019  */
4020
4021 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
4022                    int rn, int imm)
4023 {
4024     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4025     uint32_t len_remain = len % 8;
4026     uint32_t nparts = len / 8 + ctpop8(len_remain);
4027     int midx = get_mem_index(s);
4028     TCGv_i64 addr, t0, t1;
4029
4030     addr = tcg_temp_new_i64();
4031     t0 = tcg_temp_new_i64();
4032
4033     /* Note that unpredicated load/store of vector/predicate registers
4034      * are defined as a stream of bytes, which equates to little-endian
4035      * operations on larger quantities.  There is no nice way to force
4036      * a little-endian load for aarch64_be-linux-user out of line.
4037      *
4038      * Attempt to keep code expansion to a minimum by limiting the
4039      * amount of unrolling done.
4040      */
4041     if (nparts <= 4) {
4042         int i;
4043
4044         for (i = 0; i < len_align; i += 8) {
4045             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4046             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4047             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4048         }
4049     } else {
4050         TCGLabel *loop = gen_new_label();
4051         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4052
4053         gen_set_label(loop);
4054
4055         /* Minimize the number of local temps that must be re-read from
4056          * the stack each iteration.  Instead, re-compute values other
4057          * than the loop counter.
4058          */
4059         tp = tcg_temp_new_ptr();
4060         tcg_gen_addi_ptr(tp, i, imm);
4061         tcg_gen_extu_ptr_i64(addr, tp);
4062         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4063
4064         tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4065
4066         tcg_gen_add_ptr(tp, cpu_env, i);
4067         tcg_gen_addi_ptr(i, i, 8);
4068         tcg_gen_st_i64(t0, tp, vofs);
4069         tcg_temp_free_ptr(tp);
4070
4071         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4072         tcg_temp_free_ptr(i);
4073     }
4074
4075     /* Predicate register loads can be any multiple of 2.
4076      * Note that we still store the entire 64-bit unit into cpu_env.
4077      */
4078     if (len_remain) {
4079         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4080
4081         switch (len_remain) {
4082         case 2:
4083         case 4:
4084         case 8:
4085             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4086             break;
4087
4088         case 6:
4089             t1 = tcg_temp_new_i64();
4090             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4091             tcg_gen_addi_i64(addr, addr, 4);
4092             tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4093             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4094             tcg_temp_free_i64(t1);
4095             break;
4096
4097         default:
4098             g_assert_not_reached();
4099         }
4100         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4101     }
4102     tcg_temp_free_i64(addr);
4103     tcg_temp_free_i64(t0);
4104 }
4105
4106 /* Similarly for stores.  */
4107 static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
4108                    int rn, int imm)
4109 {
4110     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4111     uint32_t len_remain = len % 8;
4112     uint32_t nparts = len / 8 + ctpop8(len_remain);
4113     int midx = get_mem_index(s);
4114     TCGv_i64 addr, t0;
4115
4116     addr = tcg_temp_new_i64();
4117     t0 = tcg_temp_new_i64();
4118
4119     /* Note that unpredicated load/store of vector/predicate registers
4120      * are defined as a stream of bytes, which equates to little-endian
4121      * operations on larger quantities.  There is no nice way to force
4122      * a little-endian store for aarch64_be-linux-user out of line.
4123      *
4124      * Attempt to keep code expansion to a minimum by limiting the
4125      * amount of unrolling done.
4126      */
4127     if (nparts <= 4) {
4128         int i;
4129
4130         for (i = 0; i < len_align; i += 8) {
4131             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4132             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4133             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4134         }
4135     } else {
4136         TCGLabel *loop = gen_new_label();
4137         TCGv_ptr t2, i = tcg_const_local_ptr(0);
4138
4139         gen_set_label(loop);
4140
4141         t2 = tcg_temp_new_ptr();
4142         tcg_gen_add_ptr(t2, cpu_env, i);
4143         tcg_gen_ld_i64(t0, t2, vofs);
4144
4145         /* Minimize the number of local temps that must be re-read from
4146          * the stack each iteration.  Instead, re-compute values other
4147          * than the loop counter.
4148          */
4149         tcg_gen_addi_ptr(t2, i, imm);
4150         tcg_gen_extu_ptr_i64(addr, t2);
4151         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4152         tcg_temp_free_ptr(t2);
4153
4154         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4155
4156         tcg_gen_addi_ptr(i, i, 8);
4157
4158         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4159         tcg_temp_free_ptr(i);
4160     }
4161
4162     /* Predicate register stores can be any multiple of 2.  */
4163     if (len_remain) {
4164         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4165         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4166
4167         switch (len_remain) {
4168         case 2:
4169         case 4:
4170         case 8:
4171             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4172             break;
4173
4174         case 6:
4175             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4176             tcg_gen_addi_i64(addr, addr, 4);
4177             tcg_gen_shri_i64(t0, t0, 32);
4178             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4179             break;
4180
4181         default:
4182             g_assert_not_reached();
4183         }
4184     }
4185     tcg_temp_free_i64(addr);
4186     tcg_temp_free_i64(t0);
4187 }
4188
4189 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4190 {
4191     if (sve_access_check(s)) {
4192         int size = vec_full_reg_size(s);
4193         int off = vec_full_reg_offset(s, a->rd);
4194         do_ldr(s, off, size, a->rn, a->imm * size);
4195     }
4196     return true;
4197 }
4198
4199 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4200 {
4201     if (sve_access_check(s)) {
4202         int size = pred_full_reg_size(s);
4203         int off = pred_full_reg_offset(s, a->rd);
4204         do_ldr(s, off, size, a->rn, a->imm * size);
4205     }
4206     return true;
4207 }
4208
4209 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4210 {
4211     if (sve_access_check(s)) {
4212         int size = vec_full_reg_size(s);
4213         int off = vec_full_reg_offset(s, a->rd);
4214         do_str(s, off, size, a->rn, a->imm * size);
4215     }
4216     return true;
4217 }
4218
4219 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4220 {
4221     if (sve_access_check(s)) {
4222         int size = pred_full_reg_size(s);
4223         int off = pred_full_reg_offset(s, a->rd);
4224         do_str(s, off, size, a->rn, a->imm * size);
4225     }
4226     return true;
4227 }
4228
4229 /*
4230  *** SVE Memory - Contiguous Load Group
4231  */
4232
4233 /* The memory mode of the dtype.  */
4234 static const TCGMemOp dtype_mop[16] = {
4235     MO_UB, MO_UB, MO_UB, MO_UB,
4236     MO_SL, MO_UW, MO_UW, MO_UW,
4237     MO_SW, MO_SW, MO_UL, MO_UL,
4238     MO_SB, MO_SB, MO_SB, MO_Q
4239 };
4240
4241 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4242
4243 /* The vector element size of dtype.  */
4244 static const uint8_t dtype_esz[16] = {
4245     0, 1, 2, 3,
4246     3, 1, 2, 3,
4247     3, 2, 2, 3,
4248     3, 2, 1, 3
4249 };
4250
4251 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4252                        gen_helper_gvec_mem *fn)
4253 {
4254     unsigned vsz = vec_full_reg_size(s);
4255     TCGv_ptr t_pg;
4256     TCGv_i32 desc;
4257
4258     /* For e.g. LD4, there are not enough arguments to pass all 4
4259      * registers as pointers, so encode the regno into the data field.
4260      * For consistency, do this even for LD1.
4261      */
4262     desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4263     t_pg = tcg_temp_new_ptr();
4264
4265     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4266     fn(cpu_env, t_pg, addr, desc);
4267
4268     tcg_temp_free_ptr(t_pg);
4269     tcg_temp_free_i32(desc);
4270 }
4271
4272 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4273                       TCGv_i64 addr, int dtype, int nreg)
4274 {
4275     static gen_helper_gvec_mem * const fns[16][4] = {
4276         { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4277           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4278         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4279         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4280         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4281
4282         { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
4283         { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
4284           gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
4285         { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
4286         { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
4287
4288         { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
4289         { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
4290         { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
4291           gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
4292         { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
4293
4294         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4295         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4296         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4297         { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
4298           gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
4299     };
4300     gen_helper_gvec_mem *fn = fns[dtype][nreg];
4301
4302     /* While there are holes in the table, they are not
4303      * accessible via the instruction encoding.
4304      */
4305     assert(fn != NULL);
4306     do_mem_zpa(s, zt, pg, addr, fn);
4307 }
4308
4309 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4310 {
4311     if (a->rm == 31) {
4312         return false;
4313     }
4314     if (sve_access_check(s)) {
4315         TCGv_i64 addr = new_tmp_a64(s);
4316         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
4317                          (a->nreg + 1) << dtype_msz(a->dtype));
4318         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4319         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4320     }
4321     return true;
4322 }
4323
4324 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4325 {
4326     if (sve_access_check(s)) {
4327         int vsz = vec_full_reg_size(s);
4328         int elements = vsz >> dtype_esz[a->dtype];
4329         TCGv_i64 addr = new_tmp_a64(s);
4330
4331         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4332                          (a->imm * elements * (a->nreg + 1))
4333                          << dtype_msz(a->dtype));
4334         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4335     }
4336     return true;
4337 }
4338
4339 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4340 {
4341     static gen_helper_gvec_mem * const fns[16] = {
4342         gen_helper_sve_ldff1bb_r,
4343         gen_helper_sve_ldff1bhu_r,
4344         gen_helper_sve_ldff1bsu_r,
4345         gen_helper_sve_ldff1bdu_r,
4346
4347         gen_helper_sve_ldff1sds_r,
4348         gen_helper_sve_ldff1hh_r,
4349         gen_helper_sve_ldff1hsu_r,
4350         gen_helper_sve_ldff1hdu_r,
4351
4352         gen_helper_sve_ldff1hds_r,
4353         gen_helper_sve_ldff1hss_r,
4354         gen_helper_sve_ldff1ss_r,
4355         gen_helper_sve_ldff1sdu_r,
4356
4357         gen_helper_sve_ldff1bds_r,
4358         gen_helper_sve_ldff1bss_r,
4359         gen_helper_sve_ldff1bhs_r,
4360         gen_helper_sve_ldff1dd_r,
4361     };
4362
4363     if (sve_access_check(s)) {
4364         TCGv_i64 addr = new_tmp_a64(s);
4365         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4366         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4367         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4368     }
4369     return true;
4370 }
4371
4372 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4373 {
4374     static gen_helper_gvec_mem * const fns[16] = {
4375         gen_helper_sve_ldnf1bb_r,
4376         gen_helper_sve_ldnf1bhu_r,
4377         gen_helper_sve_ldnf1bsu_r,
4378         gen_helper_sve_ldnf1bdu_r,
4379
4380         gen_helper_sve_ldnf1sds_r,
4381         gen_helper_sve_ldnf1hh_r,
4382         gen_helper_sve_ldnf1hsu_r,
4383         gen_helper_sve_ldnf1hdu_r,
4384
4385         gen_helper_sve_ldnf1hds_r,
4386         gen_helper_sve_ldnf1hss_r,
4387         gen_helper_sve_ldnf1ss_r,
4388         gen_helper_sve_ldnf1sdu_r,
4389
4390         gen_helper_sve_ldnf1bds_r,
4391         gen_helper_sve_ldnf1bss_r,
4392         gen_helper_sve_ldnf1bhs_r,
4393         gen_helper_sve_ldnf1dd_r,
4394     };
4395
4396     if (sve_access_check(s)) {
4397         int vsz = vec_full_reg_size(s);
4398         int elements = vsz >> dtype_esz[a->dtype];
4399         int off = (a->imm * elements) << dtype_msz(a->dtype);
4400         TCGv_i64 addr = new_tmp_a64(s);
4401
4402         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4403         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4404     }
4405     return true;
4406 }
4407
4408 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4409 {
4410     static gen_helper_gvec_mem * const fns[4] = {
4411         gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4412         gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4413     };
4414     unsigned vsz = vec_full_reg_size(s);
4415     TCGv_ptr t_pg;
4416     TCGv_i32 desc;
4417
4418     /* Load the first quadword using the normal predicated load helpers.  */
4419     desc = tcg_const_i32(simd_desc(16, 16, zt));
4420     t_pg = tcg_temp_new_ptr();
4421
4422     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4423     fns[msz](cpu_env, t_pg, addr, desc);
4424
4425     tcg_temp_free_ptr(t_pg);
4426     tcg_temp_free_i32(desc);
4427
4428     /* Replicate that first quadword.  */
4429     if (vsz > 16) {
4430         unsigned dofs = vec_full_reg_offset(s, zt);
4431         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4432     }
4433 }
4434
4435 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4436 {
4437     if (a->rm == 31) {
4438         return false;
4439     }
4440     if (sve_access_check(s)) {
4441         int msz = dtype_msz(a->dtype);
4442         TCGv_i64 addr = new_tmp_a64(s);
4443         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4444         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4445         do_ldrq(s, a->rd, a->pg, addr, msz);
4446     }
4447     return true;
4448 }
4449
4450 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4451 {
4452     if (sve_access_check(s)) {
4453         TCGv_i64 addr = new_tmp_a64(s);
4454         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4455         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4456     }
4457     return true;
4458 }
4459
4460 /* Load and broadcast element.  */
4461 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4462 {
4463     if (!sve_access_check(s)) {
4464         return true;
4465     }
4466
4467     unsigned vsz = vec_full_reg_size(s);
4468     unsigned psz = pred_full_reg_size(s);
4469     unsigned esz = dtype_esz[a->dtype];
4470     TCGLabel *over = gen_new_label();
4471     TCGv_i64 temp;
4472
4473     /* If the guarding predicate has no bits set, no load occurs.  */
4474     if (psz <= 8) {
4475         /* Reduce the pred_esz_masks value simply to reduce the
4476          * size of the code generated here.
4477          */
4478         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4479         temp = tcg_temp_new_i64();
4480         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4481         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4482         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4483         tcg_temp_free_i64(temp);
4484     } else {
4485         TCGv_i32 t32 = tcg_temp_new_i32();
4486         find_last_active(s, t32, esz, a->pg);
4487         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4488         tcg_temp_free_i32(t32);
4489     }
4490
4491     /* Load the data.  */
4492     temp = tcg_temp_new_i64();
4493     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4494     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4495                         s->be_data | dtype_mop[a->dtype]);
4496
4497     /* Broadcast to *all* elements.  */
4498     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4499                          vsz, vsz, temp);
4500     tcg_temp_free_i64(temp);
4501
4502     /* Zero the inactive elements.  */
4503     gen_set_label(over);
4504     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4505     return true;
4506 }
4507
4508 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4509                       int msz, int esz, int nreg)
4510 {
4511     static gen_helper_gvec_mem * const fn_single[4][4] = {
4512         { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4513           gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4514         { NULL,                   gen_helper_sve_st1hh_r,
4515           gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4516         { NULL, NULL,
4517           gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4518         { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4519     };
4520     static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4521         { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4522           gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4523         { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4524           gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4525         { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4526           gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4527     };
4528     gen_helper_gvec_mem *fn;
4529
4530     if (nreg == 0) {
4531         /* ST1 */
4532         fn = fn_single[msz][esz];
4533     } else {
4534         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4535         assert(msz == esz);
4536         fn = fn_multiple[nreg - 1][msz];
4537     }
4538     assert(fn != NULL);
4539     do_mem_zpa(s, zt, pg, addr, fn);
4540 }
4541
4542 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4543 {
4544     if (a->rm == 31 || a->msz > a->esz) {
4545         return false;
4546     }
4547     if (sve_access_check(s)) {
4548         TCGv_i64 addr = new_tmp_a64(s);
4549         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4550         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4551         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4552     }
4553     return true;
4554 }
4555
4556 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4557 {
4558     if (a->msz > a->esz) {
4559         return false;
4560     }
4561     if (sve_access_check(s)) {
4562         int vsz = vec_full_reg_size(s);
4563         int elements = vsz >> a->esz;
4564         TCGv_i64 addr = new_tmp_a64(s);
4565
4566         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4567                          (a->imm * elements * (a->nreg + 1)) << a->msz);
4568         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4569     }
4570     return true;
4571 }
4572
4573 /*
4574  *** SVE gather loads / scatter stores
4575  */
4576
4577 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4578                        TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4579 {
4580     unsigned vsz = vec_full_reg_size(s);
4581     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4582     TCGv_ptr t_zm = tcg_temp_new_ptr();
4583     TCGv_ptr t_pg = tcg_temp_new_ptr();
4584     TCGv_ptr t_zt = tcg_temp_new_ptr();
4585
4586     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4587     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4588     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4589     fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4590
4591     tcg_temp_free_ptr(t_zt);
4592     tcg_temp_free_ptr(t_zm);
4593     tcg_temp_free_ptr(t_pg);
4594     tcg_temp_free_i32(desc);
4595 }
4596
4597 /* Indexed by [ff][xs][u][msz].  */
4598 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4599     { { { gen_helper_sve_ldbss_zsu,
4600           gen_helper_sve_ldhss_zsu,
4601           NULL, },
4602         { gen_helper_sve_ldbsu_zsu,
4603           gen_helper_sve_ldhsu_zsu,
4604           gen_helper_sve_ldssu_zsu, } },
4605       { { gen_helper_sve_ldbss_zss,
4606           gen_helper_sve_ldhss_zss,
4607           NULL, },
4608         { gen_helper_sve_ldbsu_zss,
4609           gen_helper_sve_ldhsu_zss,
4610           gen_helper_sve_ldssu_zss, } } },
4611
4612     { { { gen_helper_sve_ldffbss_zsu,
4613           gen_helper_sve_ldffhss_zsu,
4614           NULL, },
4615         { gen_helper_sve_ldffbsu_zsu,
4616           gen_helper_sve_ldffhsu_zsu,
4617           gen_helper_sve_ldffssu_zsu, } },
4618       { { gen_helper_sve_ldffbss_zss,
4619           gen_helper_sve_ldffhss_zss,
4620           NULL, },
4621         { gen_helper_sve_ldffbsu_zss,
4622           gen_helper_sve_ldffhsu_zss,
4623           gen_helper_sve_ldffssu_zss, } } }
4624 };
4625
4626 /* Note that we overload xs=2 to indicate 64-bit offset.  */
4627 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4628     { { { gen_helper_sve_ldbds_zsu,
4629           gen_helper_sve_ldhds_zsu,
4630           gen_helper_sve_ldsds_zsu,
4631           NULL, },
4632         { gen_helper_sve_ldbdu_zsu,
4633           gen_helper_sve_ldhdu_zsu,
4634           gen_helper_sve_ldsdu_zsu,
4635           gen_helper_sve_ldddu_zsu, } },
4636       { { gen_helper_sve_ldbds_zss,
4637           gen_helper_sve_ldhds_zss,
4638           gen_helper_sve_ldsds_zss,
4639           NULL, },
4640         { gen_helper_sve_ldbdu_zss,
4641           gen_helper_sve_ldhdu_zss,
4642           gen_helper_sve_ldsdu_zss,
4643           gen_helper_sve_ldddu_zss, } },
4644       { { gen_helper_sve_ldbds_zd,
4645           gen_helper_sve_ldhds_zd,
4646           gen_helper_sve_ldsds_zd,
4647           NULL, },
4648         { gen_helper_sve_ldbdu_zd,
4649           gen_helper_sve_ldhdu_zd,
4650           gen_helper_sve_ldsdu_zd,
4651           gen_helper_sve_ldddu_zd, } } },
4652
4653     { { { gen_helper_sve_ldffbds_zsu,
4654           gen_helper_sve_ldffhds_zsu,
4655           gen_helper_sve_ldffsds_zsu,
4656           NULL, },
4657         { gen_helper_sve_ldffbdu_zsu,
4658           gen_helper_sve_ldffhdu_zsu,
4659           gen_helper_sve_ldffsdu_zsu,
4660           gen_helper_sve_ldffddu_zsu, } },
4661       { { gen_helper_sve_ldffbds_zss,
4662           gen_helper_sve_ldffhds_zss,
4663           gen_helper_sve_ldffsds_zss,
4664           NULL, },
4665         { gen_helper_sve_ldffbdu_zss,
4666           gen_helper_sve_ldffhdu_zss,
4667           gen_helper_sve_ldffsdu_zss,
4668           gen_helper_sve_ldffddu_zss, } },
4669       { { gen_helper_sve_ldffbds_zd,
4670           gen_helper_sve_ldffhds_zd,
4671           gen_helper_sve_ldffsds_zd,
4672           NULL, },
4673         { gen_helper_sve_ldffbdu_zd,
4674           gen_helper_sve_ldffhdu_zd,
4675           gen_helper_sve_ldffsdu_zd,
4676           gen_helper_sve_ldffddu_zd, } } }
4677 };
4678
4679 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
4680 {
4681     gen_helper_gvec_mem_scatter *fn = NULL;
4682
4683     if (!sve_access_check(s)) {
4684         return true;
4685     }
4686
4687     switch (a->esz) {
4688     case MO_32:
4689         fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
4690         break;
4691     case MO_64:
4692         fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
4693         break;
4694     }
4695     assert(fn != NULL);
4696
4697     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4698                cpu_reg_sp(s, a->rn), fn);
4699     return true;
4700 }
4701
4702 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
4703 {
4704     gen_helper_gvec_mem_scatter *fn = NULL;
4705     TCGv_i64 imm;
4706
4707     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
4708         return false;
4709     }
4710     if (!sve_access_check(s)) {
4711         return true;
4712     }
4713
4714     switch (a->esz) {
4715     case MO_32:
4716         fn = gather_load_fn32[a->ff][0][a->u][a->msz];
4717         break;
4718     case MO_64:
4719         fn = gather_load_fn64[a->ff][2][a->u][a->msz];
4720         break;
4721     }
4722     assert(fn != NULL);
4723
4724     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
4725      * by loading the immediate into the scalar parameter.
4726      */
4727     imm = tcg_const_i64(a->imm << a->msz);
4728     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4729     tcg_temp_free_i64(imm);
4730     return true;
4731 }
4732
4733 /* Indexed by [xs][msz].  */
4734 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
4735     { gen_helper_sve_stbs_zsu,
4736       gen_helper_sve_sths_zsu,
4737       gen_helper_sve_stss_zsu, },
4738     { gen_helper_sve_stbs_zss,
4739       gen_helper_sve_sths_zss,
4740       gen_helper_sve_stss_zss, },
4741 };
4742
4743 /* Note that we overload xs=2 to indicate 64-bit offset.  */
4744 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
4745     { gen_helper_sve_stbd_zsu,
4746       gen_helper_sve_sthd_zsu,
4747       gen_helper_sve_stsd_zsu,
4748       gen_helper_sve_stdd_zsu, },
4749     { gen_helper_sve_stbd_zss,
4750       gen_helper_sve_sthd_zss,
4751       gen_helper_sve_stsd_zss,
4752       gen_helper_sve_stdd_zss, },
4753     { gen_helper_sve_stbd_zd,
4754       gen_helper_sve_sthd_zd,
4755       gen_helper_sve_stsd_zd,
4756       gen_helper_sve_stdd_zd, },
4757 };
4758
4759 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
4760 {
4761     gen_helper_gvec_mem_scatter *fn;
4762
4763     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
4764         return false;
4765     }
4766     if (!sve_access_check(s)) {
4767         return true;
4768     }
4769     switch (a->esz) {
4770     case MO_32:
4771         fn = scatter_store_fn32[a->xs][a->msz];
4772         break;
4773     case MO_64:
4774         fn = scatter_store_fn64[a->xs][a->msz];
4775         break;
4776     default:
4777         g_assert_not_reached();
4778     }
4779     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4780                cpu_reg_sp(s, a->rn), fn);
4781     return true;
4782 }
4783
4784 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
4785 {
4786     gen_helper_gvec_mem_scatter *fn = NULL;
4787     TCGv_i64 imm;
4788
4789     if (a->esz < a->msz) {
4790         return false;
4791     }
4792     if (!sve_access_check(s)) {
4793         return true;
4794     }
4795
4796     switch (a->esz) {
4797     case MO_32:
4798         fn = scatter_store_fn32[0][a->msz];
4799         break;
4800     case MO_64:
4801         fn = scatter_store_fn64[2][a->msz];
4802         break;
4803     }
4804     assert(fn != NULL);
4805
4806     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
4807      * by loading the immediate into the scalar parameter.
4808      */
4809     imm = tcg_const_i64(a->imm << a->msz);
4810     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4811     tcg_temp_free_i64(imm);
4812     return true;
4813 }
4814
4815 /*
4816  * Prefetches
4817  */
4818
4819 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
4820 {
4821     /* Prefetch is a nop within QEMU.  */
4822     sve_access_check(s);
4823     return true;
4824 }
4825
4826 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
4827 {
4828     if (a->rm == 31) {
4829         return false;
4830     }
4831     /* Prefetch is a nop within QEMU.  */
4832     sve_access_check(s);
4833     return true;
4834 }