target/arm: Implement SVE Predicate Count Group
[qemu.git] / target / arm / translate-sve.c
blob 6b0b8c55d00d73e92ee83b46a3ea0729f0212fd9
1 /*
2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
37 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
40 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
42 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
46 * Helpers for extracting complex instruction fields.
49 /* See e.g. ASR (immediate, predicated).
50 * Returns -1 for unallocated encoding; diagnose later.
52 static int tszimm_esz(int x)
54 x >>= 3; /* discard imm3 */
55 return 31 - clz32(x);
58 static int tszimm_shr(int x)
60 return (16 << tszimm_esz(x)) - x;
63 /* See e.g. LSL (immediate, predicated). */
64 static int tszimm_shl(int x)
66 return x - (8 << tszimm_esz(x));
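/* Worked example of the tsz:imm3 field decoded above: for x = 0b0001101
 * (13), x >> 3 == 1, so esz == 0 (byte elements); the right-shift amount
 * is then (16 << 0) - 13 == 3 and the left-shift amount 13 - (8 << 0) == 5.
 * A zero tsz field gives clz32(0) == 32 and hence esz == -1, which the
 * individual translators reject as unallocated.
 */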
69 static inline int plus1(int x)
71 return x + 1;
74 /* The SH bit is in bit 8. Extract the low 8 and shift. */
75 static inline int expand_imm_sh8s(int x)
77 return (int8_t)x << (x & 0x100 ? 8 : 0);
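/* E.g. x == 0x17f expands to 0x7f00 (127 << 8), while x == 0x0ff
 * expands to -1, since the low byte is sign-extended before the
 * optional shift.
 */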
81 * Include the generated decoder.
84 #include "decode-sve.inc.c"
87 * Implement all of the translator functions referenced by the decoder.
90 /* Return the offset into CPUARMState of the predicate vector register Pn.
91 * Note for this purpose, FFR is P16.
93 static inline int pred_full_reg_offset(DisasContext *s, int regno)
95 return offsetof(CPUARMState, vfp.pregs[regno]);
98 /* Return the byte size of the whole predicate register, VL / 64. */
99 static inline int pred_full_reg_size(DisasContext *s)
101 return s->sve_len >> 3;
104 /* Round up the size of a register to a size allowed by
105 * the tcg vector infrastructure. Any operation which uses this
106 * size may assume that the bits above pred_full_reg_size are zero,
107 * and must leave them the same way.
109 * Note that this is not needed for the vector registers as they
110 * are always properly sized for tcg vectors.
112 static int size_for_gvec(int size)
114 if (size <= 8) {
115 return 8;
116 } else {
117 return QEMU_ALIGN_UP(size, 16);
121 static int pred_gvec_reg_size(DisasContext *s)
123 return size_for_gvec(pred_full_reg_size(s));
126 /* Invoke a vector expander on two Zregs. */
127 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
128 int esz, int rd, int rn)
130 if (sve_access_check(s)) {
131 unsigned vsz = vec_full_reg_size(s);
132 gvec_fn(esz, vec_full_reg_offset(s, rd),
133 vec_full_reg_offset(s, rn), vsz, vsz);
135 return true;
138 /* Invoke a vector expander on three Zregs. */
139 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
140 int esz, int rd, int rn, int rm)
142 if (sve_access_check(s)) {
143 unsigned vsz = vec_full_reg_size(s);
144 gvec_fn(esz, vec_full_reg_offset(s, rd),
145 vec_full_reg_offset(s, rn),
146 vec_full_reg_offset(s, rm), vsz, vsz);
148 return true;
151 /* Invoke a vector move on two Zregs. */
152 static bool do_mov_z(DisasContext *s, int rd, int rn)
154 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
157 /* Initialize a Zreg with replications of a 64-bit immediate. */
158 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
160 unsigned vsz = vec_full_reg_size(s);
161 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
164 /* Invoke a vector expander on two Pregs. */
165 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
166 int esz, int rd, int rn)
168 if (sve_access_check(s)) {
169 unsigned psz = pred_gvec_reg_size(s);
170 gvec_fn(esz, pred_full_reg_offset(s, rd),
171 pred_full_reg_offset(s, rn), psz, psz);
173 return true;
176 /* Invoke a vector expander on three Pregs. */
177 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
178 int esz, int rd, int rn, int rm)
180 if (sve_access_check(s)) {
181 unsigned psz = pred_gvec_reg_size(s);
182 gvec_fn(esz, pred_full_reg_offset(s, rd),
183 pred_full_reg_offset(s, rn),
184 pred_full_reg_offset(s, rm), psz, psz);
186 return true;
189 /* Invoke a vector operation on four Pregs. */
190 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
191 int rd, int rn, int rm, int rg)
193 if (sve_access_check(s)) {
194 unsigned psz = pred_gvec_reg_size(s);
195 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
196 pred_full_reg_offset(s, rn),
197 pred_full_reg_offset(s, rm),
198 pred_full_reg_offset(s, rg),
199 psz, psz, gvec_op);
201 return true;
204 /* Invoke a vector move on two Pregs. */
205 static bool do_mov_p(DisasContext *s, int rd, int rn)
207 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
210 /* Set the cpu flags as per a return from an SVE helper. */
211 static void do_pred_flags(TCGv_i32 t)
213 tcg_gen_mov_i32(cpu_NF, t);
214 tcg_gen_andi_i32(cpu_ZF, t, 2);
215 tcg_gen_andi_i32(cpu_CF, t, 1);
216 tcg_gen_movi_i32(cpu_VF, 0);
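/* The predtest helpers are expected to return a word with N in bit 31,
 * "some active element was true" in bit 1 and C in bit 0.  Given QEMU's
 * flag conventions (cpu_NF carries N in its sign bit, cpu_ZF == 0 means
 * Z is set), the moves above map that word directly onto NZCV, with V
 * always clear.
 */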
219 /* Subroutines computing the ARM PredTest pseudofunction. */
220 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
222 TCGv_i32 t = tcg_temp_new_i32();
224 gen_helper_sve_predtest1(t, d, g);
225 do_pred_flags(t);
226 tcg_temp_free_i32(t);
229 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
231 TCGv_ptr dptr = tcg_temp_new_ptr();
232 TCGv_ptr gptr = tcg_temp_new_ptr();
233 TCGv_i32 t;
235 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
236 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
237 t = tcg_const_i32(words);
239 gen_helper_sve_predtest(t, dptr, gptr, t);
240 tcg_temp_free_ptr(dptr);
241 tcg_temp_free_ptr(gptr);
243 do_pred_flags(t);
244 tcg_temp_free_i32(t);
247 /* For each element size, the bits within a predicate word that are active. */
248 const uint64_t pred_esz_masks[4] = {
249 0xffffffffffffffffull, 0x5555555555555555ull,
250 0x1111111111111111ull, 0x0101010101010101ull
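/* One predicate bit corresponds to one byte of the vector, so for MO_8
 * every bit of the word is significant, for MO_16 every second bit,
 * for MO_32 every fourth and for MO_64 every eighth.
 */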
254 *** SVE Logical - Unpredicated Group
257 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
259 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
262 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
264 if (a->rn == a->rm) { /* MOV */
265 return do_mov_z(s, a->rd, a->rn);
266 } else {
267 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
271 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
273 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
276 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
278 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
282 *** SVE Integer Arithmetic - Unpredicated Group
285 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
287 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
290 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
292 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
295 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
297 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
300 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
302 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
305 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
307 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
310 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
312 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
316 *** SVE Integer Arithmetic - Binary Predicated Group
319 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
321 unsigned vsz = vec_full_reg_size(s);
322 if (fn == NULL) {
323 return false;
325 if (sve_access_check(s)) {
326 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
327 vec_full_reg_offset(s, a->rn),
328 vec_full_reg_offset(s, a->rm),
329 pred_full_reg_offset(s, a->pg),
330 vsz, vsz, 0, fn);
332 return true;
335 #define DO_ZPZZ(NAME, name) \
336 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
337 uint32_t insn) \
339 static gen_helper_gvec_4 * const fns[4] = { \
340 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
341 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
342 }; \
343 return do_zpzz_ool(s, a, fns[a->esz]); \
346 DO_ZPZZ(AND, and)
347 DO_ZPZZ(EOR, eor)
348 DO_ZPZZ(ORR, orr)
349 DO_ZPZZ(BIC, bic)
351 DO_ZPZZ(ADD, add)
352 DO_ZPZZ(SUB, sub)
354 DO_ZPZZ(SMAX, smax)
355 DO_ZPZZ(UMAX, umax)
356 DO_ZPZZ(SMIN, smin)
357 DO_ZPZZ(UMIN, umin)
358 DO_ZPZZ(SABD, sabd)
359 DO_ZPZZ(UABD, uabd)
361 DO_ZPZZ(MUL, mul)
362 DO_ZPZZ(SMULH, smulh)
363 DO_ZPZZ(UMULH, umulh)
365 DO_ZPZZ(ASR, asr)
366 DO_ZPZZ(LSR, lsr)
367 DO_ZPZZ(LSL, lsl)
369 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
371 static gen_helper_gvec_4 * const fns[4] = {
372 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
374 return do_zpzz_ool(s, a, fns[a->esz]);
377 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
379 static gen_helper_gvec_4 * const fns[4] = {
380 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
382 return do_zpzz_ool(s, a, fns[a->esz]);
385 DO_ZPZZ(SEL, sel)
387 #undef DO_ZPZZ
390 *** SVE Integer Arithmetic - Unary Predicated Group
393 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
395 if (fn == NULL) {
396 return false;
398 if (sve_access_check(s)) {
399 unsigned vsz = vec_full_reg_size(s);
400 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
401 vec_full_reg_offset(s, a->rn),
402 pred_full_reg_offset(s, a->pg),
403 vsz, vsz, 0, fn);
405 return true;
408 #define DO_ZPZ(NAME, name) \
409 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
411 static gen_helper_gvec_3 * const fns[4] = { \
412 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
413 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
414 }; \
415 return do_zpz_ool(s, a, fns[a->esz]); \
418 DO_ZPZ(CLS, cls)
419 DO_ZPZ(CLZ, clz)
420 DO_ZPZ(CNT_zpz, cnt_zpz)
421 DO_ZPZ(CNOT, cnot)
422 DO_ZPZ(NOT_zpz, not_zpz)
423 DO_ZPZ(ABS, abs)
424 DO_ZPZ(NEG, neg)
426 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
428 static gen_helper_gvec_3 * const fns[4] = {
429 NULL,
430 gen_helper_sve_fabs_h,
431 gen_helper_sve_fabs_s,
432 gen_helper_sve_fabs_d
434 return do_zpz_ool(s, a, fns[a->esz]);
437 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
439 static gen_helper_gvec_3 * const fns[4] = {
440 NULL,
441 gen_helper_sve_fneg_h,
442 gen_helper_sve_fneg_s,
443 gen_helper_sve_fneg_d
445 return do_zpz_ool(s, a, fns[a->esz]);
448 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
450 static gen_helper_gvec_3 * const fns[4] = {
451 NULL,
452 gen_helper_sve_sxtb_h,
453 gen_helper_sve_sxtb_s,
454 gen_helper_sve_sxtb_d
456 return do_zpz_ool(s, a, fns[a->esz]);
459 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
461 static gen_helper_gvec_3 * const fns[4] = {
462 NULL,
463 gen_helper_sve_uxtb_h,
464 gen_helper_sve_uxtb_s,
465 gen_helper_sve_uxtb_d
467 return do_zpz_ool(s, a, fns[a->esz]);
470 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
472 static gen_helper_gvec_3 * const fns[4] = {
473 NULL, NULL,
474 gen_helper_sve_sxth_s,
475 gen_helper_sve_sxth_d
477 return do_zpz_ool(s, a, fns[a->esz]);
480 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
482 static gen_helper_gvec_3 * const fns[4] = {
483 NULL, NULL,
484 gen_helper_sve_uxth_s,
485 gen_helper_sve_uxth_d
487 return do_zpz_ool(s, a, fns[a->esz]);
490 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
492 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
495 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
497 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
500 #undef DO_ZPZ
503 *** SVE Integer Reduction Group
506 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
507 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
508 gen_helper_gvec_reduc *fn)
510 unsigned vsz = vec_full_reg_size(s);
511 TCGv_ptr t_zn, t_pg;
512 TCGv_i32 desc;
513 TCGv_i64 temp;
515 if (fn == NULL) {
516 return false;
518 if (!sve_access_check(s)) {
519 return true;
522 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
523 temp = tcg_temp_new_i64();
524 t_zn = tcg_temp_new_ptr();
525 t_pg = tcg_temp_new_ptr();
527 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
528 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
529 fn(temp, t_zn, t_pg, desc);
530 tcg_temp_free_ptr(t_zn);
531 tcg_temp_free_ptr(t_pg);
532 tcg_temp_free_i32(desc);
534 write_fp_dreg(s, a->rd, temp);
535 tcg_temp_free_i64(temp);
536 return true;
539 #define DO_VPZ(NAME, name) \
540 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
542 static gen_helper_gvec_reduc * const fns[4] = { \
543 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
544 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
545 }; \
546 return do_vpz_ool(s, a, fns[a->esz]); \
549 DO_VPZ(ORV, orv)
550 DO_VPZ(ANDV, andv)
551 DO_VPZ(EORV, eorv)
553 DO_VPZ(UADDV, uaddv)
554 DO_VPZ(SMAXV, smaxv)
555 DO_VPZ(UMAXV, umaxv)
556 DO_VPZ(SMINV, sminv)
557 DO_VPZ(UMINV, uminv)
559 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
561 static gen_helper_gvec_reduc * const fns[4] = {
562 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
563 gen_helper_sve_saddv_s, NULL
565 return do_vpz_ool(s, a, fns[a->esz]);
568 #undef DO_VPZ
571 *** SVE Shift by Immediate - Predicated Group
574 /* Store zero into every active element of Zd. We will use this for two-
575 * and three-operand predicated instructions for which logic dictates a
576 * zero result.
578 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
580 static gen_helper_gvec_2 * const fns[4] = {
581 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
582 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
584 if (sve_access_check(s)) {
585 unsigned vsz = vec_full_reg_size(s);
586 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
587 pred_full_reg_offset(s, pg),
588 vsz, vsz, 0, fns[esz]);
590 return true;
593 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
594 gen_helper_gvec_3 *fn)
596 if (sve_access_check(s)) {
597 unsigned vsz = vec_full_reg_size(s);
598 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
599 vec_full_reg_offset(s, a->rn),
600 pred_full_reg_offset(s, a->pg),
601 vsz, vsz, a->imm, fn);
603 return true;
606 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
608 static gen_helper_gvec_3 * const fns[4] = {
609 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
610 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
612 if (a->esz < 0) {
613 /* Invalid tsz encoding -- see tszimm_esz. */
614 return false;
616 /* Shift by element size is architecturally valid. For
617 arithmetic right-shift, it's the same as by one less. */
618 a->imm = MIN(a->imm, (8 << a->esz) - 1);
619 return do_zpzi_ool(s, a, fns[a->esz]);
622 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
624 static gen_helper_gvec_3 * const fns[4] = {
625 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
626 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
628 if (a->esz < 0) {
629 return false;
631 /* Shift by element size is architecturally valid.
632 For logical shifts, it is a zeroing operation. */
633 if (a->imm >= (8 << a->esz)) {
634 return do_clr_zp(s, a->rd, a->pg, a->esz);
635 } else {
636 return do_zpzi_ool(s, a, fns[a->esz]);
640 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
642 static gen_helper_gvec_3 * const fns[4] = {
643 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
644 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
646 if (a->esz < 0) {
647 return false;
649 /* Shift by element size is architecturally valid.
650 For logical shifts, it is a zeroing operation. */
651 if (a->imm >= (8 << a->esz)) {
652 return do_clr_zp(s, a->rd, a->pg, a->esz);
653 } else {
654 return do_zpzi_ool(s, a, fns[a->esz]);
658 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
660 static gen_helper_gvec_3 * const fns[4] = {
661 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
662 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
664 if (a->esz < 0) {
665 return false;
667 /* Shift by element size is architecturally valid. For arithmetic
668 right shift for division, it is a zeroing operation. */
669 if (a->imm >= (8 << a->esz)) {
670 return do_clr_zp(s, a->rd, a->pg, a->esz);
671 } else {
672 return do_zpzi_ool(s, a, fns[a->esz]);
677 *** SVE Bitwise Shift - Predicated Group
680 #define DO_ZPZW(NAME, name) \
681 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
682 uint32_t insn) \
684 static gen_helper_gvec_4 * const fns[3] = { \
685 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
686 gen_helper_sve_##name##_zpzw_s, \
687 }; \
688 if (a->esz < 0 || a->esz >= 3) { \
689 return false; \
691 return do_zpzz_ool(s, a, fns[a->esz]); \
694 DO_ZPZW(ASR, asr)
695 DO_ZPZW(LSR, lsr)
696 DO_ZPZW(LSL, lsl)
698 #undef DO_ZPZW
701 *** SVE Bitwise Shift - Unpredicated Group
704 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
705 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
706 int64_t, uint32_t, uint32_t))
708 if (a->esz < 0) {
709 /* Invalid tsz encoding -- see tszimm_esz. */
710 return false;
712 if (sve_access_check(s)) {
713 unsigned vsz = vec_full_reg_size(s);
714 /* Shift by element size is architecturally valid. For
715 arithmetic right-shift, it's the same as by one less.
716 Otherwise it is a zeroing operation. */
717 if (a->imm >= 8 << a->esz) {
718 if (asr) {
719 a->imm = (8 << a->esz) - 1;
720 } else {
721 do_dupi_z(s, a->rd, 0);
722 return true;
725 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
726 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
728 return true;
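/* E.g. an unpredicated LSR #8 on byte elements takes the imm >= esize
 * path above and simply zeroes the destination, while ASR #8 on byte
 * elements is reduced to ASR #7, which produces the same result for
 * every input.
 */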
731 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
733 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
736 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
738 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
741 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
743 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
746 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
748 if (fn == NULL) {
749 return false;
751 if (sve_access_check(s)) {
752 unsigned vsz = vec_full_reg_size(s);
753 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
754 vec_full_reg_offset(s, a->rn),
755 vec_full_reg_offset(s, a->rm),
756 vsz, vsz, 0, fn);
758 return true;
761 #define DO_ZZW(NAME, name) \
762 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
763 uint32_t insn) \
765 static gen_helper_gvec_3 * const fns[4] = { \
766 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
767 gen_helper_sve_##name##_zzw_s, NULL \
768 }; \
769 return do_zzw_ool(s, a, fns[a->esz]); \
772 DO_ZZW(ASR, asr)
773 DO_ZZW(LSR, lsr)
774 DO_ZZW(LSL, lsl)
776 #undef DO_ZZW
779 *** SVE Integer Multiply-Add Group
782 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
783 gen_helper_gvec_5 *fn)
785 if (sve_access_check(s)) {
786 unsigned vsz = vec_full_reg_size(s);
787 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
788 vec_full_reg_offset(s, a->ra),
789 vec_full_reg_offset(s, a->rn),
790 vec_full_reg_offset(s, a->rm),
791 pred_full_reg_offset(s, a->pg),
792 vsz, vsz, 0, fn);
794 return true;
797 #define DO_ZPZZZ(NAME, name) \
798 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
800 static gen_helper_gvec_5 * const fns[4] = { \
801 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
802 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
803 }; \
804 return do_zpzzz_ool(s, a, fns[a->esz]); \
807 DO_ZPZZZ(MLA, mla)
808 DO_ZPZZZ(MLS, mls)
810 #undef DO_ZPZZZ
813 *** SVE Index Generation Group
816 static void do_index(DisasContext *s, int esz, int rd,
817 TCGv_i64 start, TCGv_i64 incr)
819 unsigned vsz = vec_full_reg_size(s);
820 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
821 TCGv_ptr t_zd = tcg_temp_new_ptr();
823 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
824 if (esz == 3) {
825 gen_helper_sve_index_d(t_zd, start, incr, desc);
826 } else {
827 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
828 static index_fn * const fns[3] = {
829 gen_helper_sve_index_b,
830 gen_helper_sve_index_h,
831 gen_helper_sve_index_s,
833 TCGv_i32 s32 = tcg_temp_new_i32();
834 TCGv_i32 i32 = tcg_temp_new_i32();
836 tcg_gen_extrl_i64_i32(s32, start);
837 tcg_gen_extrl_i64_i32(i32, incr);
838 fns[esz](t_zd, s32, i32, desc);
840 tcg_temp_free_i32(s32);
841 tcg_temp_free_i32(i32);
843 tcg_temp_free_ptr(t_zd);
844 tcg_temp_free_i32(desc);
847 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
849 if (sve_access_check(s)) {
850 TCGv_i64 start = tcg_const_i64(a->imm1);
851 TCGv_i64 incr = tcg_const_i64(a->imm2);
852 do_index(s, a->esz, a->rd, start, incr);
853 tcg_temp_free_i64(start);
854 tcg_temp_free_i64(incr);
856 return true;
859 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
861 if (sve_access_check(s)) {
862 TCGv_i64 start = tcg_const_i64(a->imm);
863 TCGv_i64 incr = cpu_reg(s, a->rm);
864 do_index(s, a->esz, a->rd, start, incr);
865 tcg_temp_free_i64(start);
867 return true;
870 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
872 if (sve_access_check(s)) {
873 TCGv_i64 start = cpu_reg(s, a->rn);
874 TCGv_i64 incr = tcg_const_i64(a->imm);
875 do_index(s, a->esz, a->rd, start, incr);
876 tcg_temp_free_i64(incr);
878 return true;
881 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
883 if (sve_access_check(s)) {
884 TCGv_i64 start = cpu_reg(s, a->rn);
885 TCGv_i64 incr = cpu_reg(s, a->rm);
886 do_index(s, a->esz, a->rd, start, incr);
888 return true;
892 *** SVE Stack Allocation Group
895 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
897 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
898 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
899 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
900 return true;
903 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
905 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
906 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
907 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
908 return true;
911 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
913 TCGv_i64 reg = cpu_reg(s, a->rd);
914 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
915 return true;
919 *** SVE Compute Vector Address Group
922 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
924 if (sve_access_check(s)) {
925 unsigned vsz = vec_full_reg_size(s);
926 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
927 vec_full_reg_offset(s, a->rn),
928 vec_full_reg_offset(s, a->rm),
929 vsz, vsz, a->imm, fn);
931 return true;
934 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
936 return do_adr(s, a, gen_helper_sve_adr_p32);
939 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
941 return do_adr(s, a, gen_helper_sve_adr_p64);
944 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
946 return do_adr(s, a, gen_helper_sve_adr_s32);
949 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
951 return do_adr(s, a, gen_helper_sve_adr_u32);
955 *** SVE Integer Misc - Unpredicated Group
958 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
960 static gen_helper_gvec_2 * const fns[4] = {
961 NULL,
962 gen_helper_sve_fexpa_h,
963 gen_helper_sve_fexpa_s,
964 gen_helper_sve_fexpa_d,
966 if (a->esz == 0) {
967 return false;
969 if (sve_access_check(s)) {
970 unsigned vsz = vec_full_reg_size(s);
971 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
972 vec_full_reg_offset(s, a->rn),
973 vsz, vsz, 0, fns[a->esz]);
975 return true;
978 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
980 static gen_helper_gvec_3 * const fns[4] = {
981 NULL,
982 gen_helper_sve_ftssel_h,
983 gen_helper_sve_ftssel_s,
984 gen_helper_sve_ftssel_d,
986 if (a->esz == 0) {
987 return false;
989 if (sve_access_check(s)) {
990 unsigned vsz = vec_full_reg_size(s);
991 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
992 vec_full_reg_offset(s, a->rn),
993 vec_full_reg_offset(s, a->rm),
994 vsz, vsz, 0, fns[a->esz]);
996 return true;
1000 *** SVE Predicate Logical Operations Group
1003 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1004 const GVecGen4 *gvec_op)
1006 if (!sve_access_check(s)) {
1007 return true;
1010 unsigned psz = pred_gvec_reg_size(s);
1011 int dofs = pred_full_reg_offset(s, a->rd);
1012 int nofs = pred_full_reg_offset(s, a->rn);
1013 int mofs = pred_full_reg_offset(s, a->rm);
1014 int gofs = pred_full_reg_offset(s, a->pg);
1016 if (psz == 8) {
1017 /* Do the operation and the flags generation in temps. */
1018 TCGv_i64 pd = tcg_temp_new_i64();
1019 TCGv_i64 pn = tcg_temp_new_i64();
1020 TCGv_i64 pm = tcg_temp_new_i64();
1021 TCGv_i64 pg = tcg_temp_new_i64();
1023 tcg_gen_ld_i64(pn, cpu_env, nofs);
1024 tcg_gen_ld_i64(pm, cpu_env, mofs);
1025 tcg_gen_ld_i64(pg, cpu_env, gofs);
1027 gvec_op->fni8(pd, pn, pm, pg);
1028 tcg_gen_st_i64(pd, cpu_env, dofs);
1030 do_predtest1(pd, pg);
1032 tcg_temp_free_i64(pd);
1033 tcg_temp_free_i64(pn);
1034 tcg_temp_free_i64(pm);
1035 tcg_temp_free_i64(pg);
1036 } else {
1037 /* The operation and flags generation is large. The computation
1038 * of the flags depends on the original contents of the guarding
1039 * predicate. If the destination overwrites the guarding predicate,
1040 * then the easiest way to get this right is to save a copy.
1042 int tofs = gofs;
1043 if (a->rd == a->pg) {
1044 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1045 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1048 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1049 do_predtest(s, dofs, tofs, psz / 8);
1051 return true;
1054 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1056 tcg_gen_and_i64(pd, pn, pm);
1057 tcg_gen_and_i64(pd, pd, pg);
1060 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1061 TCGv_vec pm, TCGv_vec pg)
1063 tcg_gen_and_vec(vece, pd, pn, pm);
1064 tcg_gen_and_vec(vece, pd, pd, pg);
1067 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1069 static const GVecGen4 op = {
1070 .fni8 = gen_and_pg_i64,
1071 .fniv = gen_and_pg_vec,
1072 .fno = gen_helper_sve_and_pppp,
1073 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1075 if (a->s) {
1076 return do_pppp_flags(s, a, &op);
1077 } else if (a->rn == a->rm) {
1078 if (a->pg == a->rn) {
1079 return do_mov_p(s, a->rd, a->rn);
1080 } else {
1081 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1083 } else if (a->pg == a->rn || a->pg == a->rm) {
1084 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1085 } else {
1086 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1090 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1092 tcg_gen_andc_i64(pd, pn, pm);
1093 tcg_gen_and_i64(pd, pd, pg);
1096 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1097 TCGv_vec pm, TCGv_vec pg)
1099 tcg_gen_andc_vec(vece, pd, pn, pm);
1100 tcg_gen_and_vec(vece, pd, pd, pg);
1103 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1105 static const GVecGen4 op = {
1106 .fni8 = gen_bic_pg_i64,
1107 .fniv = gen_bic_pg_vec,
1108 .fno = gen_helper_sve_bic_pppp,
1109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1111 if (a->s) {
1112 return do_pppp_flags(s, a, &op);
1113 } else if (a->pg == a->rn) {
1114 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1115 } else {
1116 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1120 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1122 tcg_gen_xor_i64(pd, pn, pm);
1123 tcg_gen_and_i64(pd, pd, pg);
1126 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1127 TCGv_vec pm, TCGv_vec pg)
1129 tcg_gen_xor_vec(vece, pd, pn, pm);
1130 tcg_gen_and_vec(vece, pd, pd, pg);
1133 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1135 static const GVecGen4 op = {
1136 .fni8 = gen_eor_pg_i64,
1137 .fniv = gen_eor_pg_vec,
1138 .fno = gen_helper_sve_eor_pppp,
1139 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1141 if (a->s) {
1142 return do_pppp_flags(s, a, &op);
1143 } else {
1144 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1148 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1150 tcg_gen_and_i64(pn, pn, pg);
1151 tcg_gen_andc_i64(pm, pm, pg);
1152 tcg_gen_or_i64(pd, pn, pm);
1155 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1156 TCGv_vec pm, TCGv_vec pg)
1158 tcg_gen_and_vec(vece, pn, pn, pg);
1159 tcg_gen_andc_vec(vece, pm, pm, pg);
1160 tcg_gen_or_vec(vece, pd, pn, pm);
1163 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1165 static const GVecGen4 op = {
1166 .fni8 = gen_sel_pg_i64,
1167 .fniv = gen_sel_pg_vec,
1168 .fno = gen_helper_sve_sel_pppp,
1169 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1171 if (a->s) {
1172 return false;
1173 } else {
1174 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1178 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1180 tcg_gen_or_i64(pd, pn, pm);
1181 tcg_gen_and_i64(pd, pd, pg);
1184 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1185 TCGv_vec pm, TCGv_vec pg)
1187 tcg_gen_or_vec(vece, pd, pn, pm);
1188 tcg_gen_and_vec(vece, pd, pd, pg);
1191 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1193 static const GVecGen4 op = {
1194 .fni8 = gen_orr_pg_i64,
1195 .fniv = gen_orr_pg_vec,
1196 .fno = gen_helper_sve_orr_pppp,
1197 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1199 if (a->s) {
1200 return do_pppp_flags(s, a, &op);
1201 } else if (a->pg == a->rn && a->rn == a->rm) {
1202 return do_mov_p(s, a->rd, a->rn);
1203 } else {
1204 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1208 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1210 tcg_gen_orc_i64(pd, pn, pm);
1211 tcg_gen_and_i64(pd, pd, pg);
1214 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1215 TCGv_vec pm, TCGv_vec pg)
1217 tcg_gen_orc_vec(vece, pd, pn, pm);
1218 tcg_gen_and_vec(vece, pd, pd, pg);
1221 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1223 static const GVecGen4 op = {
1224 .fni8 = gen_orn_pg_i64,
1225 .fniv = gen_orn_pg_vec,
1226 .fno = gen_helper_sve_orn_pppp,
1227 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1229 if (a->s) {
1230 return do_pppp_flags(s, a, &op);
1231 } else {
1232 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1236 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1238 tcg_gen_or_i64(pd, pn, pm);
1239 tcg_gen_andc_i64(pd, pg, pd);
1242 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1243 TCGv_vec pm, TCGv_vec pg)
1245 tcg_gen_or_vec(vece, pd, pn, pm);
1246 tcg_gen_andc_vec(vece, pd, pg, pd);
1249 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1251 static const GVecGen4 op = {
1252 .fni8 = gen_nor_pg_i64,
1253 .fniv = gen_nor_pg_vec,
1254 .fno = gen_helper_sve_nor_pppp,
1255 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1257 if (a->s) {
1258 return do_pppp_flags(s, a, &op);
1259 } else {
1260 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1264 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1266 tcg_gen_and_i64(pd, pn, pm);
1267 tcg_gen_andc_i64(pd, pg, pd);
1270 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1271 TCGv_vec pm, TCGv_vec pg)
1273 tcg_gen_and_vec(vece, pd, pn, pm);
1274 tcg_gen_andc_vec(vece, pd, pg, pd);
1277 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1279 static const GVecGen4 op = {
1280 .fni8 = gen_nand_pg_i64,
1281 .fniv = gen_nand_pg_vec,
1282 .fno = gen_helper_sve_nand_pppp,
1283 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1285 if (a->s) {
1286 return do_pppp_flags(s, a, &op);
1287 } else {
1288 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1293 *** SVE Predicate Misc Group
1296 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1298 if (sve_access_check(s)) {
1299 int nofs = pred_full_reg_offset(s, a->rn);
1300 int gofs = pred_full_reg_offset(s, a->pg);
1301 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1303 if (words == 1) {
1304 TCGv_i64 pn = tcg_temp_new_i64();
1305 TCGv_i64 pg = tcg_temp_new_i64();
1307 tcg_gen_ld_i64(pn, cpu_env, nofs);
1308 tcg_gen_ld_i64(pg, cpu_env, gofs);
1309 do_predtest1(pn, pg);
1311 tcg_temp_free_i64(pn);
1312 tcg_temp_free_i64(pg);
1313 } else {
1314 do_predtest(s, nofs, gofs, words);
1317 return true;
1320 /* See the ARM pseudocode DecodePredCount. */
1321 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1323 unsigned elements = fullsz >> esz;
1324 unsigned bound;
1326 switch (pattern) {
1327 case 0x0: /* POW2 */
1328 return pow2floor(elements);
1329 case 0x1: /* VL1 */
1330 case 0x2: /* VL2 */
1331 case 0x3: /* VL3 */
1332 case 0x4: /* VL4 */
1333 case 0x5: /* VL5 */
1334 case 0x6: /* VL6 */
1335 case 0x7: /* VL7 */
1336 case 0x8: /* VL8 */
1337 bound = pattern;
1338 break;
1339 case 0x9: /* VL16 */
1340 case 0xa: /* VL32 */
1341 case 0xb: /* VL64 */
1342 case 0xc: /* VL128 */
1343 case 0xd: /* VL256 */
1344 bound = 16 << (pattern - 9);
1345 break;
1346 case 0x1d: /* MUL4 */
1347 return elements - elements % 4;
1348 case 0x1e: /* MUL3 */
1349 return elements - elements % 3;
1350 case 0x1f: /* ALL */
1351 return elements;
1352 default: /* #uimm5 */
1353 return 0;
1355 return elements >= bound ? bound : 0;
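/* E.g. with a 256-bit vector (fullsz == 32) and esz == MO_16 there are
 * 16 elements: POW2 and ALL both give 16, VL7 gives 7, MUL3 gives 15,
 * and VL32 gives 0 because the bound exceeds the element count.
 */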
1358 /* This handles all of the predicate initialization instructions,
1359 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1360 * so that decode_pred_count returns 0. For SETFFR, we will have
1361 * set RD == 16 == FFR.
1363 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1365 if (!sve_access_check(s)) {
1366 return true;
1369 unsigned fullsz = vec_full_reg_size(s);
1370 unsigned ofs = pred_full_reg_offset(s, rd);
1371 unsigned numelem, setsz, i;
1372 uint64_t word, lastword;
1373 TCGv_i64 t;
1375 numelem = decode_pred_count(fullsz, pat, esz);
1377 /* Determine what we must store into each bit, and how many. */
1378 if (numelem == 0) {
1379 lastword = word = 0;
1380 setsz = fullsz;
1381 } else {
1382 setsz = numelem << esz;
1383 lastword = word = pred_esz_masks[esz];
1384 if (setsz % 64) {
1385 lastword &= ~(-1ull << (setsz % 64));
1389 t = tcg_temp_new_i64();
1390 if (fullsz <= 64) {
1391 tcg_gen_movi_i64(t, lastword);
1392 tcg_gen_st_i64(t, cpu_env, ofs);
1393 goto done;
1396 if (word == lastword) {
1397 unsigned maxsz = size_for_gvec(fullsz / 8);
1398 unsigned oprsz = size_for_gvec(setsz / 8);
1400 if (oprsz * 8 == setsz) {
1401 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1402 goto done;
1404 if (oprsz * 8 == setsz + 8) {
1405 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1406 tcg_gen_movi_i64(t, 0);
1407 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1408 goto done;
1412 setsz /= 8;
1413 fullsz /= 8;
1415 tcg_gen_movi_i64(t, word);
1416 for (i = 0; i < setsz; i += 8) {
1417 tcg_gen_st_i64(t, cpu_env, ofs + i);
1419 if (lastword != word) {
1420 tcg_gen_movi_i64(t, lastword);
1421 tcg_gen_st_i64(t, cpu_env, ofs + i);
1422 i += 8;
1424 if (i < fullsz) {
1425 tcg_gen_movi_i64(t, 0);
1426 for (; i < fullsz; i += 8) {
1427 tcg_gen_st_i64(t, cpu_env, ofs + i);
1431 done:
1432 tcg_temp_free_i64(t);
1434 /* PTRUES */
1435 if (setflag) {
1436 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1437 tcg_gen_movi_i32(cpu_CF, word == 0);
1438 tcg_gen_movi_i32(cpu_VF, 0);
1439 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1441 return true;
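/* The flags written for PTRUES above match PredTest on the value just
 * stored: any non-zero word sets N and clears Z and C, while the
 * all-false case clears N and sets Z and C, with V always clear.
 */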
1444 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1446 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1449 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1451 /* Note pat == 31 is #all, to set all elements. */
1452 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1455 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1457 /* Note pat == 32 is #unimp, to set no elements. */
1458 return do_predset(s, 0, a->rd, 32, false);
1461 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1463 /* The path through do_pppp_flags is complicated enough to want to avoid
1464 * duplication. Frob the arguments into the form of a predicated AND.
1466 arg_rprr_s alt_a = {
1467 .rd = a->rd, .pg = a->pg, .s = a->s,
1468 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1470 return trans_AND_pppp(s, &alt_a, insn);
1473 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1475 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1478 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1480 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1483 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1484 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1485 TCGv_ptr, TCGv_i32))
1487 if (!sve_access_check(s)) {
1488 return true;
1491 TCGv_ptr t_pd = tcg_temp_new_ptr();
1492 TCGv_ptr t_pg = tcg_temp_new_ptr();
1493 TCGv_i32 t;
1494 unsigned desc;
1496 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1497 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1499 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1500 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1501 t = tcg_const_i32(desc);
1503 gen_fn(t, t_pd, t_pg, t);
1504 tcg_temp_free_ptr(t_pd);
1505 tcg_temp_free_ptr(t_pg);
1507 do_pred_flags(t);
1508 tcg_temp_free_i32(t);
1509 return true;
1512 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1514 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1517 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1519 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1523 *** SVE Element Count Group
1526 /* Perform an inline saturating addition of a 32-bit value within
1527 * a 64-bit register. The second operand is known to be positive,
1528 * which halves the comparisons we must perform to bound the result.
1530 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1532 int64_t ibound;
1533 TCGv_i64 bound;
1534 TCGCond cond;
1536 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1537 if (u) {
1538 tcg_gen_ext32u_i64(reg, reg);
1539 } else {
1540 tcg_gen_ext32s_i64(reg, reg);
1542 if (d) {
1543 tcg_gen_sub_i64(reg, reg, val);
1544 ibound = (u ? 0 : INT32_MIN);
1545 cond = TCG_COND_LT;
1546 } else {
1547 tcg_gen_add_i64(reg, reg, val);
1548 ibound = (u ? UINT32_MAX : INT32_MAX);
1549 cond = TCG_COND_GT;
1551 bound = tcg_const_i64(ibound);
1552 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1553 tcg_temp_free_i64(bound);
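/* E.g. an unsigned decrement of 5 by 16 yields -11 in the 64-bit
 * temporary, below the bound of 0, and so saturates to 0; a signed
 * increment of 0x7ffffff0 by 0x20 yields 0x80000010, above INT32_MAX,
 * and saturates to 0x7fffffff.
 */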
1556 /* Similarly with 64-bit values. */
1557 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1559 TCGv_i64 t0 = tcg_temp_new_i64();
1560 TCGv_i64 t1 = tcg_temp_new_i64();
1561 TCGv_i64 t2;
1563 if (u) {
1564 if (d) {
1565 tcg_gen_sub_i64(t0, reg, val);
1566 tcg_gen_movi_i64(t1, 0);
1567 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1568 } else {
1569 tcg_gen_add_i64(t0, reg, val);
1570 tcg_gen_movi_i64(t1, -1);
1571 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1573 } else {
1574 if (d) {
1575 /* Detect signed overflow for subtraction. */
1576 tcg_gen_xor_i64(t0, reg, val);
1577 tcg_gen_sub_i64(t1, reg, val);
1578 tcg_gen_xor_i64(reg, reg, t1);
1579 tcg_gen_and_i64(t0, t0, reg);
1581 /* Bound the result. */
1582 tcg_gen_movi_i64(reg, INT64_MIN);
1583 t2 = tcg_const_i64(0);
1584 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1585 } else {
1586 /* Detect signed overflow for addition. */
1587 tcg_gen_xor_i64(t0, reg, val);
1588 tcg_gen_add_i64(reg, reg, val);
1589 tcg_gen_xor_i64(t1, reg, val);
1590 tcg_gen_andc_i64(t0, t1, t0);
1592 /* Bound the result. */
1593 tcg_gen_movi_i64(t1, INT64_MAX);
1594 t2 = tcg_const_i64(0);
1595 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1597 tcg_temp_free_i64(t2);
1599 tcg_temp_free_i64(t0);
1600 tcg_temp_free_i64(t1);
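/* The signed cases use the usual xor trick: after the operation, t0 has
 * its sign bit set exactly when the operands' signs admitted an overflow
 * and the sign of the result differs from that of the original operand,
 * so the movcond selects the saturated bound in precisely those cases.
 */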
1603 /* Similarly with a vector and a scalar operand. */
1604 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1605 TCGv_i64 val, bool u, bool d)
1607 unsigned vsz = vec_full_reg_size(s);
1608 TCGv_ptr dptr, nptr;
1609 TCGv_i32 t32, desc;
1610 TCGv_i64 t64;
1612 dptr = tcg_temp_new_ptr();
1613 nptr = tcg_temp_new_ptr();
1614 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1615 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1616 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1618 switch (esz) {
1619 case MO_8:
1620 t32 = tcg_temp_new_i32();
1621 tcg_gen_extrl_i64_i32(t32, val);
1622 if (d) {
1623 tcg_gen_neg_i32(t32, t32);
1625 if (u) {
1626 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1627 } else {
1628 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1630 tcg_temp_free_i32(t32);
1631 break;
1633 case MO_16:
1634 t32 = tcg_temp_new_i32();
1635 tcg_gen_extrl_i64_i32(t32, val);
1636 if (d) {
1637 tcg_gen_neg_i32(t32, t32);
1639 if (u) {
1640 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1641 } else {
1642 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1644 tcg_temp_free_i32(t32);
1645 break;
1647 case MO_32:
1648 t64 = tcg_temp_new_i64();
1649 if (d) {
1650 tcg_gen_neg_i64(t64, val);
1651 } else {
1652 tcg_gen_mov_i64(t64, val);
1654 if (u) {
1655 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1656 } else {
1657 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1659 tcg_temp_free_i64(t64);
1660 break;
1662 case MO_64:
1663 if (u) {
1664 if (d) {
1665 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1666 } else {
1667 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1669 } else if (d) {
1670 t64 = tcg_temp_new_i64();
1671 tcg_gen_neg_i64(t64, val);
1672 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1673 tcg_temp_free_i64(t64);
1674 } else {
1675 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1677 break;
1679 default:
1680 g_assert_not_reached();
1683 tcg_temp_free_ptr(dptr);
1684 tcg_temp_free_ptr(nptr);
1685 tcg_temp_free_i32(desc);
1688 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1690 if (sve_access_check(s)) {
1691 unsigned fullsz = vec_full_reg_size(s);
1692 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1693 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1695 return true;
1698 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1700 if (sve_access_check(s)) {
1701 unsigned fullsz = vec_full_reg_size(s);
1702 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1703 int inc = numelem * a->imm * (a->d ? -1 : 1);
1704 TCGv_i64 reg = cpu_reg(s, a->rd);
1706 tcg_gen_addi_i64(reg, reg, inc);
1708 return true;
1711 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1712 uint32_t insn)
1714 if (!sve_access_check(s)) {
1715 return true;
1718 unsigned fullsz = vec_full_reg_size(s);
1719 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1720 int inc = numelem * a->imm;
1721 TCGv_i64 reg = cpu_reg(s, a->rd);
1723 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1724 if (inc == 0) {
1725 if (a->u) {
1726 tcg_gen_ext32u_i64(reg, reg);
1727 } else {
1728 tcg_gen_ext32s_i64(reg, reg);
1730 } else {
1731 TCGv_i64 t = tcg_const_i64(inc);
1732 do_sat_addsub_32(reg, t, a->u, a->d);
1733 tcg_temp_free_i64(t);
1735 return true;
1738 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1739 uint32_t insn)
1741 if (!sve_access_check(s)) {
1742 return true;
1745 unsigned fullsz = vec_full_reg_size(s);
1746 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1747 int inc = numelem * a->imm;
1748 TCGv_i64 reg = cpu_reg(s, a->rd);
1750 if (inc != 0) {
1751 TCGv_i64 t = tcg_const_i64(inc);
1752 do_sat_addsub_64(reg, t, a->u, a->d);
1753 tcg_temp_free_i64(t);
1755 return true;
1758 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1760 if (a->esz == 0) {
1761 return false;
1764 unsigned fullsz = vec_full_reg_size(s);
1765 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1766 int inc = numelem * a->imm;
1768 if (inc != 0) {
1769 if (sve_access_check(s)) {
1770 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1771 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1772 vec_full_reg_offset(s, a->rn),
1773 t, fullsz, fullsz);
1774 tcg_temp_free_i64(t);
1776 } else {
1777 do_mov_z(s, a->rd, a->rn);
1779 return true;
1782 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1783 uint32_t insn)
1785 if (a->esz == 0) {
1786 return false;
1789 unsigned fullsz = vec_full_reg_size(s);
1790 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1791 int inc = numelem * a->imm;
1793 if (inc != 0) {
1794 if (sve_access_check(s)) {
1795 TCGv_i64 t = tcg_const_i64(inc);
1796 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1797 tcg_temp_free_i64(t);
1799 } else {
1800 do_mov_z(s, a->rd, a->rn);
1802 return true;
1806 *** SVE Bitwise Immediate Group
1809 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1811 uint64_t imm;
1812 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1813 extract32(a->dbm, 0, 6),
1814 extract32(a->dbm, 6, 6))) {
1815 return false;
1817 if (sve_access_check(s)) {
1818 unsigned vsz = vec_full_reg_size(s);
1819 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1820 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1822 return true;
1825 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1827 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1830 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1832 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1835 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1837 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1840 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1842 uint64_t imm;
1843 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1844 extract32(a->dbm, 0, 6),
1845 extract32(a->dbm, 6, 6))) {
1846 return false;
1848 if (sve_access_check(s)) {
1849 do_dupi_z(s, a->rd, imm);
1851 return true;
1855 *** SVE Integer Wide Immediate - Predicated Group
1858 /* Implement all merging copies. This is used for CPY (immediate),
1859 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1861 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1862 TCGv_i64 val)
1864 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1865 static gen_cpy * const fns[4] = {
1866 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1867 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1869 unsigned vsz = vec_full_reg_size(s);
1870 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1871 TCGv_ptr t_zd = tcg_temp_new_ptr();
1872 TCGv_ptr t_zn = tcg_temp_new_ptr();
1873 TCGv_ptr t_pg = tcg_temp_new_ptr();
1875 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1876 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1877 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1879 fns[esz](t_zd, t_zn, t_pg, val, desc);
1881 tcg_temp_free_ptr(t_zd);
1882 tcg_temp_free_ptr(t_zn);
1883 tcg_temp_free_ptr(t_pg);
1884 tcg_temp_free_i32(desc);
1887 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1889 if (a->esz == 0) {
1890 return false;
1892 if (sve_access_check(s)) {
1893 /* Decode the VFP immediate. */
1894 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1895 TCGv_i64 t_imm = tcg_const_i64(imm);
1896 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1897 tcg_temp_free_i64(t_imm);
1899 return true;
1902 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1904 if (a->esz == 0 && extract32(insn, 13, 1)) {
1905 return false;
1907 if (sve_access_check(s)) {
1908 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1909 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1910 tcg_temp_free_i64(t_imm);
1912 return true;
1915 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1917 static gen_helper_gvec_2i * const fns[4] = {
1918 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1919 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1922 if (a->esz == 0 && extract32(insn, 13, 1)) {
1923 return false;
1925 if (sve_access_check(s)) {
1926 unsigned vsz = vec_full_reg_size(s);
1927 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1928 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1929 pred_full_reg_offset(s, a->pg),
1930 t_imm, vsz, vsz, 0, fns[a->esz]);
1931 tcg_temp_free_i64(t_imm);
1933 return true;
1937 *** SVE Permute Extract Group
1940 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1942 if (!sve_access_check(s)) {
1943 return true;
1946 unsigned vsz = vec_full_reg_size(s);
1947 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1948 unsigned n_siz = vsz - n_ofs;
1949 unsigned d = vec_full_reg_offset(s, a->rd);
1950 unsigned n = vec_full_reg_offset(s, a->rn);
1951 unsigned m = vec_full_reg_offset(s, a->rm);
1953 /* Use host vector move insns if we have appropriate sizes
1954 * and no unfortunate overlap.
1956 if (m != d
1957 && n_ofs == size_for_gvec(n_ofs)
1958 && n_siz == size_for_gvec(n_siz)
1959 && (d != n || n_siz <= n_ofs)) {
1960 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1961 if (n_ofs != 0) {
1962 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1964 } else {
1965 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1967 return true;
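/* E.g. with a 32-byte vector, EXT #16 gives n_ofs == n_siz == 16, both
 * sizes the gvec infrastructure accepts, so (absent awkward register
 * overlap) the result is assembled from two in-place vector moves; an
 * immediate such as #3 falls back to the out-of-line helper.
 */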
1971 *** SVE Permute - Unpredicated Group
1974 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1976 if (sve_access_check(s)) {
1977 unsigned vsz = vec_full_reg_size(s);
1978 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1979 vsz, vsz, cpu_reg_sp(s, a->rn));
1981 return true;
1984 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
1986 if ((a->imm & 0x1f) == 0) {
1987 return false;
1989 if (sve_access_check(s)) {
1990 unsigned vsz = vec_full_reg_size(s);
1991 unsigned dofs = vec_full_reg_offset(s, a->rd);
1992 unsigned esz, index;
1994 esz = ctz32(a->imm);
1995 index = a->imm >> (esz + 1);
1997 if ((index << esz) < vsz) {
1998 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1999 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2000 } else {
2001 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2004 return true;
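/* The immediate encodes both element size and index: e.g. imm == 0b1110
 * gives ctz32 == 1, hence MO_16 elements and index 3.  The dup is done
 * from that element when it lies within the current vector length;
 * otherwise the destination is zeroed.
 */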
2007 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2009 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2010 static gen_insr * const fns[4] = {
2011 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2012 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2014 unsigned vsz = vec_full_reg_size(s);
2015 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2016 TCGv_ptr t_zd = tcg_temp_new_ptr();
2017 TCGv_ptr t_zn = tcg_temp_new_ptr();
2019 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2020 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2022 fns[a->esz](t_zd, t_zn, val, desc);
2024 tcg_temp_free_ptr(t_zd);
2025 tcg_temp_free_ptr(t_zn);
2026 tcg_temp_free_i32(desc);
2029 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2031 if (sve_access_check(s)) {
2032 TCGv_i64 t = tcg_temp_new_i64();
2033 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2034 do_insr_i64(s, a, t);
2035 tcg_temp_free_i64(t);
2037 return true;
2040 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2042 if (sve_access_check(s)) {
2043 do_insr_i64(s, a, cpu_reg(s, a->rm));
2045 return true;
2048 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2050 static gen_helper_gvec_2 * const fns[4] = {
2051 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2052 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2055 if (sve_access_check(s)) {
2056 unsigned vsz = vec_full_reg_size(s);
2057 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2058 vec_full_reg_offset(s, a->rn),
2059 vsz, vsz, 0, fns[a->esz]);
2061 return true;
2064 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2066 static gen_helper_gvec_3 * const fns[4] = {
2067 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2068 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2071 if (sve_access_check(s)) {
2072 unsigned vsz = vec_full_reg_size(s);
2073 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2074 vec_full_reg_offset(s, a->rn),
2075 vec_full_reg_offset(s, a->rm),
2076 vsz, vsz, 0, fns[a->esz]);
2078 return true;
2081 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2083 static gen_helper_gvec_2 * const fns[4][2] = {
2084 { NULL, NULL },
2085 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2086 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2087 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2090 if (a->esz == 0) {
2091 return false;
2093 if (sve_access_check(s)) {
2094 unsigned vsz = vec_full_reg_size(s);
2095 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2096 vec_full_reg_offset(s, a->rn)
2097 + (a->h ? vsz / 2 : 0),
2098 vsz, vsz, 0, fns[a->esz][a->u]);
2100 return true;
2104 *** SVE Permute - Predicates Group
2107 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2108 gen_helper_gvec_3 *fn)
2110 if (!sve_access_check(s)) {
2111 return true;
2114 unsigned vsz = pred_full_reg_size(s);
2116 /* Predicate sizes may be smaller and cannot use simd_desc.
2117 We cannot round up, as we do elsewhere, because we need
2118 the exact size for ZIP2 and REV. We retain the style for
2119 the other helpers for consistency. */
2120 TCGv_ptr t_d = tcg_temp_new_ptr();
2121 TCGv_ptr t_n = tcg_temp_new_ptr();
2122 TCGv_ptr t_m = tcg_temp_new_ptr();
2123 TCGv_i32 t_desc;
2124 int desc;
2126 desc = vsz - 2;
2127 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2128 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2130 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2131 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2132 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2133 t_desc = tcg_const_i32(desc);
2135 fn(t_d, t_n, t_m, t_desc);
2137 tcg_temp_free_ptr(t_d);
2138 tcg_temp_free_ptr(t_n);
2139 tcg_temp_free_ptr(t_m);
2140 tcg_temp_free_i32(t_desc);
2141 return true;
2144 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2145 gen_helper_gvec_2 *fn)
2147 if (!sve_access_check(s)) {
2148 return true;
2151 unsigned vsz = pred_full_reg_size(s);
2152 TCGv_ptr t_d = tcg_temp_new_ptr();
2153 TCGv_ptr t_n = tcg_temp_new_ptr();
2154 TCGv_i32 t_desc;
2155 int desc;
2157 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2158 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2160 /* Predicate sizes may be smaller and cannot use simd_desc.
2161 We cannot round up, as we do elsewhere, because we need
2162 the exact size for ZIP2 and REV. We retain the style for
2163 the other helpers for consistency. */
2165 desc = vsz - 2;
2166 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2167 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2168 t_desc = tcg_const_i32(desc);
2170 fn(t_d, t_n, t_desc);
2172 tcg_temp_free_i32(t_desc);
2173 tcg_temp_free_ptr(t_d);
2174 tcg_temp_free_ptr(t_n);
2175 return true;
2178 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2180 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2183 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2185 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2188 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2190 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2193 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2195 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2198 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2200 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2203 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2205 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2208 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2210 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2213 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2215 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2218 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2220 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2224 *** SVE Permute - Interleaving Group
2227 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2229 static gen_helper_gvec_3 * const fns[4] = {
2230 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2231 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
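/* ZIP1 interleaves elements taken from the low halves of Zn and Zm;
 * ZIP2 uses the high halves, hence the extra vsz / 2 offset applied
 * to both source operands below.
 */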
2234 if (sve_access_check(s)) {
2235 unsigned vsz = vec_full_reg_size(s);
2236 unsigned high_ofs = high ? vsz / 2 : 0;
2237 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2238 vec_full_reg_offset(s, a->rn) + high_ofs,
2239 vec_full_reg_offset(s, a->rm) + high_ofs,
2240 vsz, vsz, 0, fns[a->esz]);
2242 return true;
2245 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2246 gen_helper_gvec_3 *fn)
2248 if (sve_access_check(s)) {
2249 unsigned vsz = vec_full_reg_size(s);
2250 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2251 vec_full_reg_offset(s, a->rn),
2252 vec_full_reg_offset(s, a->rm),
2253 vsz, vsz, data, fn);
2255 return true;
2258 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2260 return do_zip(s, a, false);
2263 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2265 return do_zip(s, a, true);
2268 static gen_helper_gvec_3 * const uzp_fns[4] = {
2269 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2270 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
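/* UZP1 passes data == 0 and UZP2 passes 1 << esz, which the helper
 * uses as a byte offset so that extraction starts with the odd-numbered
 * elements.  TRN1/TRN2 below follow the same convention.
 */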
2273 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2275 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2278 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2280 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2283 static gen_helper_gvec_3 * const trn_fns[4] = {
2284 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2285 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2288 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2290 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2293 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2295 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2299 *** SVE Permute Vector - Predicated Group
2302 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2304 static gen_helper_gvec_3 * const fns[4] = {
2305 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2307 return do_zpz_ool(s, a, fns[a->esz]);
2310 /* Call the helper that computes the ARM LastActiveElement pseudocode
2311 * function, scaled by the element size. This includes the not found
2312 * indication; e.g. not found for esz=3 is -8.
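 * (Likewise, an active last element at index 3 with esz=2 yields 12.)
 */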
2314 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2316 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2317 * round up, as we do elsewhere, because we need the exact size.
2319 TCGv_ptr t_p = tcg_temp_new_ptr();
2320 TCGv_i32 t_desc;
2321 unsigned vsz = pred_full_reg_size(s);
2322 unsigned desc;
2324 desc = vsz - 2;
2325 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2327 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2328 t_desc = tcg_const_i32(desc);
2330 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2332 tcg_temp_free_i32(t_desc);
2333 tcg_temp_free_ptr(t_p);
2336 /* Increment LAST to the offset of the next element in the vector,
2337 * wrapping around to 0.
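 * E.g. with vsz = 48 (a 384-bit vector) and esz = 3, LAST = 40
 * advances to 48 and the movcond below folds it back to 0.
 */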
2339 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2341 unsigned vsz = vec_full_reg_size(s);
2343 tcg_gen_addi_i32(last, last, 1 << esz);
2344 if (is_power_of_2(vsz)) {
2345 tcg_gen_andi_i32(last, last, vsz - 1);
2346 } else {
2347 TCGv_i32 max = tcg_const_i32(vsz);
2348 TCGv_i32 zero = tcg_const_i32(0);
2349 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2350 tcg_temp_free_i32(max);
2351 tcg_temp_free_i32(zero);
2355 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2356 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2358 unsigned vsz = vec_full_reg_size(s);
2360 if (is_power_of_2(vsz)) {
2361 tcg_gen_andi_i32(last, last, vsz - 1);
2362 } else {
2363 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2364 TCGv_i32 zero = tcg_const_i32(0);
2365 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2366 tcg_temp_free_i32(max);
2367 tcg_temp_free_i32(zero);
2371 /* Load an unsigned element of ESZ from BASE+OFS. */
2372 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2374 TCGv_i64 r = tcg_temp_new_i64();
2376 switch (esz) {
2377 case 0:
2378 tcg_gen_ld8u_i64(r, base, ofs);
2379 break;
2380 case 1:
2381 tcg_gen_ld16u_i64(r, base, ofs);
2382 break;
2383 case 2:
2384 tcg_gen_ld32u_i64(r, base, ofs);
2385 break;
2386 case 3:
2387 tcg_gen_ld_i64(r, base, ofs);
2388 break;
2389 default:
2390 g_assert_not_reached();
2392 return r;
2395 /* Load an unsigned element of ESZ from RM[LAST]. */
2396 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2397 int rm, int esz)
2399 TCGv_ptr p = tcg_temp_new_ptr();
2400 TCGv_i64 r;
2402 /* Convert the offset within the vector into an offset within ENV.
2403 * The final adjustment for the vector register base
2404 * is added as a constant offset to the load.
2406 #ifdef HOST_WORDS_BIGENDIAN
2407 /* Adjust for element ordering. See vec_reg_offset. */
2408 if (esz < 3) {
2409 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2411 #endif
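/* On a big-endian host the 64-bit doublewords of the Z register are
 * stored in architectural order, but the bytes within each doubleword
 * are in host order; XORing the byte offset with (8 - esize) mirrors
 * the element position within its doubleword, matching vec_reg_offset.
 */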
2412 tcg_gen_ext_i32_ptr(p, last);
2413 tcg_gen_add_ptr(p, p, cpu_env);
2415 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2416 tcg_temp_free_ptr(p);
2418 return r;
2421 /* Compute CLAST for a Zreg. */
2422 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2424 TCGv_i32 last;
2425 TCGLabel *over;
2426 TCGv_i64 ele;
2427 unsigned vsz, esz = a->esz;
2429 if (!sve_access_check(s)) {
2430 return true;
2433 last = tcg_temp_local_new_i32();
2434 over = gen_new_label();
2436 find_last_active(s, last, esz, a->pg);
2438 /* There is of course no movcond for a 2048-bit vector,
2439 * so we must branch over the actual store.
2441 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2443 if (!before) {
2444 incr_last_active(s, last, esz);
2447 ele = load_last_active(s, last, a->rm, esz);
2448 tcg_temp_free_i32(last);
2450 vsz = vec_full_reg_size(s);
2451 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2452 tcg_temp_free_i64(ele);
2454 /* If this insn used MOVPRFX, we may need a second move. */
2455 if (a->rd != a->rn) {
2456 TCGLabel *done = gen_new_label();
2457 tcg_gen_br(done);
2459 gen_set_label(over);
2460 do_mov_z(s, a->rd, a->rn);
2462 gen_set_label(done);
2463 } else {
2464 gen_set_label(over);
2466 return true;
2469 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2471 return do_clast_vector(s, a, false);
2474 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2476 return do_clast_vector(s, a, true);
2479 /* Compute CLAST for a scalar. */
2480 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2481 bool before, TCGv_i64 reg_val)
2483 TCGv_i32 last = tcg_temp_new_i32();
2484 TCGv_i64 ele, cmp, zero;
2486 find_last_active(s, last, esz, pg);
2488 /* Extend the original value of last prior to incrementing. */
2489 cmp = tcg_temp_new_i64();
2490 tcg_gen_ext_i32_i64(cmp, last);
2492 if (!before) {
2493 incr_last_active(s, last, esz);
2496 /* The conceit here is that while last < 0 indicates not found, after
2497 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2498 * from which we can load garbage. We then discard the garbage with
2499 * a conditional move.
2501 ele = load_last_active(s, last, rm, esz);
2502 tcg_temp_free_i32(last);
2504 zero = tcg_const_i64(0);
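/* CMP holds the pre-increment LAST, which is >= 0 exactly when an
 * active element was found; otherwise the movcond below keeps the
 * old REG_VAL and the garbage load is discarded.
 */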
2505 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2507 tcg_temp_free_i64(zero);
2508 tcg_temp_free_i64(cmp);
2509 tcg_temp_free_i64(ele);
2512 /* Compute CLAST for a Vreg. */
2513 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2515 if (sve_access_check(s)) {
2516 int esz = a->esz;
2517 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2518 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2520 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2521 write_fp_dreg(s, a->rd, reg);
2522 tcg_temp_free_i64(reg);
2524 return true;
2527 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2529 return do_clast_fp(s, a, false);
2532 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2534 return do_clast_fp(s, a, true);
2537 /* Compute CLAST for a Xreg. */
2538 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2540 TCGv_i64 reg;
2542 if (!sve_access_check(s)) {
2543 return true;
2546 reg = cpu_reg(s, a->rd);
2547 switch (a->esz) {
2548 case 0:
2549 tcg_gen_ext8u_i64(reg, reg);
2550 break;
2551 case 1:
2552 tcg_gen_ext16u_i64(reg, reg);
2553 break;
2554 case 2:
2555 tcg_gen_ext32u_i64(reg, reg);
2556 break;
2557 case 3:
2558 break;
2559 default:
2560 g_assert_not_reached();
2563 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2564 return true;
2567 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2569 return do_clast_general(s, a, false);
2572 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2574 return do_clast_general(s, a, true);
2577 /* Compute LAST for a scalar. */
2578 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2579 int pg, int rm, bool before)
2581 TCGv_i32 last = tcg_temp_new_i32();
2582 TCGv_i64 ret;
2584 find_last_active(s, last, esz, pg);
2585 if (before) {
2586 wrap_last_active(s, last, esz);
2587 } else {
2588 incr_last_active(s, last, esz);
2591 ret = load_last_active(s, last, rm, esz);
2592 tcg_temp_free_i32(last);
2593 return ret;
2596 /* Compute LAST for a Vreg. */
2597 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2599 if (sve_access_check(s)) {
2600 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2601 write_fp_dreg(s, a->rd, val);
2602 tcg_temp_free_i64(val);
2604 return true;
2607 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2609 return do_last_fp(s, a, false);
2612 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2614 return do_last_fp(s, a, true);
2617 /* Compute LAST for a Xreg. */
2618 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2620 if (sve_access_check(s)) {
2621 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2622 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2623 tcg_temp_free_i64(val);
2625 return true;
2628 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2630 return do_last_general(s, a, false);
2633 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2635 return do_last_general(s, a, true);
2638 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2640 if (sve_access_check(s)) {
2641 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2643 return true;
2646 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2648 if (sve_access_check(s)) {
2649 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2650 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2651 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2652 tcg_temp_free_i64(t);
2654 return true;
2657 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2659 static gen_helper_gvec_3 * const fns[4] = {
2660 NULL,
2661 gen_helper_sve_revb_h,
2662 gen_helper_sve_revb_s,
2663 gen_helper_sve_revb_d,
2665 return do_zpz_ool(s, a, fns[a->esz]);
2668 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2670 static gen_helper_gvec_3 * const fns[4] = {
2671 NULL,
2672 NULL,
2673 gen_helper_sve_revh_s,
2674 gen_helper_sve_revh_d,
2676 return do_zpz_ool(s, a, fns[a->esz]);
2679 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2681 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2684 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2686 static gen_helper_gvec_3 * const fns[4] = {
2687 gen_helper_sve_rbit_b,
2688 gen_helper_sve_rbit_h,
2689 gen_helper_sve_rbit_s,
2690 gen_helper_sve_rbit_d,
2692 return do_zpz_ool(s, a, fns[a->esz]);
2695 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2697 if (sve_access_check(s)) {
2698 unsigned vsz = vec_full_reg_size(s);
2699 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2700 vec_full_reg_offset(s, a->rn),
2701 vec_full_reg_offset(s, a->rm),
2702 pred_full_reg_offset(s, a->pg),
2703 vsz, vsz, a->esz, gen_helper_sve_splice);
2705 return true;
2709 *** SVE Integer Compare - Vectors Group
2712 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2713 gen_helper_gvec_flags_4 *gen_fn)
2715 TCGv_ptr pd, zn, zm, pg;
2716 unsigned vsz;
2717 TCGv_i32 t;
2719 if (gen_fn == NULL) {
2720 return false;
2722 if (!sve_access_check(s)) {
2723 return true;
2726 vsz = vec_full_reg_size(s);
2727 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
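/* T does double duty: it passes the simd_desc to the helper and
 * receives the resulting NZCV flags, which do_pred_flags then
 * moves into the PSTATE flags.
 */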
2728 pd = tcg_temp_new_ptr();
2729 zn = tcg_temp_new_ptr();
2730 zm = tcg_temp_new_ptr();
2731 pg = tcg_temp_new_ptr();
2733 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2734 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2735 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2736 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2738 gen_fn(t, pd, zn, zm, pg, t);
2740 tcg_temp_free_ptr(pd);
2741 tcg_temp_free_ptr(zn);
2742 tcg_temp_free_ptr(zm);
2743 tcg_temp_free_ptr(pg);
2745 do_pred_flags(t);
2747 tcg_temp_free_i32(t);
2748 return true;
2751 #define DO_PPZZ(NAME, name) \
2752 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2753 uint32_t insn) \
2755 static gen_helper_gvec_flags_4 * const fns[4] = { \
2756 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2757 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2758 }; \
2759 return do_ppzz_flags(s, a, fns[a->esz]); \
2762 DO_PPZZ(CMPEQ, cmpeq)
2763 DO_PPZZ(CMPNE, cmpne)
2764 DO_PPZZ(CMPGT, cmpgt)
2765 DO_PPZZ(CMPGE, cmpge)
2766 DO_PPZZ(CMPHI, cmphi)
2767 DO_PPZZ(CMPHS, cmphs)
2769 #undef DO_PPZZ
2771 #define DO_PPZW(NAME, name) \
2772 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2773 uint32_t insn) \
2775 static gen_helper_gvec_flags_4 * const fns[4] = { \
2776 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2777 gen_helper_sve_##name##_ppzw_s, NULL \
2778 }; \
2779 return do_ppzz_flags(s, a, fns[a->esz]); \
2782 DO_PPZW(CMPEQ, cmpeq)
2783 DO_PPZW(CMPNE, cmpne)
2784 DO_PPZW(CMPGT, cmpgt)
2785 DO_PPZW(CMPGE, cmpge)
2786 DO_PPZW(CMPHI, cmphi)
2787 DO_PPZW(CMPHS, cmphs)
2788 DO_PPZW(CMPLT, cmplt)
2789 DO_PPZW(CMPLE, cmple)
2790 DO_PPZW(CMPLO, cmplo)
2791 DO_PPZW(CMPLS, cmpls)
2793 #undef DO_PPZW
2796 *** SVE Integer Compare - Immediate Groups
2799 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2800 gen_helper_gvec_flags_3 *gen_fn)
2802 TCGv_ptr pd, zn, pg;
2803 unsigned vsz;
2804 TCGv_i32 t;
2806 if (gen_fn == NULL) {
2807 return false;
2809 if (!sve_access_check(s)) {
2810 return true;
2813 vsz = vec_full_reg_size(s);
2814 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2815 pd = tcg_temp_new_ptr();
2816 zn = tcg_temp_new_ptr();
2817 pg = tcg_temp_new_ptr();
2819 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2820 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2821 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2823 gen_fn(t, pd, zn, pg, t);
2825 tcg_temp_free_ptr(pd);
2826 tcg_temp_free_ptr(zn);
2827 tcg_temp_free_ptr(pg);
2829 do_pred_flags(t);
2831 tcg_temp_free_i32(t);
2832 return true;
2835 #define DO_PPZI(NAME, name) \
2836 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2837 uint32_t insn) \
2839 static gen_helper_gvec_flags_3 * const fns[4] = { \
2840 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2841 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2842 }; \
2843 return do_ppzi_flags(s, a, fns[a->esz]); \
2846 DO_PPZI(CMPEQ, cmpeq)
2847 DO_PPZI(CMPNE, cmpne)
2848 DO_PPZI(CMPGT, cmpgt)
2849 DO_PPZI(CMPGE, cmpge)
2850 DO_PPZI(CMPHI, cmphi)
2851 DO_PPZI(CMPHS, cmphs)
2852 DO_PPZI(CMPLT, cmplt)
2853 DO_PPZI(CMPLE, cmple)
2854 DO_PPZI(CMPLO, cmplo)
2855 DO_PPZI(CMPLS, cmpls)
2857 #undef DO_PPZI
2860 *** SVE Partition Break Group
2863 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2864 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2866 if (!sve_access_check(s)) {
2867 return true;
2870 unsigned vsz = pred_full_reg_size(s);
2872 /* Predicate sizes may be smaller and cannot use simd_desc. */
2873 TCGv_ptr d = tcg_temp_new_ptr();
2874 TCGv_ptr n = tcg_temp_new_ptr();
2875 TCGv_ptr m = tcg_temp_new_ptr();
2876 TCGv_ptr g = tcg_temp_new_ptr();
2877 TCGv_i32 t = tcg_const_i32(vsz - 2);
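/* As with the other predicate ops, T carries the raw predicate size
 * (minus 2) in place of a simd_desc; for the flag-setting variants
 * it also returns the NZCV result.
 */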
2879 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2880 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2881 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2882 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2884 if (a->s) {
2885 fn_s(t, d, n, m, g, t);
2886 do_pred_flags(t);
2887 } else {
2888 fn(d, n, m, g, t);
2890 tcg_temp_free_ptr(d);
2891 tcg_temp_free_ptr(n);
2892 tcg_temp_free_ptr(m);
2893 tcg_temp_free_ptr(g);
2894 tcg_temp_free_i32(t);
2895 return true;
2898 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2899 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2901 if (!sve_access_check(s)) {
2902 return true;
2905 unsigned vsz = pred_full_reg_size(s);
2907 /* Predicate sizes may be smaller and cannot use simd_desc. */
2908 TCGv_ptr d = tcg_temp_new_ptr();
2909 TCGv_ptr n = tcg_temp_new_ptr();
2910 TCGv_ptr g = tcg_temp_new_ptr();
2911 TCGv_i32 t = tcg_const_i32(vsz - 2);
2913 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2914 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2915 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2917 if (a->s) {
2918 fn_s(t, d, n, g, t);
2919 do_pred_flags(t);
2920 } else {
2921 fn(d, n, g, t);
2923 tcg_temp_free_ptr(d);
2924 tcg_temp_free_ptr(n);
2925 tcg_temp_free_ptr(g);
2926 tcg_temp_free_i32(t);
2927 return true;
2930 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2932 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2935 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2937 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2940 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2942 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2945 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2947 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2950 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2952 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2955 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2957 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2960 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2962 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2966 *** SVE Predicate Count Group
2969 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2971 unsigned psz = pred_full_reg_size(s);
2973 if (psz <= 8) {
2974 uint64_t psz_mask;
2976 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2977 if (pn != pg) {
2978 TCGv_i64 g = tcg_temp_new_i64();
2979 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2980 tcg_gen_and_i64(val, val, g);
2981 tcg_temp_free_i64(g);
2984 /* Reduce the pred_esz_masks value simply to reduce the
2985 * size of the code generated here.
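 * E.g. for esz == 2, pred_esz_masks[esz] is 0x1111111111111111
 * (one predicate bit per 32-bit element); ANDing with psz_mask
 * below clips that to the real predicate length before the popcount.
 */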
2987 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2988 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2990 tcg_gen_ctpop_i64(val, val);
2991 } else {
2992 TCGv_ptr t_pn = tcg_temp_new_ptr();
2993 TCGv_ptr t_pg = tcg_temp_new_ptr();
2994 unsigned desc;
2995 TCGv_i32 t_desc;
2997 desc = psz - 2;
2998 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3000 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3001 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3002 t_desc = tcg_const_i32(desc);
3004 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3005 tcg_temp_free_ptr(t_pn);
3006 tcg_temp_free_ptr(t_pg);
3007 tcg_temp_free_i32(t_desc);
3011 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3013 if (sve_access_check(s)) {
3014 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3016 return true;
3019 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3020 uint32_t insn)
3022 if (sve_access_check(s)) {
3023 TCGv_i64 reg = cpu_reg(s, a->rd);
3024 TCGv_i64 val = tcg_temp_new_i64();
3026 do_cntp(s, val, a->esz, a->pg, a->pg);
3027 if (a->d) {
3028 tcg_gen_sub_i64(reg, reg, val);
3029 } else {
3030 tcg_gen_add_i64(reg, reg, val);
3032 tcg_temp_free_i64(val);
3034 return true;
3037 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3038 uint32_t insn)
3040 if (a->esz == 0) {
3041 return false;
3043 if (sve_access_check(s)) {
3044 unsigned vsz = vec_full_reg_size(s);
3045 TCGv_i64 val = tcg_temp_new_i64();
3046 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3048 do_cntp(s, val, a->esz, a->pg, a->pg);
3049 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3050 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3052 return true;
3055 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3056 uint32_t insn)
3058 if (sve_access_check(s)) {
3059 TCGv_i64 reg = cpu_reg(s, a->rd);
3060 TCGv_i64 val = tcg_temp_new_i64();
3062 do_cntp(s, val, a->esz, a->pg, a->pg);
3063 do_sat_addsub_32(reg, val, a->u, a->d);
3065 return true;
3068 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3069 uint32_t insn)
3071 if (sve_access_check(s)) {
3072 TCGv_i64 reg = cpu_reg(s, a->rd);
3073 TCGv_i64 val = tcg_temp_new_i64();
3075 do_cntp(s, val, a->esz, a->pg, a->pg);
3076 do_sat_addsub_64(reg, val, a->u, a->d);
3078 return true;
3081 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3082 uint32_t insn)
3084 if (a->esz == 0) {
3085 return false;
3087 if (sve_access_check(s)) {
3088 TCGv_i64 val = tcg_temp_new_i64();
3089 do_cntp(s, val, a->esz, a->pg, a->pg);
3090 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3092 return true;
3096 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3099 /* Subroutine loading a vector register at VOFS of LEN bytes.
3100 * The load should begin at the address Rn + IMM.
3103 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3104 int rn, int imm)
3106 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3107 uint32_t len_remain = len % 8;
3108 uint32_t nparts = len / 8 + ctpop8(len_remain);
3109 int midx = get_mem_index(s);
3110 TCGv_i64 addr, t0, t1;
3112 addr = tcg_temp_new_i64();
3113 t0 = tcg_temp_new_i64();
3115 /* Note that unpredicated loads and stores of vector/predicate registers
3116 * are defined as a stream of bytes, which equates to little-endian
3117 * operations on larger quantities. There is no nice way to force
3118 * a little-endian load for aarch64_be-linux-user out of line.
3120 * Attempt to keep code expansion to a minimum by limiting the
3121 * amount of unrolling done.
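 *
 * E.g. a 256-bit VL gives len = 32 for the vector form, so nparts = 4
 * and the loads are fully unrolled; a 2048-bit VL gives nparts = 32
 * and takes the loop instead.
 */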
3123 if (nparts <= 4) {
3124 int i;
3126 for (i = 0; i < len_align; i += 8) {
3127 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3128 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3129 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3131 } else {
3132 TCGLabel *loop = gen_new_label();
3133 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3135 gen_set_label(loop);
3137 /* Minimize the number of local temps that must be re-read from
3138 * the stack each iteration. Instead, re-compute values other
3139 * than the loop counter.
3141 tp = tcg_temp_new_ptr();
3142 tcg_gen_addi_ptr(tp, i, imm);
3143 tcg_gen_extu_ptr_i64(addr, tp);
3144 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3146 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3148 tcg_gen_add_ptr(tp, cpu_env, i);
3149 tcg_gen_addi_ptr(i, i, 8);
3150 tcg_gen_st_i64(t0, tp, vofs);
3151 tcg_temp_free_ptr(tp);
3153 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3154 tcg_temp_free_ptr(i);
3157 /* Predicate register loads can be any multiple of 2 bytes.
3158 * Note that we still store the entire 64-bit unit into cpu_env.
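 * E.g. a 384-bit VL gives a 6-byte predicate, so len_remain == 6 and
 * the value is assembled from a 4-byte and a 2-byte load below.
 */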
3160 if (len_remain) {
3161 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3163 switch (len_remain) {
3164 case 2:
3165 case 4:
3166 case 8:
3167 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3168 break;
3170 case 6:
3171 t1 = tcg_temp_new_i64();
3172 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3173 tcg_gen_addi_i64(addr, addr, 4);
3174 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3175 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3176 tcg_temp_free_i64(t1);
3177 break;
3179 default:
3180 g_assert_not_reached();
3182 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3184 tcg_temp_free_i64(addr);
3185 tcg_temp_free_i64(t0);
3188 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3190 if (sve_access_check(s)) {
3191 int size = vec_full_reg_size(s);
3192 int off = vec_full_reg_offset(s, a->rd);
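/* The immediate is scaled by the register size in bytes, i.e. the
 * "[<Xn|SP>{, #<imm>, MUL VL}]" addressing form; the same applies
 * to the predicate variant below.
 */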
3193 do_ldr(s, off, size, a->rn, a->imm * size);
3195 return true;
3198 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3200 if (sve_access_check(s)) {
3201 int size = pred_full_reg_size(s);
3202 int off = pred_full_reg_offset(s, a->rd);
3203 do_ldr(s, off, size, a->rn, a->imm * size);
3205 return true;