target/arm: Implement SVE vector splice (predicated)
[qemu.git] / target/arm/translate-sve.c
blob 1517d82468b206f225a3c45edb9ca3fa29d95bcc
1 /*
2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
37 * Helpers for extracting complex instruction fields.
40 /* See e.g. ASR (immediate, predicated).
41 * Returns -1 for unallocated encoding; diagnose later.
43 static int tszimm_esz(int x)
45 x >>= 3; /* discard imm3 */
46 return 31 - clz32(x);
49 static int tszimm_shr(int x)
51 return (16 << tszimm_esz(x)) - x;
54 /* See e.g. LSL (immediate, predicated). */
55 static int tszimm_shl(int x)
57 return x - (8 << tszimm_esz(x));
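/* For example, with byte elements the tsz field is 0b0001, so the combined
 * tsz:imm3 value x lies in [8, 15]: tszimm_esz(x) == 0, tszimm_shr(x) == 16 - x
 * gives right-shift counts 1..8, and tszimm_shl(x) == x - 8 gives 0..7.
 */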
60 static inline int plus1(int x)
62 return x + 1;
65 /* The SH bit is in bit 8. Extract the low 8 and shift. */
66 static inline int expand_imm_sh8s(int x)
68 return (int8_t)x << (x & 0x100 ? 8 : 0);
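/* E.g. x == 0x080 yields -128 (sign-extended, SH clear), while x == 0x17f
 * yields 127 << 8 == 32512 (SH set).
 */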
72 * Include the generated decoder.
75 #include "decode-sve.inc.c"
78 * Implement all of the translator functions referenced by the decoder.
81 /* Return the offset into CPUARMState of the predicate vector register Pn.
82 * Note for this purpose, FFR is P16.
84 static inline int pred_full_reg_offset(DisasContext *s, int regno)
86 return offsetof(CPUARMState, vfp.pregs[regno]);
89 /* Return the byte size of the whole predicate register, VL / 64. */
90 static inline int pred_full_reg_size(DisasContext *s)
92 return s->sve_len >> 3;
95 /* Round up the size of a register to a size allowed by
96 * the tcg vector infrastructure. Any operation which uses this
97 * size may assume that the bits above pred_full_reg_size are zero,
98 * and must leave them the same way.
100 * Note that this is not needed for the vector registers as they
101 * are always properly sized for tcg vectors.
103 static int size_for_gvec(int size)
105 if (size <= 8) {
106 return 8;
107 } else {
108 return QEMU_ALIGN_UP(size, 16);
112 static int pred_gvec_reg_size(DisasContext *s)
114 return size_for_gvec(pred_full_reg_size(s));
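/* E.g. at a 256-bit vector length the predicate registers are 4 bytes, which
 * size_for_gvec rounds up to 8; at the 2048-bit maximum they are 32 bytes
 * and need no rounding.
 */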
117 /* Invoke a vector expander on two Zregs. */
118 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
119 int esz, int rd, int rn)
121 if (sve_access_check(s)) {
122 unsigned vsz = vec_full_reg_size(s);
123 gvec_fn(esz, vec_full_reg_offset(s, rd),
124 vec_full_reg_offset(s, rn), vsz, vsz);
126 return true;
129 /* Invoke a vector expander on three Zregs. */
130 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
131 int esz, int rd, int rn, int rm)
133 if (sve_access_check(s)) {
134 unsigned vsz = vec_full_reg_size(s);
135 gvec_fn(esz, vec_full_reg_offset(s, rd),
136 vec_full_reg_offset(s, rn),
137 vec_full_reg_offset(s, rm), vsz, vsz);
139 return true;
142 /* Invoke a vector move on two Zregs. */
143 static bool do_mov_z(DisasContext *s, int rd, int rn)
145 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
148 /* Initialize a Zreg with replications of a 64-bit immediate. */
149 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
151 unsigned vsz = vec_full_reg_size(s);
152 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
155 /* Invoke a vector expander on two Pregs. */
156 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
157 int esz, int rd, int rn)
159 if (sve_access_check(s)) {
160 unsigned psz = pred_gvec_reg_size(s);
161 gvec_fn(esz, pred_full_reg_offset(s, rd),
162 pred_full_reg_offset(s, rn), psz, psz);
164 return true;
167 /* Invoke a vector expander on three Pregs. */
168 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
169 int esz, int rd, int rn, int rm)
171 if (sve_access_check(s)) {
172 unsigned psz = pred_gvec_reg_size(s);
173 gvec_fn(esz, pred_full_reg_offset(s, rd),
174 pred_full_reg_offset(s, rn),
175 pred_full_reg_offset(s, rm), psz, psz);
177 return true;
180 /* Invoke a vector operation on four Pregs. */
181 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
182 int rd, int rn, int rm, int rg)
184 if (sve_access_check(s)) {
185 unsigned psz = pred_gvec_reg_size(s);
186 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
187 pred_full_reg_offset(s, rn),
188 pred_full_reg_offset(s, rm),
189 pred_full_reg_offset(s, rg),
190 psz, psz, gvec_op);
192 return true;
195 /* Invoke a vector move on two Pregs. */
196 static bool do_mov_p(DisasContext *s, int rd, int rn)
198 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
201 /* Set the cpu flags as per a return from an SVE helper. */
202 static void do_pred_flags(TCGv_i32 t)
204 tcg_gen_mov_i32(cpu_NF, t);
205 tcg_gen_andi_i32(cpu_ZF, t, 2);
206 tcg_gen_andi_i32(cpu_CF, t, 1);
207 tcg_gen_movi_i32(cpu_VF, 0);
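/* As used here: bit 31 of the helper result becomes N, Z is set when bit 1
 * is clear (via the cpu_ZF == 0 convention), bit 0 becomes C, and V is
 * always zero.
 */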
210 /* Subroutines computing the ARM PredTest pseudofunction. */
211 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
213 TCGv_i32 t = tcg_temp_new_i32();
215 gen_helper_sve_predtest1(t, d, g);
216 do_pred_flags(t);
217 tcg_temp_free_i32(t);
220 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
222 TCGv_ptr dptr = tcg_temp_new_ptr();
223 TCGv_ptr gptr = tcg_temp_new_ptr();
224 TCGv_i32 t;
226 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
227 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
228 t = tcg_const_i32(words);
230 gen_helper_sve_predtest(t, dptr, gptr, t);
231 tcg_temp_free_ptr(dptr);
232 tcg_temp_free_ptr(gptr);
234 do_pred_flags(t);
235 tcg_temp_free_i32(t);
238 /* For each element size, the bits within a predicate word that are active. */
239 const uint64_t pred_esz_masks[4] = {
240 0xffffffffffffffffull, 0x5555555555555555ull,
241 0x1111111111111111ull, 0x0101010101010101ull
245 *** SVE Logical - Unpredicated Group
248 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
250 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
253 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
255 if (a->rn == a->rm) { /* MOV */
256 return do_mov_z(s, a->rd, a->rn);
257 } else {
258 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
262 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
264 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
267 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
269 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
273 *** SVE Integer Arithmetic - Unpredicated Group
276 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
278 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
281 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
283 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
286 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
288 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
291 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
293 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
296 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
298 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
301 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
303 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
307 *** SVE Integer Arithmetic - Binary Predicated Group
310 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
312 unsigned vsz = vec_full_reg_size(s);
313 if (fn == NULL) {
314 return false;
316 if (sve_access_check(s)) {
317 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
318 vec_full_reg_offset(s, a->rn),
319 vec_full_reg_offset(s, a->rm),
320 pred_full_reg_offset(s, a->pg),
321 vsz, vsz, 0, fn);
323 return true;
326 #define DO_ZPZZ(NAME, name) \
327 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
328 uint32_t insn) \
330 static gen_helper_gvec_4 * const fns[4] = { \
331 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
332 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
333 }; \
334 return do_zpzz_ool(s, a, fns[a->esz]); \
337 DO_ZPZZ(AND, and)
338 DO_ZPZZ(EOR, eor)
339 DO_ZPZZ(ORR, orr)
340 DO_ZPZZ(BIC, bic)
342 DO_ZPZZ(ADD, add)
343 DO_ZPZZ(SUB, sub)
345 DO_ZPZZ(SMAX, smax)
346 DO_ZPZZ(UMAX, umax)
347 DO_ZPZZ(SMIN, smin)
348 DO_ZPZZ(UMIN, umin)
349 DO_ZPZZ(SABD, sabd)
350 DO_ZPZZ(UABD, uabd)
352 DO_ZPZZ(MUL, mul)
353 DO_ZPZZ(SMULH, smulh)
354 DO_ZPZZ(UMULH, umulh)
356 DO_ZPZZ(ASR, asr)
357 DO_ZPZZ(LSR, lsr)
358 DO_ZPZZ(LSL, lsl)
360 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
362 static gen_helper_gvec_4 * const fns[4] = {
363 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
365 return do_zpzz_ool(s, a, fns[a->esz]);
368 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
370 static gen_helper_gvec_4 * const fns[4] = {
371 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
373 return do_zpzz_ool(s, a, fns[a->esz]);
376 #undef DO_ZPZZ
379 *** SVE Integer Arithmetic - Unary Predicated Group
382 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
384 if (fn == NULL) {
385 return false;
387 if (sve_access_check(s)) {
388 unsigned vsz = vec_full_reg_size(s);
389 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
390 vec_full_reg_offset(s, a->rn),
391 pred_full_reg_offset(s, a->pg),
392 vsz, vsz, 0, fn);
394 return true;
397 #define DO_ZPZ(NAME, name) \
398 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
400 static gen_helper_gvec_3 * const fns[4] = { \
401 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
402 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
403 }; \
404 return do_zpz_ool(s, a, fns[a->esz]); \
407 DO_ZPZ(CLS, cls)
408 DO_ZPZ(CLZ, clz)
409 DO_ZPZ(CNT_zpz, cnt_zpz)
410 DO_ZPZ(CNOT, cnot)
411 DO_ZPZ(NOT_zpz, not_zpz)
412 DO_ZPZ(ABS, abs)
413 DO_ZPZ(NEG, neg)
415 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
417 static gen_helper_gvec_3 * const fns[4] = {
418 NULL,
419 gen_helper_sve_fabs_h,
420 gen_helper_sve_fabs_s,
421 gen_helper_sve_fabs_d
423 return do_zpz_ool(s, a, fns[a->esz]);
426 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
428 static gen_helper_gvec_3 * const fns[4] = {
429 NULL,
430 gen_helper_sve_fneg_h,
431 gen_helper_sve_fneg_s,
432 gen_helper_sve_fneg_d
434 return do_zpz_ool(s, a, fns[a->esz]);
437 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
439 static gen_helper_gvec_3 * const fns[4] = {
440 NULL,
441 gen_helper_sve_sxtb_h,
442 gen_helper_sve_sxtb_s,
443 gen_helper_sve_sxtb_d
445 return do_zpz_ool(s, a, fns[a->esz]);
448 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
450 static gen_helper_gvec_3 * const fns[4] = {
451 NULL,
452 gen_helper_sve_uxtb_h,
453 gen_helper_sve_uxtb_s,
454 gen_helper_sve_uxtb_d
456 return do_zpz_ool(s, a, fns[a->esz]);
459 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
461 static gen_helper_gvec_3 * const fns[4] = {
462 NULL, NULL,
463 gen_helper_sve_sxth_s,
464 gen_helper_sve_sxth_d
466 return do_zpz_ool(s, a, fns[a->esz]);
469 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
471 static gen_helper_gvec_3 * const fns[4] = {
472 NULL, NULL,
473 gen_helper_sve_uxth_s,
474 gen_helper_sve_uxth_d
476 return do_zpz_ool(s, a, fns[a->esz]);
479 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
481 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
484 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
486 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
489 #undef DO_ZPZ
492 *** SVE Integer Reduction Group
495 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
496 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
497 gen_helper_gvec_reduc *fn)
499 unsigned vsz = vec_full_reg_size(s);
500 TCGv_ptr t_zn, t_pg;
501 TCGv_i32 desc;
502 TCGv_i64 temp;
504 if (fn == NULL) {
505 return false;
507 if (!sve_access_check(s)) {
508 return true;
511 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
512 temp = tcg_temp_new_i64();
513 t_zn = tcg_temp_new_ptr();
514 t_pg = tcg_temp_new_ptr();
516 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
517 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
518 fn(temp, t_zn, t_pg, desc);
519 tcg_temp_free_ptr(t_zn);
520 tcg_temp_free_ptr(t_pg);
521 tcg_temp_free_i32(desc);
523 write_fp_dreg(s, a->rd, temp);
524 tcg_temp_free_i64(temp);
525 return true;
528 #define DO_VPZ(NAME, name) \
529 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
531 static gen_helper_gvec_reduc * const fns[4] = { \
532 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
533 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
534 }; \
535 return do_vpz_ool(s, a, fns[a->esz]); \
538 DO_VPZ(ORV, orv)
539 DO_VPZ(ANDV, andv)
540 DO_VPZ(EORV, eorv)
542 DO_VPZ(UADDV, uaddv)
543 DO_VPZ(SMAXV, smaxv)
544 DO_VPZ(UMAXV, umaxv)
545 DO_VPZ(SMINV, sminv)
546 DO_VPZ(UMINV, uminv)
548 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
550 static gen_helper_gvec_reduc * const fns[4] = {
551 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
552 gen_helper_sve_saddv_s, NULL
554 return do_vpz_ool(s, a, fns[a->esz]);
557 #undef DO_VPZ
560 *** SVE Shift by Immediate - Predicated Group
563 /* Store zero into every active element of Zd. We will use this for two
564 * and three-operand predicated instructions for which logic dictates a
565 * zero result.
567 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
569 static gen_helper_gvec_2 * const fns[4] = {
570 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
571 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
573 if (sve_access_check(s)) {
574 unsigned vsz = vec_full_reg_size(s);
575 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
576 pred_full_reg_offset(s, pg),
577 vsz, vsz, 0, fns[esz]);
579 return true;
582 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
583 gen_helper_gvec_3 *fn)
585 if (sve_access_check(s)) {
586 unsigned vsz = vec_full_reg_size(s);
587 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
588 vec_full_reg_offset(s, a->rn),
589 pred_full_reg_offset(s, a->pg),
590 vsz, vsz, a->imm, fn);
592 return true;
595 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
597 static gen_helper_gvec_3 * const fns[4] = {
598 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
599 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
601 if (a->esz < 0) {
602 /* Invalid tsz encoding -- see tszimm_esz. */
603 return false;
605 /* Shift by element size is architecturally valid. For
606 arithmetic right-shift, it's the same as by one less. */
607 a->imm = MIN(a->imm, (8 << a->esz) - 1);
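/* E.g. for byte elements an immediate of 8 is clamped to 7 here; since the
 * shift replicates the sign bit, the result is unchanged.
 */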
608 return do_zpzi_ool(s, a, fns[a->esz]);
611 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
613 static gen_helper_gvec_3 * const fns[4] = {
614 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
615 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
617 if (a->esz < 0) {
618 return false;
620 /* Shift by element size is architecturally valid.
621 For logical shifts, it is a zeroing operation. */
622 if (a->imm >= (8 << a->esz)) {
623 return do_clr_zp(s, a->rd, a->pg, a->esz);
624 } else {
625 return do_zpzi_ool(s, a, fns[a->esz]);
629 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
631 static gen_helper_gvec_3 * const fns[4] = {
632 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
633 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
635 if (a->esz < 0) {
636 return false;
638 /* Shift by element size is architecturally valid.
639 For logical shifts, it is a zeroing operation. */
640 if (a->imm >= (8 << a->esz)) {
641 return do_clr_zp(s, a->rd, a->pg, a->esz);
642 } else {
643 return do_zpzi_ool(s, a, fns[a->esz]);
647 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
649 static gen_helper_gvec_3 * const fns[4] = {
650 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
651 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
653 if (a->esz < 0) {
654 return false;
656 /* Shift by element size is architecturally valid. For arithmetic
657 right shift for division, it is a zeroing operation. */
658 if (a->imm >= (8 << a->esz)) {
659 return do_clr_zp(s, a->rd, a->pg, a->esz);
660 } else {
661 return do_zpzi_ool(s, a, fns[a->esz]);
666 *** SVE Bitwise Shift - Predicated Group
669 #define DO_ZPZW(NAME, name) \
670 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
671 uint32_t insn) \
673 static gen_helper_gvec_4 * const fns[3] = { \
674 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
675 gen_helper_sve_##name##_zpzw_s, \
676 }; \
677 if (a->esz < 0 || a->esz >= 3) { \
678 return false; \
680 return do_zpzz_ool(s, a, fns[a->esz]); \
683 DO_ZPZW(ASR, asr)
684 DO_ZPZW(LSR, lsr)
685 DO_ZPZW(LSL, lsl)
687 #undef DO_ZPZW
690 *** SVE Bitwise Shift - Unpredicated Group
693 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
694 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
695 int64_t, uint32_t, uint32_t))
697 if (a->esz < 0) {
698 /* Invalid tsz encoding -- see tszimm_esz. */
699 return false;
701 if (sve_access_check(s)) {
702 unsigned vsz = vec_full_reg_size(s);
703 /* Shift by element size is architecturally valid. For
704 arithmetic right-shift, it's the same as by one less.
705 Otherwise it is a zeroing operation. */
706 if (a->imm >= 8 << a->esz) {
707 if (asr) {
708 a->imm = (8 << a->esz) - 1;
709 } else {
710 do_dupi_z(s, a->rd, 0);
711 return true;
714 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
715 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
717 return true;
720 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
722 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
725 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
727 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
730 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
732 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
735 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
737 if (fn == NULL) {
738 return false;
740 if (sve_access_check(s)) {
741 unsigned vsz = vec_full_reg_size(s);
742 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
743 vec_full_reg_offset(s, a->rn),
744 vec_full_reg_offset(s, a->rm),
745 vsz, vsz, 0, fn);
747 return true;
750 #define DO_ZZW(NAME, name) \
751 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
752 uint32_t insn) \
754 static gen_helper_gvec_3 * const fns[4] = { \
755 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
756 gen_helper_sve_##name##_zzw_s, NULL \
757 }; \
758 return do_zzw_ool(s, a, fns[a->esz]); \
761 DO_ZZW(ASR, asr)
762 DO_ZZW(LSR, lsr)
763 DO_ZZW(LSL, lsl)
765 #undef DO_ZZW
768 *** SVE Integer Multiply-Add Group
771 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
772 gen_helper_gvec_5 *fn)
774 if (sve_access_check(s)) {
775 unsigned vsz = vec_full_reg_size(s);
776 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
777 vec_full_reg_offset(s, a->ra),
778 vec_full_reg_offset(s, a->rn),
779 vec_full_reg_offset(s, a->rm),
780 pred_full_reg_offset(s, a->pg),
781 vsz, vsz, 0, fn);
783 return true;
786 #define DO_ZPZZZ(NAME, name) \
787 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
789 static gen_helper_gvec_5 * const fns[4] = { \
790 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
791 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
792 }; \
793 return do_zpzzz_ool(s, a, fns[a->esz]); \
796 DO_ZPZZZ(MLA, mla)
797 DO_ZPZZZ(MLS, mls)
799 #undef DO_ZPZZZ
802 *** SVE Index Generation Group
805 static void do_index(DisasContext *s, int esz, int rd,
806 TCGv_i64 start, TCGv_i64 incr)
808 unsigned vsz = vec_full_reg_size(s);
809 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
810 TCGv_ptr t_zd = tcg_temp_new_ptr();
812 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
813 if (esz == 3) {
814 gen_helper_sve_index_d(t_zd, start, incr, desc);
815 } else {
816 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
817 static index_fn * const fns[3] = {
818 gen_helper_sve_index_b,
819 gen_helper_sve_index_h,
820 gen_helper_sve_index_s,
822 TCGv_i32 s32 = tcg_temp_new_i32();
823 TCGv_i32 i32 = tcg_temp_new_i32();
825 tcg_gen_extrl_i64_i32(s32, start);
826 tcg_gen_extrl_i64_i32(i32, incr);
827 fns[esz](t_zd, s32, i32, desc);
829 tcg_temp_free_i32(s32);
830 tcg_temp_free_i32(i32);
832 tcg_temp_free_ptr(t_zd);
833 tcg_temp_free_i32(desc);
836 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
838 if (sve_access_check(s)) {
839 TCGv_i64 start = tcg_const_i64(a->imm1);
840 TCGv_i64 incr = tcg_const_i64(a->imm2);
841 do_index(s, a->esz, a->rd, start, incr);
842 tcg_temp_free_i64(start);
843 tcg_temp_free_i64(incr);
845 return true;
848 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
850 if (sve_access_check(s)) {
851 TCGv_i64 start = tcg_const_i64(a->imm);
852 TCGv_i64 incr = cpu_reg(s, a->rm);
853 do_index(s, a->esz, a->rd, start, incr);
854 tcg_temp_free_i64(start);
856 return true;
859 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
861 if (sve_access_check(s)) {
862 TCGv_i64 start = cpu_reg(s, a->rn);
863 TCGv_i64 incr = tcg_const_i64(a->imm);
864 do_index(s, a->esz, a->rd, start, incr);
865 tcg_temp_free_i64(incr);
867 return true;
870 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
872 if (sve_access_check(s)) {
873 TCGv_i64 start = cpu_reg(s, a->rn);
874 TCGv_i64 incr = cpu_reg(s, a->rm);
875 do_index(s, a->esz, a->rd, start, incr);
877 return true;
881 *** SVE Stack Allocation Group
884 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
886 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
887 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
888 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
889 return true;
892 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
894 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
895 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
896 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
897 return true;
900 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
902 TCGv_i64 reg = cpu_reg(s, a->rd);
903 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
904 return true;
908 *** SVE Compute Vector Address Group
911 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
913 if (sve_access_check(s)) {
914 unsigned vsz = vec_full_reg_size(s);
915 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
916 vec_full_reg_offset(s, a->rn),
917 vec_full_reg_offset(s, a->rm),
918 vsz, vsz, a->imm, fn);
920 return true;
923 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
925 return do_adr(s, a, gen_helper_sve_adr_p32);
928 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
930 return do_adr(s, a, gen_helper_sve_adr_p64);
933 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
935 return do_adr(s, a, gen_helper_sve_adr_s32);
938 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
940 return do_adr(s, a, gen_helper_sve_adr_u32);
944 *** SVE Integer Misc - Unpredicated Group
947 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
949 static gen_helper_gvec_2 * const fns[4] = {
950 NULL,
951 gen_helper_sve_fexpa_h,
952 gen_helper_sve_fexpa_s,
953 gen_helper_sve_fexpa_d,
955 if (a->esz == 0) {
956 return false;
958 if (sve_access_check(s)) {
959 unsigned vsz = vec_full_reg_size(s);
960 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
961 vec_full_reg_offset(s, a->rn),
962 vsz, vsz, 0, fns[a->esz]);
964 return true;
967 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
969 static gen_helper_gvec_3 * const fns[4] = {
970 NULL,
971 gen_helper_sve_ftssel_h,
972 gen_helper_sve_ftssel_s,
973 gen_helper_sve_ftssel_d,
975 if (a->esz == 0) {
976 return false;
978 if (sve_access_check(s)) {
979 unsigned vsz = vec_full_reg_size(s);
980 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
981 vec_full_reg_offset(s, a->rn),
982 vec_full_reg_offset(s, a->rm),
983 vsz, vsz, 0, fns[a->esz]);
985 return true;
989 *** SVE Predicate Logical Operations Group
992 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
993 const GVecGen4 *gvec_op)
995 if (!sve_access_check(s)) {
996 return true;
999 unsigned psz = pred_gvec_reg_size(s);
1000 int dofs = pred_full_reg_offset(s, a->rd);
1001 int nofs = pred_full_reg_offset(s, a->rn);
1002 int mofs = pred_full_reg_offset(s, a->rm);
1003 int gofs = pred_full_reg_offset(s, a->pg);
1005 if (psz == 8) {
1006 /* Do the operation and the flags generation in temps. */
1007 TCGv_i64 pd = tcg_temp_new_i64();
1008 TCGv_i64 pn = tcg_temp_new_i64();
1009 TCGv_i64 pm = tcg_temp_new_i64();
1010 TCGv_i64 pg = tcg_temp_new_i64();
1012 tcg_gen_ld_i64(pn, cpu_env, nofs);
1013 tcg_gen_ld_i64(pm, cpu_env, mofs);
1014 tcg_gen_ld_i64(pg, cpu_env, gofs);
1016 gvec_op->fni8(pd, pn, pm, pg);
1017 tcg_gen_st_i64(pd, cpu_env, dofs);
1019 do_predtest1(pd, pg);
1021 tcg_temp_free_i64(pd);
1022 tcg_temp_free_i64(pn);
1023 tcg_temp_free_i64(pm);
1024 tcg_temp_free_i64(pg);
1025 } else {
1026 /* The operation and flags generation is large. The computation
1027 * of the flags depends on the original contents of the guarding
1028 * predicate. If the destination overwrites the guarding predicate,
1029 * then the easiest way to get this right is to save a copy.
1031 int tofs = gofs;
1032 if (a->rd == a->pg) {
1033 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1034 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1037 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1038 do_predtest(s, dofs, tofs, psz / 8);
1040 return true;
1043 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1045 tcg_gen_and_i64(pd, pn, pm);
1046 tcg_gen_and_i64(pd, pd, pg);
1049 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1050 TCGv_vec pm, TCGv_vec pg)
1052 tcg_gen_and_vec(vece, pd, pn, pm);
1053 tcg_gen_and_vec(vece, pd, pd, pg);
1056 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1058 static const GVecGen4 op = {
1059 .fni8 = gen_and_pg_i64,
1060 .fniv = gen_and_pg_vec,
1061 .fno = gen_helper_sve_and_pppp,
1062 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1064 if (a->s) {
1065 return do_pppp_flags(s, a, &op);
1066 } else if (a->rn == a->rm) {
1067 if (a->pg == a->rn) {
1068 return do_mov_p(s, a->rd, a->rn);
1069 } else {
1070 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1072 } else if (a->pg == a->rn || a->pg == a->rm) {
1073 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1074 } else {
1075 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1079 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1081 tcg_gen_andc_i64(pd, pn, pm);
1082 tcg_gen_and_i64(pd, pd, pg);
1085 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1086 TCGv_vec pm, TCGv_vec pg)
1088 tcg_gen_andc_vec(vece, pd, pn, pm);
1089 tcg_gen_and_vec(vece, pd, pd, pg);
1092 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1094 static const GVecGen4 op = {
1095 .fni8 = gen_bic_pg_i64,
1096 .fniv = gen_bic_pg_vec,
1097 .fno = gen_helper_sve_bic_pppp,
1098 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1100 if (a->s) {
1101 return do_pppp_flags(s, a, &op);
1102 } else if (a->pg == a->rn) {
1103 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1104 } else {
1105 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1109 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1111 tcg_gen_xor_i64(pd, pn, pm);
1112 tcg_gen_and_i64(pd, pd, pg);
1115 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1116 TCGv_vec pm, TCGv_vec pg)
1118 tcg_gen_xor_vec(vece, pd, pn, pm);
1119 tcg_gen_and_vec(vece, pd, pd, pg);
1122 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124 static const GVecGen4 op = {
1125 .fni8 = gen_eor_pg_i64,
1126 .fniv = gen_eor_pg_vec,
1127 .fno = gen_helper_sve_eor_pppp,
1128 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130 if (a->s) {
1131 return do_pppp_flags(s, a, &op);
1132 } else {
1133 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1137 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1139 tcg_gen_and_i64(pn, pn, pg);
1140 tcg_gen_andc_i64(pm, pm, pg);
1141 tcg_gen_or_i64(pd, pn, pm);
1144 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1145 TCGv_vec pm, TCGv_vec pg)
1147 tcg_gen_and_vec(vece, pn, pn, pg);
1148 tcg_gen_andc_vec(vece, pm, pm, pg);
1149 tcg_gen_or_vec(vece, pd, pn, pm);
1152 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1154 static const GVecGen4 op = {
1155 .fni8 = gen_sel_pg_i64,
1156 .fniv = gen_sel_pg_vec,
1157 .fno = gen_helper_sve_sel_pppp,
1158 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1160 if (a->s) {
1161 return false;
1162 } else {
1163 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1167 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1169 tcg_gen_or_i64(pd, pn, pm);
1170 tcg_gen_and_i64(pd, pd, pg);
1173 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1174 TCGv_vec pm, TCGv_vec pg)
1176 tcg_gen_or_vec(vece, pd, pn, pm);
1177 tcg_gen_and_vec(vece, pd, pd, pg);
1180 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1182 static const GVecGen4 op = {
1183 .fni8 = gen_orr_pg_i64,
1184 .fniv = gen_orr_pg_vec,
1185 .fno = gen_helper_sve_orr_pppp,
1186 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1188 if (a->s) {
1189 return do_pppp_flags(s, a, &op);
1190 } else if (a->pg == a->rn && a->rn == a->rm) {
1191 return do_mov_p(s, a->rd, a->rn);
1192 } else {
1193 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1197 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1199 tcg_gen_orc_i64(pd, pn, pm);
1200 tcg_gen_and_i64(pd, pd, pg);
1203 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1204 TCGv_vec pm, TCGv_vec pg)
1206 tcg_gen_orc_vec(vece, pd, pn, pm);
1207 tcg_gen_and_vec(vece, pd, pd, pg);
1210 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1212 static const GVecGen4 op = {
1213 .fni8 = gen_orn_pg_i64,
1214 .fniv = gen_orn_pg_vec,
1215 .fno = gen_helper_sve_orn_pppp,
1216 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1218 if (a->s) {
1219 return do_pppp_flags(s, a, &op);
1220 } else {
1221 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1225 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1227 tcg_gen_or_i64(pd, pn, pm);
1228 tcg_gen_andc_i64(pd, pg, pd);
1231 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1232 TCGv_vec pm, TCGv_vec pg)
1234 tcg_gen_or_vec(vece, pd, pn, pm);
1235 tcg_gen_andc_vec(vece, pd, pg, pd);
1238 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1240 static const GVecGen4 op = {
1241 .fni8 = gen_nor_pg_i64,
1242 .fniv = gen_nor_pg_vec,
1243 .fno = gen_helper_sve_nor_pppp,
1244 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1246 if (a->s) {
1247 return do_pppp_flags(s, a, &op);
1248 } else {
1249 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1253 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1255 tcg_gen_and_i64(pd, pn, pm);
1256 tcg_gen_andc_i64(pd, pg, pd);
1259 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1260 TCGv_vec pm, TCGv_vec pg)
1262 tcg_gen_and_vec(vece, pd, pn, pm);
1263 tcg_gen_andc_vec(vece, pd, pg, pd);
1266 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1268 static const GVecGen4 op = {
1269 .fni8 = gen_nand_pg_i64,
1270 .fniv = gen_nand_pg_vec,
1271 .fno = gen_helper_sve_nand_pppp,
1272 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1274 if (a->s) {
1275 return do_pppp_flags(s, a, &op);
1276 } else {
1277 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1282 *** SVE Predicate Misc Group
1285 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1287 if (sve_access_check(s)) {
1288 int nofs = pred_full_reg_offset(s, a->rn);
1289 int gofs = pred_full_reg_offset(s, a->pg);
1290 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1292 if (words == 1) {
1293 TCGv_i64 pn = tcg_temp_new_i64();
1294 TCGv_i64 pg = tcg_temp_new_i64();
1296 tcg_gen_ld_i64(pn, cpu_env, nofs);
1297 tcg_gen_ld_i64(pg, cpu_env, gofs);
1298 do_predtest1(pn, pg);
1300 tcg_temp_free_i64(pn);
1301 tcg_temp_free_i64(pg);
1302 } else {
1303 do_predtest(s, nofs, gofs, words);
1306 return true;
1309 /* See the ARM pseudocode DecodePredCount. */
1310 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1312 unsigned elements = fullsz >> esz;
1313 unsigned bound;
1315 switch (pattern) {
1316 case 0x0: /* POW2 */
1317 return pow2floor(elements);
1318 case 0x1: /* VL1 */
1319 case 0x2: /* VL2 */
1320 case 0x3: /* VL3 */
1321 case 0x4: /* VL4 */
1322 case 0x5: /* VL5 */
1323 case 0x6: /* VL6 */
1324 case 0x7: /* VL7 */
1325 case 0x8: /* VL8 */
1326 bound = pattern;
1327 break;
1328 case 0x9: /* VL16 */
1329 case 0xa: /* VL32 */
1330 case 0xb: /* VL64 */
1331 case 0xc: /* VL128 */
1332 case 0xd: /* VL256 */
1333 bound = 16 << (pattern - 9);
1334 break;
1335 case 0x1d: /* MUL4 */
1336 return elements - elements % 4;
1337 case 0x1e: /* MUL3 */
1338 return elements - elements % 3;
1339 case 0x1f: /* ALL */
1340 return elements;
1341 default: /* #uimm5 */
1342 return 0;
1344 return elements >= bound ? bound : 0;
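/* E.g. with 32 elements: POW2 gives 32, VL7 gives 7, VL256 gives 0 (the
 * vector is too short), MUL3 gives 30, and ALL gives 32.
 */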
1347 /* This handles all of the predicate initialization instructions,
1348 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1349 * so that decode_pred_count returns 0. For SETFFR, we will have
1350 * set RD == 16 == FFR.
1352 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1354 if (!sve_access_check(s)) {
1355 return true;
1358 unsigned fullsz = vec_full_reg_size(s);
1359 unsigned ofs = pred_full_reg_offset(s, rd);
1360 unsigned numelem, setsz, i;
1361 uint64_t word, lastword;
1362 TCGv_i64 t;
1364 numelem = decode_pred_count(fullsz, pat, esz);
1366 /* Determine what we must store into each bit, and how many. */
1367 if (numelem == 0) {
1368 lastword = word = 0;
1369 setsz = fullsz;
1370 } else {
1371 setsz = numelem << esz;
1372 lastword = word = pred_esz_masks[esz];
1373 if (setsz % 64) {
1374 lastword &= ~(-1ull << (setsz % 64));
1378 t = tcg_temp_new_i64();
1379 if (fullsz <= 64) {
1380 tcg_gen_movi_i64(t, lastword);
1381 tcg_gen_st_i64(t, cpu_env, ofs);
1382 goto done;
1385 if (word == lastword) {
1386 unsigned maxsz = size_for_gvec(fullsz / 8);
1387 unsigned oprsz = size_for_gvec(setsz / 8);
1389 if (oprsz * 8 == setsz) {
1390 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1391 goto done;
1393 if (oprsz * 8 == setsz + 8) {
1394 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1395 tcg_gen_movi_i64(t, 0);
1396 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1397 goto done;
1401 setsz /= 8;
1402 fullsz /= 8;
1404 tcg_gen_movi_i64(t, word);
1405 for (i = 0; i < setsz; i += 8) {
1406 tcg_gen_st_i64(t, cpu_env, ofs + i);
1408 if (lastword != word) {
1409 tcg_gen_movi_i64(t, lastword);
1410 tcg_gen_st_i64(t, cpu_env, ofs + i);
1411 i += 8;
1413 if (i < fullsz) {
1414 tcg_gen_movi_i64(t, 0);
1415 for (; i < fullsz; i += 8) {
1416 tcg_gen_st_i64(t, cpu_env, ofs + i);
1420 done:
1421 tcg_temp_free_i64(t);
1423 /* PTRUES */
1424 if (setflag) {
1425 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1426 tcg_gen_movi_i32(cpu_CF, word == 0);
1427 tcg_gen_movi_i32(cpu_VF, 0);
1428 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1430 return true;
1433 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1435 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1438 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1440 /* Note pat == 31 is #all, to set all elements. */
1441 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1444 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1446 /* Note pat == 32 is #unimp, to set no elements. */
1447 return do_predset(s, 0, a->rd, 32, false);
1450 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1452 /* The path through do_pppp_flags is complicated enough to want to avoid
1453 * duplication. Frob the arguments into the form of a predicated AND.
1455 arg_rprr_s alt_a = {
1456 .rd = a->rd, .pg = a->pg, .s = a->s,
1457 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1459 return trans_AND_pppp(s, &alt_a, insn);
1462 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1464 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1467 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1469 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1472 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1473 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1474 TCGv_ptr, TCGv_i32))
1476 if (!sve_access_check(s)) {
1477 return true;
1480 TCGv_ptr t_pd = tcg_temp_new_ptr();
1481 TCGv_ptr t_pg = tcg_temp_new_ptr();
1482 TCGv_i32 t;
1483 unsigned desc;
1485 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1486 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1488 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1489 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1490 t = tcg_const_i32(desc);
1492 gen_fn(t, t_pd, t_pg, t);
1493 tcg_temp_free_ptr(t_pd);
1494 tcg_temp_free_ptr(t_pg);
1496 do_pred_flags(t);
1497 tcg_temp_free_i32(t);
1498 return true;
1501 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1503 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1506 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1508 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1512 *** SVE Element Count Group
1515 /* Perform an inline saturating addition of a 32-bit value within
1516 * a 64-bit register. The second operand is known to be positive,
1517 * which halves the comparisons we must perform to bound the result.
1519 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1521 int64_t ibound;
1522 TCGv_i64 bound;
1523 TCGCond cond;
1525 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1526 if (u) {
1527 tcg_gen_ext32u_i64(reg, reg);
1528 } else {
1529 tcg_gen_ext32s_i64(reg, reg);
1531 if (d) {
1532 tcg_gen_sub_i64(reg, reg, val);
1533 ibound = (u ? 0 : INT32_MIN);
1534 cond = TCG_COND_LT;
1535 } else {
1536 tcg_gen_add_i64(reg, reg, val);
1537 ibound = (u ? UINT32_MAX : INT32_MAX);
1538 cond = TCG_COND_GT;
1540 bound = tcg_const_i64(ibound);
1541 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1542 tcg_temp_free_i64(bound);
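/* Since the 32-bit operand was widened to 64 bits and val is positive, the
 * exact result always fits in 64 bits, so a single movcond against the
 * relevant bound is enough to saturate it.
 */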
1545 /* Similarly with 64-bit values. */
1546 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1548 TCGv_i64 t0 = tcg_temp_new_i64();
1549 TCGv_i64 t1 = tcg_temp_new_i64();
1550 TCGv_i64 t2;
1552 if (u) {
1553 if (d) {
1554 tcg_gen_sub_i64(t0, reg, val);
1555 tcg_gen_movi_i64(t1, 0);
1556 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1557 } else {
1558 tcg_gen_add_i64(t0, reg, val);
1559 tcg_gen_movi_i64(t1, -1);
1560 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1562 } else {
1563 if (d) {
1564 /* Detect signed overflow for subtraction. */
1565 tcg_gen_xor_i64(t0, reg, val);
1566 tcg_gen_sub_i64(t1, reg, val);
1567 tcg_gen_xor_i64(reg, reg, t1);
1568 tcg_gen_and_i64(t0, t0, reg);
1570 /* Bound the result. */
1571 tcg_gen_movi_i64(reg, INT64_MIN);
1572 t2 = tcg_const_i64(0);
1573 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1574 } else {
1575 /* Detect signed overflow for addition. */
1576 tcg_gen_xor_i64(t0, reg, val);
1577 tcg_gen_add_i64(reg, reg, val);
1578 tcg_gen_xor_i64(t1, reg, val);
1579 tcg_gen_andc_i64(t0, t1, t0);
1581 /* Bound the result. */
1582 tcg_gen_movi_i64(t1, INT64_MAX);
1583 t2 = tcg_const_i64(0);
1584 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1586 tcg_temp_free_i64(t2);
1588 tcg_temp_free_i64(t0);
1589 tcg_temp_free_i64(t1);
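/* The signed cases use the usual xor trick: addition overflowed iff
 * ((res ^ val) & ~(reg ^ val)) has bit 63 set, subtraction iff
 * ((reg ^ val) & (reg ^ res)) does; the movcond then substitutes the
 * saturated bound when that bit is set.
 */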
1592 /* Similarly with a vector and a scalar operand. */
1593 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1594 TCGv_i64 val, bool u, bool d)
1596 unsigned vsz = vec_full_reg_size(s);
1597 TCGv_ptr dptr, nptr;
1598 TCGv_i32 t32, desc;
1599 TCGv_i64 t64;
1601 dptr = tcg_temp_new_ptr();
1602 nptr = tcg_temp_new_ptr();
1603 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1604 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1605 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1607 switch (esz) {
1608 case MO_8:
1609 t32 = tcg_temp_new_i32();
1610 tcg_gen_extrl_i64_i32(t32, val);
1611 if (d) {
1612 tcg_gen_neg_i32(t32, t32);
1614 if (u) {
1615 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1616 } else {
1617 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1619 tcg_temp_free_i32(t32);
1620 break;
1622 case MO_16:
1623 t32 = tcg_temp_new_i32();
1624 tcg_gen_extrl_i64_i32(t32, val);
1625 if (d) {
1626 tcg_gen_neg_i32(t32, t32);
1628 if (u) {
1629 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1630 } else {
1631 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1633 tcg_temp_free_i32(t32);
1634 break;
1636 case MO_32:
1637 t64 = tcg_temp_new_i64();
1638 if (d) {
1639 tcg_gen_neg_i64(t64, val);
1640 } else {
1641 tcg_gen_mov_i64(t64, val);
1643 if (u) {
1644 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1645 } else {
1646 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1648 tcg_temp_free_i64(t64);
1649 break;
1651 case MO_64:
1652 if (u) {
1653 if (d) {
1654 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1655 } else {
1656 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1658 } else if (d) {
1659 t64 = tcg_temp_new_i64();
1660 tcg_gen_neg_i64(t64, val);
1661 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1662 tcg_temp_free_i64(t64);
1663 } else {
1664 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1666 break;
1668 default:
1669 g_assert_not_reached();
1672 tcg_temp_free_ptr(dptr);
1673 tcg_temp_free_ptr(nptr);
1674 tcg_temp_free_i32(desc);
1677 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1679 if (sve_access_check(s)) {
1680 unsigned fullsz = vec_full_reg_size(s);
1681 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1682 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1684 return true;
1687 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1689 if (sve_access_check(s)) {
1690 unsigned fullsz = vec_full_reg_size(s);
1691 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1692 int inc = numelem * a->imm * (a->d ? -1 : 1);
1693 TCGv_i64 reg = cpu_reg(s, a->rd);
1695 tcg_gen_addi_i64(reg, reg, inc);
1697 return true;
1700 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1701 uint32_t insn)
1703 if (!sve_access_check(s)) {
1704 return true;
1707 unsigned fullsz = vec_full_reg_size(s);
1708 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1709 int inc = numelem * a->imm;
1710 TCGv_i64 reg = cpu_reg(s, a->rd);
1712 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1713 if (inc == 0) {
1714 if (a->u) {
1715 tcg_gen_ext32u_i64(reg, reg);
1716 } else {
1717 tcg_gen_ext32s_i64(reg, reg);
1719 } else {
1720 TCGv_i64 t = tcg_const_i64(inc);
1721 do_sat_addsub_32(reg, t, a->u, a->d);
1722 tcg_temp_free_i64(t);
1724 return true;
1727 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1728 uint32_t insn)
1730 if (!sve_access_check(s)) {
1731 return true;
1734 unsigned fullsz = vec_full_reg_size(s);
1735 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1736 int inc = numelem * a->imm;
1737 TCGv_i64 reg = cpu_reg(s, a->rd);
1739 if (inc != 0) {
1740 TCGv_i64 t = tcg_const_i64(inc);
1741 do_sat_addsub_64(reg, t, a->u, a->d);
1742 tcg_temp_free_i64(t);
1744 return true;
1747 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1749 if (a->esz == 0) {
1750 return false;
1753 unsigned fullsz = vec_full_reg_size(s);
1754 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1755 int inc = numelem * a->imm;
1757 if (inc != 0) {
1758 if (sve_access_check(s)) {
1759 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1760 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1761 vec_full_reg_offset(s, a->rn),
1762 t, fullsz, fullsz);
1763 tcg_temp_free_i64(t);
1765 } else {
1766 do_mov_z(s, a->rd, a->rn);
1768 return true;
1771 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1772 uint32_t insn)
1774 if (a->esz == 0) {
1775 return false;
1778 unsigned fullsz = vec_full_reg_size(s);
1779 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1780 int inc = numelem * a->imm;
1782 if (inc != 0) {
1783 if (sve_access_check(s)) {
1784 TCGv_i64 t = tcg_const_i64(inc);
1785 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1786 tcg_temp_free_i64(t);
1788 } else {
1789 do_mov_z(s, a->rd, a->rn);
1791 return true;
1795 *** SVE Bitwise Immediate Group
1798 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1800 uint64_t imm;
1801 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1802 extract32(a->dbm, 0, 6),
1803 extract32(a->dbm, 6, 6))) {
1804 return false;
1806 if (sve_access_check(s)) {
1807 unsigned vsz = vec_full_reg_size(s);
1808 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1809 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1811 return true;
1814 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1816 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1819 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1821 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1824 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1826 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1829 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1831 uint64_t imm;
1832 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1833 extract32(a->dbm, 0, 6),
1834 extract32(a->dbm, 6, 6))) {
1835 return false;
1837 if (sve_access_check(s)) {
1838 do_dupi_z(s, a->rd, imm);
1840 return true;
1844 *** SVE Integer Wide Immediate - Predicated Group
1847 /* Implement all merging copies. This is used for CPY (immediate),
1848 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1850 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1851 TCGv_i64 val)
1853 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1854 static gen_cpy * const fns[4] = {
1855 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1856 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1858 unsigned vsz = vec_full_reg_size(s);
1859 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1860 TCGv_ptr t_zd = tcg_temp_new_ptr();
1861 TCGv_ptr t_zn = tcg_temp_new_ptr();
1862 TCGv_ptr t_pg = tcg_temp_new_ptr();
1864 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1865 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1866 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1868 fns[esz](t_zd, t_zn, t_pg, val, desc);
1870 tcg_temp_free_ptr(t_zd);
1871 tcg_temp_free_ptr(t_zn);
1872 tcg_temp_free_ptr(t_pg);
1873 tcg_temp_free_i32(desc);
1876 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1878 if (a->esz == 0) {
1879 return false;
1881 if (sve_access_check(s)) {
1882 /* Decode the VFP immediate. */
1883 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1884 TCGv_i64 t_imm = tcg_const_i64(imm);
1885 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1886 tcg_temp_free_i64(t_imm);
1888 return true;
1891 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1893 if (a->esz == 0 && extract32(insn, 13, 1)) {
1894 return false;
1896 if (sve_access_check(s)) {
1897 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1898 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1899 tcg_temp_free_i64(t_imm);
1901 return true;
1904 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1906 static gen_helper_gvec_2i * const fns[4] = {
1907 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1908 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1911 if (a->esz == 0 && extract32(insn, 13, 1)) {
1912 return false;
1914 if (sve_access_check(s)) {
1915 unsigned vsz = vec_full_reg_size(s);
1916 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1917 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1918 pred_full_reg_offset(s, a->pg),
1919 t_imm, vsz, vsz, 0, fns[a->esz]);
1920 tcg_temp_free_i64(t_imm);
1922 return true;
1926 *** SVE Permute Extract Group
1929 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1931 if (!sve_access_check(s)) {
1932 return true;
1935 unsigned vsz = vec_full_reg_size(s);
1936 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1937 unsigned n_siz = vsz - n_ofs;
1938 unsigned d = vec_full_reg_offset(s, a->rd);
1939 unsigned n = vec_full_reg_offset(s, a->rn);
1940 unsigned m = vec_full_reg_offset(s, a->rm);
1942 /* Use host vector move insns if we have appropriate sizes
1943 * and no unfortunate overlap.
1945 if (m != d
1946 && n_ofs == size_for_gvec(n_ofs)
1947 && n_siz == size_for_gvec(n_siz)
1948 && (d != n || n_siz <= n_ofs)) {
1949 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1950 if (n_ofs != 0) {
1951 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1953 } else {
1954 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1956 return true;
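/* E.g. with a 32-byte vector and imm == 16 both pieces are gvec-sized: the
 * low 16 bytes of Zd come from Zn[16..31] and the high 16 bytes from
 * Zm[0..15], so two vector moves implement the EXT.
 */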
1960 *** SVE Permute - Unpredicated Group
1963 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1965 if (sve_access_check(s)) {
1966 unsigned vsz = vec_full_reg_size(s);
1967 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1968 vsz, vsz, cpu_reg_sp(s, a->rn));
1970 return true;
1973 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
1975 if ((a->imm & 0x1f) == 0) {
1976 return false;
1978 if (sve_access_check(s)) {
1979 unsigned vsz = vec_full_reg_size(s);
1980 unsigned dofs = vec_full_reg_offset(s, a->rd);
1981 unsigned esz, index;
1983 esz = ctz32(a->imm);
1984 index = a->imm >> (esz + 1);
1986 if ((index << esz) < vsz) {
1987 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1988 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
1989 } else {
1990 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
1993 return true;
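/* The immediate encodes both the element size and the index, e.g.
 * imm == 0b00110 gives esz == 1 (halfwords) and index == 1, selecting
 * Zn.H[1]; an index beyond the vector length zeroes Zd.
 */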
1996 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
1998 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1999 static gen_insr * const fns[4] = {
2000 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2001 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2003 unsigned vsz = vec_full_reg_size(s);
2004 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2005 TCGv_ptr t_zd = tcg_temp_new_ptr();
2006 TCGv_ptr t_zn = tcg_temp_new_ptr();
2008 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2009 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2011 fns[a->esz](t_zd, t_zn, val, desc);
2013 tcg_temp_free_ptr(t_zd);
2014 tcg_temp_free_ptr(t_zn);
2015 tcg_temp_free_i32(desc);
2018 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2020 if (sve_access_check(s)) {
2021 TCGv_i64 t = tcg_temp_new_i64();
2022 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2023 do_insr_i64(s, a, t);
2024 tcg_temp_free_i64(t);
2026 return true;
2029 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2031 if (sve_access_check(s)) {
2032 do_insr_i64(s, a, cpu_reg(s, a->rm));
2034 return true;
2037 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2039 static gen_helper_gvec_2 * const fns[4] = {
2040 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2041 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2044 if (sve_access_check(s)) {
2045 unsigned vsz = vec_full_reg_size(s);
2046 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2047 vec_full_reg_offset(s, a->rn),
2048 vsz, vsz, 0, fns[a->esz]);
2050 return true;
2053 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2055 static gen_helper_gvec_3 * const fns[4] = {
2056 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2057 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2060 if (sve_access_check(s)) {
2061 unsigned vsz = vec_full_reg_size(s);
2062 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2063 vec_full_reg_offset(s, a->rn),
2064 vec_full_reg_offset(s, a->rm),
2065 vsz, vsz, 0, fns[a->esz]);
2067 return true;
2070 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2072 static gen_helper_gvec_2 * const fns[4][2] = {
2073 { NULL, NULL },
2074 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2075 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2076 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2079 if (a->esz == 0) {
2080 return false;
2082 if (sve_access_check(s)) {
2083 unsigned vsz = vec_full_reg_size(s);
2084 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2085 vec_full_reg_offset(s, a->rn)
2086 + (a->h ? vsz / 2 : 0),
2087 vsz, vsz, 0, fns[a->esz][a->u]);
2089 return true;
2093 *** SVE Permute - Predicates Group
2096 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2097 gen_helper_gvec_3 *fn)
2099 if (!sve_access_check(s)) {
2100 return true;
2103 unsigned vsz = pred_full_reg_size(s);
2105 /* Predicate sizes may be smaller and cannot use simd_desc.
2106 We cannot round up, as we do elsewhere, because we need
2107 the exact size for ZIP2 and REV. We retain the style for
2108 the other helpers for consistency. */
2109 TCGv_ptr t_d = tcg_temp_new_ptr();
2110 TCGv_ptr t_n = tcg_temp_new_ptr();
2111 TCGv_ptr t_m = tcg_temp_new_ptr();
2112 TCGv_i32 t_desc;
2113 int desc;
2115 desc = vsz - 2;
2116 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2117 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2119 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2120 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2121 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2122 t_desc = tcg_const_i32(desc);
2124 fn(t_d, t_n, t_m, t_desc);
2126 tcg_temp_free_ptr(t_d);
2127 tcg_temp_free_ptr(t_n);
2128 tcg_temp_free_ptr(t_m);
2129 tcg_temp_free_i32(t_desc);
2130 return true;
2133 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2134 gen_helper_gvec_2 *fn)
2136 if (!sve_access_check(s)) {
2137 return true;
2140 unsigned vsz = pred_full_reg_size(s);
2141 TCGv_ptr t_d = tcg_temp_new_ptr();
2142 TCGv_ptr t_n = tcg_temp_new_ptr();
2143 TCGv_i32 t_desc;
2144 int desc;
2146 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2147 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2149 /* Predicate sizes may be smaller and cannot use simd_desc.
2150 We cannot round up, as we do elsewhere, because we need
2151 the exact size for ZIP2 and REV. We retain the style for
2152 the other helpers for consistency. */
2154 desc = vsz - 2;
2155 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2156 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2157 t_desc = tcg_const_i32(desc);
2159 fn(t_d, t_n, t_desc);
2161 tcg_temp_free_i32(t_desc);
2162 tcg_temp_free_ptr(t_d);
2163 tcg_temp_free_ptr(t_n);
2164 return true;

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}

/*
 *** SVE Permute - Interleaving Group
 */

static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
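
/* Expand a three-Zreg out-of-line helper, passing DATA through the
 * descriptor.  UZP2 and TRN2 use this to pass 1 << esz, which the
 * helpers presumably use to select the odd-numbered elements.
 */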

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}

/*
 *** SVE Permute Vector - Predicated Group
 */
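
/* COMPACT is defined only for word and doubleword elements; the NULL
 * entries rely on do_zpz_ool treating a missing helper as an
 * unallocated encoding.
 */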

static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS.  */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}

/* Load an unsigned element of ESZ from RM[LAST].  */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
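
/* CLASTA/CLASTB for a Zreg: replicate the last active element of Zm
 * (CLASTB), or the element following it with wraparound (CLASTA), into
 * every element of Zd.  If no element is active, Zd becomes a copy of
 * Zn instead.
 */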

/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, true);
}

/* Compute CLAST for a scalar.  */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}

/* Compute CLAST for a Vreg.  */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, true);
}

/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, true);
}
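
/* LASTA/LASTB: unlike CLAST there is no merge with the old value.
 * When no element is active, find_last_active returns a negative index;
 * wrap_last_active (LASTB) turns that into the last element of the
 * vector and incr_last_active (LASTA) into element 0, so a valid
 * element is always loaded.
 */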

/* Compute LAST for a scalar.  */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg.  */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg.  */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, true);
}
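
/* CPY (scalar and SIMD&FP scalar, merging): copy the source value into
 * the active elements of Zd, leaving inactive elements unchanged.  Both
 * forms reuse do_cpy_m with Zd as its own merge input.
 */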

static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
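
/* SPLICE: copy the active segment of Zn (from the first through the
 * last active element of Pg) to the low elements of Zd, then fill the
 * remainder from the leading elements of Zm.  A single out-of-line
 * helper handles all element sizes; the element size is passed in the
 * descriptor data field (a->esz).
 */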

static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */
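
/* do_ldr transfers a whole Zreg or Preg as a stream of bytes.  Small
 * transfers are fully unrolled; larger ones use a TCG-level loop over
 * 8-byte units, with any tail smaller than 8 bytes handled separately.
 */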

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
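
/* LDR (vector) and LDR (predicate): the immediate offset is scaled by
 * the current register size, so the effective address is Rn plus
 * imm times the full Zreg (or Preg) size in bytes.
 */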

static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}