/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "trace-tcg.h"
#include "translate-a64.h"
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated).  */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}
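/* A worked example of the tsz:imm3 encoding handled above, for the byte
 * element size: tsz == 0b0001, so the combined field x is 8..15.
 * tszimm_esz yields 31 - clz32(1) = 0; tszimm_shr yields (16 << 0) - x,
 * i.e. right shifts of 1..8; tszimm_shl yields x - (8 << 0), i.e. left
 * shifts of 0..7.  A tsz of zero makes clz32 return 32, so tszimm_esz
 * returns -1 and the encoding is diagnosed as unallocated.
 */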
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"
/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
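/* For a concrete sense of the sizes involved: with a 256-bit vector
 * length, a Zreg occupies 32 bytes, pred_full_reg_size gives 32 / 8 = 4
 * bytes, and pred_gvec_reg_size rounds that up to the 8-byte minimum
 * operand size supported by the tcg vector infrastructure.
 */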
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
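/* This layout is the contract with the predtest helpers below: the
 * returned word carries N in its sign bit, bit 1 becomes cpu_ZF (QEMU
 * holds Z inverted, so Z is set only when that bit is clear), bit 0
 * becomes C, and V is always clear.
 */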
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
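/* Each predicate bit governs one byte of vector data, so an element of
 * size 1 << esz owns 1 << esz consecutive predicate bits, of which only
 * the lowest is significant.  Hence the masks above: e.g. for esz == 2
 * (word elements) every fourth bit is active, giving 0x1111...1111.
 */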
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
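/* For instance, DO_ZPZZ(SMULH, smulh) below expands to a
 * trans_SMULH_zpzz function that dispatches on a->esz to
 * gen_helper_sve_smulh_zpzz_{b,h,s,d}.
 */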
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(NOT_zpz, not_zpz)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_reduc * const fns[4] = {                 \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_vpz_ool(s, a, fns[a->esz]);                           \
}

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz.  */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less.  */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
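/* Concretely: an unpredicated LSR .H (esz == 1) with imm == 16 takes
 * the zeroing path above, while ASR .H with imm == 16 is clamped to a
 * shift of 15, matching the architectural behaviour of a shift by the
 * element size.
 */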
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}
/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
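/* The psz == 8 fast path above applies to vector lengths up to 512
 * bits, where the whole predicate fits in a single 64-bit load and both
 * the operation and the PredTest can be done on i64 temps rather than
 * via a full gvec expansion.
 */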
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default: /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
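/* For example, with a 256-bit vector and esz == 2 there are 8 word
 * elements: POW2 and VL8 both yield 8, but VL16 asks for more elements
 * than exist and so yields 0, as does any unallocated #uimm5 pattern.
 */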
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
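/* As an example of the fast paths above: PTRUE P0.D, ALL at a 2048-bit
 * vector length gives numelem = 32 and setsz = 256 bits, with word ==
 * lastword == 0x0101010101010101ull, so the entire 32-byte predicate
 * is written by a single dup64i expansion.
 */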
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
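/* E.g. for an unsigned decrement: the register is zero-extended, the
 * (positive) value is subtracted with full 64-bit precision, and the
 * movcond clamps any result below 0 back to 0; the unsigned increment
 * case instead clamps against UINT32_MAX with TCG_COND_GT.
 */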
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
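/* The signed cases use the usual two's-complement identities: the
 * subtraction overflows iff the operands differ in sign and the result
 * differs in sign from the minuend, i.e. (reg ^ val) & (reg ^ (reg - val))
 * is negative; the addition overflows iff the operands agree in sign and
 * the result differs, i.e. ((reg + val) ^ val) & ~(reg ^ val) is negative.
 */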
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
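/* Note the trick used for the narrow cases above: rather than needing
 * separate saturating-subtract helpers, a decrement simply negates the
 * (positive) value and hands it to the saturating-add helper, which
 * computes the width-limited result internally.
 */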
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
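/* For example, at vsz == 32 an EXT with imm == 16 satisfies every
 * condition above (both 16-byte pieces are gvec-aligned) and is emitted
 * as at most two host vector moves, whereas imm == 12 fails the
 * size_for_gvec tests and falls back to the out-of-line helper.
 */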
/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}

static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
/*
 *** SVE Permute - Interleaving Group
 */

static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
2290 *** SVE Permute Vector - Predicated Group
2293 static bool trans_COMPACT(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2295 static gen_helper_gvec_3
* const fns
[4] = {
2296 NULL
, NULL
, gen_helper_sve_compact_s
, gen_helper_sve_compact_d
2298 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
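
/* For example, with esz == 2 a last active element at index 3 yields
 * 3 << 2 == 12; if no element is active the helper returns -4, so a
 * simple sign test distinguishes the not-found case.
 */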
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
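
/* E.g. for esz == 3 and a 48-byte (384-bit) vector, an offset of 40
 * increments to 48, which the movcond wraps back to 0; when the vector
 * size is a power of two the same wrap is just the mask above.
 */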
/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
/* Load an unsigned element of ESZ from BASE+OFS.  */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
/* Load an unsigned element of ESZ from RM[LAST].  */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
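
/* On big-endian hosts the XOR above re-maps the byte offset within each
 * 64-bit unit; e.g. for esz == 1 (halfwords) an offset of 2 becomes
 * 2 ^ 6 == 4, mirroring the layout described at vec_reg_offset.
 */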
/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
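
/* In sum: CLASTB broadcasts the last active element of Zm to every
 * element of Zd, and CLASTA the element following it; when no element
 * is active, Zd is simply a copy of Zn (the MOVPRFX source).
 */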
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, true);
}
/* Compute CLAST for a scalar.  */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
/* Compute CLAST for a Vreg.  */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, true);
}
/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
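
/* For the X-register forms, the previous value of Xdn supplies the
 * result when no element is active, which is why it is zero-extended
 * to the element width before do_clast_scalar's conditional move.
 */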
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, true);
}
/* Compute LAST for a scalar.  */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
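
/* Unlike CLAST, LAST is defined even when no element is active: LASTB
 * (before) wraps the negative not-found offset to the last element of
 * the vector, and LASTA (after) wraps it to element 0, so no
 * conditional move is needed here.
 */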
/* Compute LAST for a Vreg.  */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, true);
}
/* Compute LAST for a Xreg.  */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, true);
}
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}
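
/* SPLICE copies the first-active through last-active elements of Zn to
 * the low part of Zd and fills the remainder with the leading elements
 * of Zm; esz rides along as the descriptor data for the helper.
 */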
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
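
/* E.g. for a 6-byte predicate register (VL = 384 bits): len_align == 0,
 * so everything is handled by the remainder path, which issues a 4-byte
 * and a 2-byte little-endian load and merges them with a deposit.
 */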
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);