target/arm: Implement SVE Element Count Group
target/arm/translate-sve.c
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
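/* Worked example (annotation, not in the original): a .s element has
 * tsz in [4, 7], so tszimm_esz returns 31 - clz32(4..7) = 2.  With
 * esize = 8 << esz = 32, tszimm_shr maps x to 2 * esize - x, giving
 * right-shift counts 1..32, and tszimm_shl maps x to x - esize, giving
 * left-shift counts 0..31.  A tsz of 0 gives clz32(0) == 32 and thus
 * the -1 "unallocated" marker.
 */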
static inline int plus1(int x)
{
    return x + 1;
}
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"
/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
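/* Example (annotation): with a 256-bit vector length, s->sve_len is 32
 * bytes, so each predicate register occupies 32 / 8 = 4 bytes -- one
 * predicate bit per vector byte.
 */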
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
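/* For instance (annotation): a 4-byte predicate is widened to 8 bytes
 * and a 20-byte predicate to 32, matching the 8- and 16-byte operand
 * granules the gvec expanders accept.
 */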
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}
/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}
/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}
/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
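/* Annotation: the PredTest helpers pack the flags so that bit 31 is N,
 * bit 1 is set exactly when Z should be clear, and bit 0 is C.  QEMU's
 * cpu_ZF convention (zero means Z set) lets the unpacking above be a
 * move and two ANDs, with V always zero.
 */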
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
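/* Annotation: a predicate holds one bit per vector byte, and only the
 * bit for the least significant byte of each element is significant;
 * hence every bit is active for .b, every second (0x55...) for .h,
 * every fourth (0x11...) for .s, and every eighth (0x01...) for .d.
 */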
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                             \
{                                                                          \
    static gen_helper_gvec_4 * const fns[4] = {                            \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,    \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,    \
    };                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                 \
}
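/* Annotation: each DO_ZPZZ instance expands to a translator that picks
 * the out-of-line helper for the decoded element size; for example
 * DO_ZPZZ(ADD, add) defines trans_ADD_zpzz, selecting one of
 * gen_helper_sve_add_zpzz_{b,h,s,d}.
 */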
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

#undef DO_ZPZZ
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
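/* Annotation: there is no 64-bit SADDV encoding; sign-extending a .d
 * element into the 64-bit accumulator is an identity, so UADDV already
 * produces the same result for that size.
 */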
#undef DO_VPZ
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                             \
{                                                                          \
    static gen_helper_gvec_4 * const fns[3] = {                            \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,    \
        gen_helper_sve_##name##_zpzw_s,                                    \
    };                                                                     \
    if (a->esz < 0 || a->esz >= 3) {                                       \
        return false;                                                      \
    }                                                                      \
    return do_zpzz_ool(s, a, fns[a->esz]);                                 \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
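/* Annotation: e.g. lsr z0.b, z1.b, #8 shifts every byte by its full
 * width and so expands to a zeroing dup, while asr z0.b, z1.b, #8 is
 * clamped to a shift by 7, which yields the same all-sign-bits result.
 */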
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                              \
{                                                                          \
    static gen_helper_gvec_3 * const fns[4] = {                            \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,      \
        gen_helper_sve_##name##_zzw_s, NULL                                \
    };                                                                     \
    return do_zzw_ool(s, a, fns[a->esz]);                                  \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}
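/* Annotation: with a 256-bit vector length, addvl x0, x1, #3 adds
 * 3 * 32 = 96 bytes and addpl x0, x1, #3 adds 3 * 4 = 12 bytes;
 * rdvl x0, #1 simply loads 32.
 */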
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
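/* Annotation: with 256-bit vectors and esz = 2 there are 8 .s elements;
 * pattern POW2 then yields 8, VL7 yields 7, MUL3 yields 6, and VL16
 * yields 0 because the bound exceeds the element count.
 */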
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
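/* Annotation: because both operands are first widened to 64 bits, the
 * exact result cannot wrap, so a single compare against the 32-bit
 * bound suffices; e.g. an unsigned decrement by four elements applied
 * to a register holding 2 computes 2 - 4 = -2 < 0 and clamps to 0.
 */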
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
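/* Annotation: the signed paths use the classic XOR trick: for
 * reg - val, overflow occurred iff the operands had different signs
 * and the result's sign differs from reg's, i.e.
 * ((reg ^ val) & (reg ^ result)) < 0; the movcond then selects the
 * saturated bound whenever that sign bit is set.
 */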
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
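/* Annotation: a 6-byte remainder can occur for predicate registers,
 * e.g. a 384-bit vector length gives a 6-byte predicate; hence the
 * split 4-byte + 2-byte little-endian load recombined with a deposit.
 */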
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}