/*
 * ARM translation: AArch32 Neon instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */

static inline int plus1(DisasContext *s, int x)
{
    return x + 1;
}

static inline int rsub_64(DisasContext *s, int x)
{
    return 64 - x;
}

static inline int rsub_32(DisasContext *s, int x)
{
    return 32 - x;
}

static inline int rsub_16(DisasContext *s, int x)
{
    return 16 - x;
}

static inline int rsub_8(DisasContext *s, int x)
{
    return 8 - x;
}

static inline int neon_3same_fp_size(DisasContext *s, int x)
{
    /* Convert 0==fp32, 1==fp16 into a MO_* value */
    return MO_32 - x;
}

/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"

/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
 * where 0 is the least significant end of the register.
 */
static inline long
neon_element_offset(int reg, int element, MemOp size)
{
    int element_size = 1 << size;
    int ofs = element * element_size;
#ifdef HOST_WORDS_BIGENDIAN
    /* Calculate the offset assuming fully little-endian,
     * then XOR to account for the order of the 8-byte units.
     */
    if (element_size < 8) {
        ofs ^= 8 - element_size;
    }
#endif
    return neon_reg_offset(reg, 0) + ofs;
}
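
/*
 * For example, with 16-bit elements on a big-endian host: element 1
 * has element_size == 2 and little-endian ofs == 2, and the XOR with
 * 8 - 2 == 6 gives ofs == 4, so architectural bytes 2..3 of the
 * register sit at host byte offset 4 within the 8-byte unit, while
 * element 0 lands at offset 6, adjacent to the 64-bit LSB.
 */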

static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, cpu_env, offset);
        break;
    case MO_Q:
        tcg_gen_ld_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i32(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i32(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, cpu_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcmlah : gen_helper_gvec_fcmlas;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
{
    int opr_sz;
    gen_helper_gvec_3 *fn_gvec;

    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
    tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       opr_sz, opr_sz, 0, fn_gvec);
    return true;
}

static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}

static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    gen_helper_gvec_3_ptr *fn_gvec_ptr;
    int opr_sz;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vn) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcmlah_idx : gen_helper_gvec_fcmlas_idx;
    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz,
                       (a->index << 2) | a->rot, fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
{
    gen_helper_gvec_3 *fn_gvec;
    int opr_sz;

    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vn) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->rm),
                       opr_sz, opr_sz, a->index, fn_gvec);
    return true;
}

static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       cpu_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}

static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1},
};

static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
}

static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp endian = s->be_data;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian. */
    if (size == 0) {
        endian = MO_LE;
    }
    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        size = 3;
    }
    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    tmp = tcg_const_i32(1 << size);
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                }
                tcg_gen_add_i32(addr, addr, tmp);
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(tmp64);

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}

static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = MO_32;
    }
    if (nregs == 1 && a->a == 1 && size == 0) {
        return false;
    }
    if (nregs == 3 && a->a == 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                        s->be_data | size);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
                             neon_reg_offset(vd, 0), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}

static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    /* Neon load/store single structure to one lane */
    int reg;
    int nregs = a->n + 1;
    int vd = a->vd;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (((a->align & (1 << a->size)) != 0) ||
            (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
            return false;
        }
        break;
    case 3:
        if ((a->align & 1) != 0) {
            return false;
        }
        /* fall through */
    case 2:
        if (a->size == 2 && (a->align & 2) != 0) {
            return false;
        }
        break;
    case 4:
        if ((a->size == 2) && ((a->align & 3) == 3)) {
            return false;
        }
        break;
    default:
        abort();
    }
    if ((vd + a->stride * (nregs - 1)) > 31) {
        /*
         * Attempts to write off the end of the register file are
         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
         * access off the end of the array that holds the register data.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    /*
     * TODO: if we implemented alignment exceptions, we should check
     * addr against the alignment encoded in a->align here.
     */
    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                            s->be_data | a->size);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
                            s->be_data | a->size);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}

static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rn_ofs = neon_reg_offset(a->vn, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }
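
/*
 * For reference, DO_3SAME(VADD, tcg_gen_gvec_add) expands to:
 *
 *     static bool trans_VADD_3s(DisasContext *s, arg_3same *a)
 *     {
 *         return do_3same(s, a, tcg_gen_gvec_add);
 *     }
 */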

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)

/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
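
/*
 * Since tcg_gen_gvec_bitsel(vece, d, a, b, c, ...) computes
 * d = (b & a) | (c & ~a), the operand orders above give:
 *   VBSL: rd = (rn & rd) | (rm & ~rd)  -- rd is the select mask
 *   VBIT: rd = (rn & rm) | (rd & ~rm)  -- insert rn bits where rm is set
 *   VBIF: rd = (rd & rm) | (rn & ~rm)  -- insert rn bits where rm is clear
 */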

#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size == 3) {                                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)

#define DO_3SAME_CMP(INSN, COND)                                        \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    }                                                                   \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)

#define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
    {                                                                      \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)

static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_3same(s, a, gen_VMUL_p_3s);
}

#define DO_VQRDMLAH(INSN, FUNC)                                         \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_rdm, s)) {                            \
            return false;                                               \
        }                                                               \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)

#define DO_SHA1(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha1, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)

#define DO_SHA2(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha2, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)

#define DO_3SAME_64(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 op = { .fni8 = FUNC };                    \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

#define DO_3SAME_64_ENV(INSN, FUNC)                                     \
    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }                                                                   \
    DO_3SAME_64(INSN, gen_##INSN##_elt)

DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)

#define DO_3SAME_32(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

/*
 * Some helper functions need to be passed the cpu_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }

#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_##INSN##_tramp8 },                            \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
DO_3SAME_32(VRSHL_S, rshl_s)
DO_3SAME_32(VRSHL_U, rshl_u)

DO_3SAME_32_ENV(VQSHL_S, qshl_s)
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)

static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
{
    /* Operations handled pairwise 32 bits at a time */
    TCGv_i32 tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    /*
     * Note that we have to be careful not to clobber the source operands
     * in the "vm == vd" case by storing the result of the first pass too
     * early. Since Q is 0 there are always just two passes, so instead
     * of a complicated loop over each pass we just unroll.
     */
    tmp = neon_load_reg(a->vn, 0);
    tmp2 = neon_load_reg(a->vn, 1);
    fn(tmp, tmp, tmp2);
    tcg_temp_free_i32(tmp2);

    tmp3 = neon_load_reg(a->vm, 0);
    tmp2 = neon_load_reg(a->vm, 1);
    fn(tmp3, tmp3, tmp2);
    tcg_temp_free_i32(tmp2);

    neon_store_reg(a->vd, 0, tmp);
    neon_store_reg(a->vd, 1, tmp3);
    return true;
}
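
/*
 * Concretely: the first result word is fn applied to the two words of
 * Dn and the second is fn applied to the two words of Dm, so e.g.
 * VPADD computes Dd = { Dn[0] op Dn[1], Dm[0] op Dm[1] }.
 */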

#define DO_3SAME_PAIR(INSN, func)                                       \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        static NeonGenTwoOpFn * const fns[] = {                         \
            gen_helper_neon_##func##8,                                  \
            gen_helper_neon_##func##16,                                 \
            gen_helper_neon_##func##32,                                 \
        };                                                              \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same_pair(s, a, fns[a->size]);                       \
    }

/* 32-bit pairwise ops end up the same as the elementwise versions. */
#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
#define gen_helper_neon_padd_u32  tcg_gen_add_i32

DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
DO_3SAME_PAIR(VPADD, padd_u)

#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16);    \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32);    \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[2] = {                                \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_VQDMULH(VQDMULH, qdmulh)
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)

#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rn_ofs, uint32_t rm_ofs,              \
                         uint32_t oprsz, uint32_t maxsz)                \
    {                                                                   \
        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
                           oprsz, maxsz, 0, FUNC);                      \
        tcg_temp_free_ptr(fpst);                                        \
    }

#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
    }

DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)

WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)

static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMAXNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMAXNM_fp32_3s);
}

static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMINNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMINNM_fp32_3s);
}

static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
                             gen_helper_gvec_3_ptr *fn)
{
    /* FP pairwise operations */
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpstatus, 8, 8, 0, fn);
    tcg_temp_free_ptr(fpstatus);

    return true;
}

/*
 * For all the functions using this macro, size == 1 means fp16,
 * which requires the FP16 arithmetic extension (checked below).
 */
#define DO_3S_FP_PAIR(INSN,FUNC)                                        \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same_fp_pair(s, a, FUNC##h);                     \
        }                                                               \
        return do_3same_fp_pair(s, a, FUNC##s);                         \
    }

DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)

static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}

#define DO_2SH(INSN, FUNC)                                              \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_vector_2sh(s, a, FUNC);                               \
    }

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)

static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Signed shift out of range results in all-sign-bits */
    a->shift = MIN(a->shift, (8 << a->size) - 1);
    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
}
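
/*
 * e.g. a VSHR.S8 by 8 becomes a shift by 7 here, which fills every
 * bit of the lane with the sign bit as the architecture requires;
 * the clamp is needed because gvec shifts only accept counts that
 * are strictly less than the element size.
 */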

static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}

static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Shift out of range is architecturally valid and results in zero. */
    if (a->shift >= (8 << a->size)) {
        return do_vector_2sh(s, a, gen_zero_rd_2sh);
    } else {
        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
    }
}
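
/*
 * e.g. a VSHR.U8 by 8 cannot be expressed as a gvec shift-right
 * (the count would equal the lane width), so it is emitted instead
 * as a dup of the constant zero via gen_zero_rd_2sh().
 */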

static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwo64OpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size == 3 case, where the
     * function needs to be passed cpu_env.
     */
    TCGv_i64 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_const_i64(dup_const(a->size, a->shift));

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 tmp = tcg_temp_new_i64();

        neon_load_reg64(tmp, a->vm + pass);
        fn(tmp, cpu_env, tmp, constimm);
        neon_store_reg64(tmp, a->vd + pass);
        tcg_temp_free_i64(tmp);
    }
    tcg_temp_free_i64(constimm);
    return true;
}

static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwoOpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size < 3 case, where the
     * helper needs to be passed cpu_env.
     */
    TCGv_i32 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_const_i32(dup_const(a->size, a->shift));

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        fn(tmp, cpu_env, tmp, constimm);
        neon_store_reg(a->vd, pass, tmp);
    }
    tcg_temp_free_i32(constimm);
    return true;
}

#define DO_2SHIFT_ENV(INSN, FUNC)                                         \
    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
    {                                                                     \
        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);        \
    }                                                                     \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)    \
    {                                                                     \
        static NeonGenTwoOpEnvFn * const fns[] = {                        \
            gen_helper_neon_##FUNC##8,                                    \
            gen_helper_neon_##FUNC##16,                                   \
            gen_helper_neon_##FUNC##32,                                   \
        };                                                                \
        assert(a->size < ARRAY_SIZE(fns));                                \
        return do_2shift_env_32(s, a, fns[a->size]);                      \
    }

DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)

static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2;
    TCGv_i32 rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
    constimm = tcg_const_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();

    /* Load both inputs first to avoid potential overwrite if rm == rd */
    neon_load_reg64(rm1, a->vm);
    neon_load_reg64(rm2, a->vm + 1);

    shiftfn(rm1, rm1, constimm);
    rd = tcg_temp_new_i32();
    narrowfn(rd, cpu_env, rm1);
    neon_store_reg(a->vd, 0, rd);

    shiftfn(rm2, rm2, constimm);
    rd = tcg_temp_new_i32();
    narrowfn(rd, cpu_env, rm2);
    neon_store_reg(a->vd, 1, rd);

    tcg_temp_free_i64(rm1);
    tcg_temp_free_i64(rm2);
    tcg_temp_free_i64(constimm);

    return true;
}

static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_const_i32(imm);

    /* Load all inputs first to avoid potential overwrite */
    rm1 = neon_load_reg(a->vm, 0);
    rm2 = neon_load_reg(a->vm, 1);
    rm3 = neon_load_reg(a->vm + 1, 0);
    rm4 = neon_load_reg(a->vm + 1, 1);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
    tcg_temp_free_i32(rm2);

    narrowfn(rm1, cpu_env, rtmp);
    neon_store_reg(a->vd, 0, rm1);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);
    tcg_temp_free_i32(constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
    tcg_temp_free_i32(rm4);

    narrowfn(rm3, cpu_env, rtmp);
    tcg_temp_free_i64(rtmp);
    neon_store_reg(a->vd, 1, rm3);

    return true;
}

#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
    }
#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
    }

static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}

DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)

static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is a widen-and-shift operation. The shift is always less
     * than the width of the source type, so after widening the input
     * vector we can simply shift the whole 64-bit widened register,
     * and then clear the potential overflow bits resulting from left
     * bits of the narrow input appearing as right bits of the left
     * neighbour narrow input. Calculate a mask of bits to clear.
     */
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }
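
    /*
     * e.g. for size == 0 (8-bit lanes) and shift == 3: esize == 8 and
     * widen_mask == (0xff >> 5) duplicated at MO_16, i.e.
     * 0x0007000700070007. After the 64-bit left shift below, the low
     * three bits of each 16-bit lane hold bits that spilled over from
     * the lane below, and the andi with ~widen_mask clears them.
     */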

    rm0 = neon_load_reg(a->vm, 0);
    rm1 = neon_load_reg(a->vm, 1);
    tmp = tcg_temp_new_i64();

    widenfn(tmp, rm0);
    tcg_temp_free_i32(rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    neon_store_reg64(tmp, a->vd);

    widenfn(tmp, rm1);
    tcg_temp_free_i32(rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    neon_store_reg64(tmp, a->vd + 1);
    tcg_temp_free_i64(tmp);
    return true;
}

static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}

static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}

static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
                      gen_helper_gvec_2_ptr *fn)
{
    /* FP operations in 2-reg-and-shift group */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);
    TCGv_ptr fpst;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
    tcg_temp_free_ptr(fpst);
    return true;
}

#define DO_FP_2SH(INSN, FUNC)                                           \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_fp_2sh(s, a, FUNC);                                   \
    }

DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)

static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
{
    /*
     * Expand the encoded constant.
     * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
     * We choose to not special-case this and will behave as if a
     * valid constant encoding of 0 had been given.
     * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
     */
    switch (cmode) {
    case 0: case 1:
        /* no-op */
        break;
    case 2: case 3:
        imm <<= 8;
        break;
    case 4: case 5:
        imm <<= 16;
        break;
    case 6: case 7:
        imm <<= 24;
        break;
    case 8: case 9:
        imm |= imm << 16;
        break;
    case 10: case 11:
        imm = (imm << 8) | (imm << 24);
        break;
    case 12:
        imm = (imm << 8) | 0xff;
        break;
    case 13:
        imm = (imm << 16) | 0xffff;
        break;
    case 14:
        if (op) {
            /*
             * This is the only case where the top and bottom 32 bits
             * of the encoded constant differ.
             */
            uint64_t imm64 = 0;
            int n;

            for (n = 0; n < 8; n++) {
                if (imm & (1 << n)) {
                    imm64 |= (0xffULL << (n * 8));
                }
            }
            return imm64;
        }
        imm |= (imm << 8) | (imm << 16) | (imm << 24);
        break;
    case 15:
        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
        break;
    }
    if (op) {
        imm = ~imm;
    }
    return dup_const(MO_32, imm);
}
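
/*
 * As an example of the cmode == 14, op == 1 per-byte expansion above:
 * imm == 0xa5 (bits 0, 2, 5 and 7 set) produces the 64-bit constant
 * 0xff00ff0000ff00ff, each set bit in imm selecting an all-ones byte.
 */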

static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                        GVecGen2iFn *fn)
{
    uint64_t imm;
    int reg_ofs, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    reg_ofs = neon_reg_offset(a->vd, 0);
    vec_size = a->q ? 16 : 8;
    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
    return true;
}

static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    GVecGen2iFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        /* for op=1, the imm will be inverted, so BIC becomes AND. */
        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        fn = gen_VMOV_1r;
    }
    return do_1reg_imm(s, a, fn);
}

static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
                           NeonGenWidenFn *widenfn,
                           NeonGenTwo64OpFn *opfn,
                           bool src1_wide)
{
    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
    TCGv_i64 rn0_64, rn1_64, rm_64;
    TCGv_i32 rm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!widenfn || !opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn0_64 = tcg_temp_new_i64();
    rn1_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();

    if (src1_wide) {
        neon_load_reg64(rn0_64, a->vn);
    } else {
        TCGv_i32 tmp = neon_load_reg(a->vn, 0);
        widenfn(rn0_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    rm = neon_load_reg(a->vm, 0);

    widenfn(rm_64, rm);
    tcg_temp_free_i32(rm);
    opfn(rn0_64, rn0_64, rm_64);

    /*
     * Load second pass inputs before storing the first pass result, to
     * avoid incorrect results if a narrow input overlaps with the result.
     */
    if (src1_wide) {
        neon_load_reg64(rn1_64, a->vn + 1);
    } else {
        TCGv_i32 tmp = neon_load_reg(a->vn, 1);
        widenfn(rn1_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    rm = neon_load_reg(a->vm, 1);

    neon_store_reg64(rn0_64, a->vd);

    widenfn(rm_64, rm);
    tcg_temp_free_i32(rm);
    opfn(rn1_64, rn1_64, rm_64);
    neon_store_reg64(rn1_64, a->vd + 1);

    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE)                         \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenWidenFn * const widenfn[] = {                     \
            gen_helper_neon_widen_##S##8,                               \
            gen_helper_neon_widen_##S##16,                              \
            tcg_gen_##EXT##_i32_i64,                                    \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        return do_prewiden_3d(s, a, widenfn[a->size],                   \
                              addfn[a->size], SRC1WIDE);                \
    }

DO_PREWIDEN(VADDL_S, s, ext, add, false)
DO_PREWIDEN(VADDL_U, u, extu, add, false)
DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
DO_PREWIDEN(VADDW_S, s, ext, add, true)
DO_PREWIDEN(VADDW_U, u, extu, add, true)
DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
DO_PREWIDEN(VSUBW_U, u, extu, sub, true)

static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
{
    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
    TCGv_i64 rn_64, rm_64;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn || !narrowfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vn | a->vm) & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    neon_load_reg64(rn_64, a->vn);
    neon_load_reg64(rm_64, a->vm);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd0, rn_64);

    neon_load_reg64(rn_64, a->vn + 1);
    neon_load_reg64(rm_64, a->vm + 1);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd1, rn_64);

    neon_store_reg(a->vd, 0, rd0);
    neon_store_reg(a->vd, 1, rd1);

    tcg_temp_free_i64(rn_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        static NeonGenNarrowFn * const narrowfn[] = {                   \
            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
            EXTOP,                                                      \
            NULL,                                                       \
        };                                                              \
        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
    }

static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}
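
/*
 * Adding 1 << 31 before taking the high half implements
 * round-to-nearest of the discarded low 32 bits, which is what the
 * "narrow_round" variants used by VRADDHN/VRSUBHN below require.
 */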

DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)

static bool do_long_3d(DisasContext *s, arg_3diff *a,
                       NeonGenTwoOpWidenFn *opfn,
                       NeonGenTwo64OpFn *accfn)
{
    /*
     * 3-regs different lengths, long operations.
     * These perform an operation on two inputs that returns a double-width
     * result, and then possibly perform an accumulation operation of
     * that result into the double-width destination.
     */
    TCGv_i64 rd0, rd1, tmp;
    TCGv_i32 rn, rm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd0 = tcg_temp_new_i64();
    rd1 = tcg_temp_new_i64();

    rn = neon_load_reg(a->vn, 0);
    rm = neon_load_reg(a->vm, 0);
    opfn(rd0, rn, rm);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(rm);

    rn = neon_load_reg(a->vn, 1);
    rm = neon_load_reg(a->vm, 1);
    opfn(rd1, rn, rm);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(rm);

    /* Don't store results until after all loads: they might overlap */
    if (accfn) {
        tmp = tcg_temp_new_i64();
        neon_load_reg64(tmp, a->vd);
        accfn(tmp, tmp, rd0);
        neon_store_reg64(tmp, a->vd);
        neon_load_reg64(tmp, a->vd + 1);
        accfn(tmp, tmp, rd1);
        neon_store_reg64(tmp, a->vd + 1);
        tcg_temp_free_i64(tmp);
    } else {
        neon_store_reg64(rd0, a->vd);
        neon_store_reg64(rd1, a->vd + 1);
    }

    tcg_temp_free_i64(rd0);
    tcg_temp_free_i64(rd1);

    return true;
}

static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}

static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}

static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_muls2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);

    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);
}
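
/*
 * muls2/mulu2 compute the full 64-bit product as separate low and
 * high 32-bit halves; the concat packs those halves into one i64.
 */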

static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_mulu2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);

    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);
}

static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_s8,
        gen_helper_neon_mull_s16,
        gen_mull_s32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_u8,
        gen_helper_neon_mull_u16,
        gen_mull_u32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

#define DO_VMLAL(INSN,MULL,ACC)                                         \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            gen_helper_neon_##MULL##8,                                  \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            gen_helper_neon_##ACC##l_u16,                               \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
    }

DO_VMLAL(VMLAL_S,mull_s,add)
DO_VMLAL(VMLAL_U,mull_u,add)
DO_VMLAL(VMLSL_S,mull_s,sub)
DO_VMLAL(VMLSL_U,mull_u,sub)

static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_helper_neon_mull_s16(rd, rn, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
}

static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_mull_s32(rd, rn, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
}

static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}

static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}

static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}

static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_negl_u32(rm, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}

static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_neg_i64(rm, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}

static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
{
    gen_helper_gvec_3 *fn_gvec;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    switch (a->size) {
    case 0:
        fn_gvec = gen_helper_neon_pmull_h;
        break;
    case 2:
        if (!dc_isar_feature(aa32_pmull, s)) {
            return false;
        }
        fn_gvec = gen_helper_gvec_pmull_q;
        break;
    default:
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
                       neon_reg_offset(a->vn, 0),
                       neon_reg_offset(a->vm, 0),
                       16, 16, 0, fn_gvec);
    return true;
}

static void gen_neon_dup_low16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_shli_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

static void gen_neon_dup_high16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_andi_i32(var, var, 0xffff0000);
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}
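
/*
 * e.g. given var == 0x1234abcd, gen_neon_dup_low16() produces
 * 0xabcdabcd and gen_neon_dup_high16() produces 0x12341234.
 */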

static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
    TCGv_i32 tmp;
    if (size == 1) {
        tmp = neon_load_reg(reg & 7, reg >> 4);
        if (reg & 8) {
            gen_neon_dup_high16(tmp);
        } else {
            gen_neon_dup_low16(tmp);
        }
    } else {
        tmp = neon_load_reg(reg & 15, reg >> 4);
    }
    return tmp;
}
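
/*
 * REG above packs the scalar encoding from the insn: for 16-bit
 * scalars, bits [2:0] select the D register, bit [3] the half-word
 * within the loaded word and bit [4] the word within the register;
 * for 32-bit scalars, bits [3:0] select the register and bit [4]
 * the word.
 */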

static bool do_2scalar(DisasContext *s, arg_2scalar *a,
                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
{
    /*
     * Two registers and a scalar: perform an operation between
     * the input elements and the scalar, and then possibly
     * perform an accumulation operation of that result into the
     * destination.
     */
    TCGv_i32 scalar;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vn, pass);
        opfn(tmp, tmp, scalar);
        if (accfn) {
            TCGv_i32 rd = neon_load_reg(a->vd, pass);
            accfn(tmp, rd, tmp);
            tcg_temp_free_i32(rd);
        }
        neon_store_reg(a->vd, pass, tmp);
    }
    tcg_temp_free_i32(scalar);
    return true;
}

static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_add_u16,
        tcg_gen_add_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_sub_u16,
        tcg_gen_sub_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}

static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
                              gen_helper_gvec_3_ptr *fn)
{
    /* Two registers and a scalar, using gvec */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rn_ofs = neon_reg_offset(a->vn, 0);
    int rm_ofs;
    int idx;
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* a->vm is M:Vm, which encodes both register and index */
    idx = extract32(a->vm, a->size + 2, 2);
    a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_reg_offset(a->vm, 0);
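
    /*
     * e.g. for size == MO_32 this yields a one-bit lane index from
     * bit [4] of M:Vm and the register number from bits [3:0]; for
     * size == MO_16 the index is bits [4:3] and the register [2:0].
     */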
2482 fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
2483 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
2484 vec_size, vec_size, idx, fn);
2485 tcg_temp_free_ptr(fpstatus);
2489 #define DO_VMUL_F_2sc(NAME, FUNC) \
2490 static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \
2492 static gen_helper_gvec_3_ptr * const opfn[] = { \
2494 gen_helper_##FUNC##_h, \
2495 gen_helper_##FUNC##_s, \
2498 if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
2501 return do_2scalar_fp_vec(s, a, opfn[a->size]); \
2504 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
2505 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
2506 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)

static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_VQDMULH_16,
        gen_VQDMULH_32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_VQRDMULH_16,
        gen_VQRDMULH_32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
     * performs a kind of fused op-then-accumulate using a helper
     * function that takes all of rd, rn and the scalar at once.
     */
    TCGv_i32 scalar;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (!dc_isar_feature(aa32_rdm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 rn = neon_load_reg(a->vn, pass);
        TCGv_i32 rd = neon_load_reg(a->vd, pass);
        opfn(rd, cpu_env, rn, scalar, rd);
        tcg_temp_free_i32(rn);
        neon_store_reg(a->vd, pass, rd);
    }
    tcg_temp_free_i32(scalar);
    return true;
}

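/*
 * The helpers here take cpu_env because VQRDMLAH/VQRDMLSH saturate
 * and must be able to set the QC flag; the final rd argument is the
 * accumulator input, so no separate accfn step is needed.
 */
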
static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenThreeOpEnvFn *opfn[] = {
        NULL,
        gen_helper_neon_qrdmlah_s16,
        gen_helper_neon_qrdmlah_s32,
        NULL,
    };

    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
}

static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenThreeOpEnvFn *opfn[] = {
        NULL,
        gen_helper_neon_qrdmlsh_s16,
        gen_helper_neon_qrdmlsh_s32,
        NULL,
    };

    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
}

static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
                            NeonGenTwoOpWidenFn *opfn,
                            NeonGenTwo64OpFn *accfn)
{
    /*
     * Two registers and a scalar, long operations: perform an
     * operation on the input elements and the scalar which produces
     * a double-width result, and then possibly perform an accumulation
     * operation of that result into the destination.
     */
    TCGv_i32 scalar, rn;
    TCGv_i64 rn0_64, rn1_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);

    /* Load all inputs before writing any outputs, in case of overlap */
    rn = neon_load_reg(a->vn, 0);
    rn0_64 = tcg_temp_new_i64();
    opfn(rn0_64, rn, scalar);
    tcg_temp_free_i32(rn);

    rn = neon_load_reg(a->vn, 1);
    rn1_64 = tcg_temp_new_i64();
    opfn(rn1_64, rn, scalar);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(scalar);

    if (accfn) {
        TCGv_i64 t64 = tcg_temp_new_i64();
        neon_load_reg64(t64, a->vd);
        accfn(t64, t64, rn0_64);
        neon_store_reg64(t64, a->vd);
        neon_load_reg64(t64, a->vd + 1);
        accfn(t64, t64, rn1_64);
        neon_store_reg64(t64, a->vd + 1);
        tcg_temp_free_i64(t64);
    } else {
        neon_store_reg64(rn0_64, a->vd);
        neon_store_reg64(rn1_64, a->vd + 1);
    }
    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);
    return true;
}

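/*
 * Note that Vd must be even here (it names a D-register pair), and
 * loading both Vn inputs before the first writeback matters because
 * e.g. "vmlal.s16 q0, d0, d1[0]" has the destination overlapping
 * the first source.
 */
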
static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_helper_neon_mull_s16,
        gen_mull_s32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_helper_neon_mull_u16,
        gen_mull_u32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

#define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            NULL,                                                       \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            NULL,                                                       \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
    }

DO_VMLAL_2SC(VMLAL_S, mull_s, add)
DO_VMLAL_2SC(VMLAL_U, mull_u, add)
DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
DO_VMLAL_2SC(VMLSL_U, mull_u, sub)

static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (a->imm > 7 && !a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (!a->q) {
        /* Extract 64 bits from <Vm:Vn> */
        TCGv_i64 left, right, dest;

        left = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        neon_load_reg64(right, a->vn);
        neon_load_reg64(left, a->vm);
        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
        neon_store_reg64(dest, a->vd);

        tcg_temp_free_i64(left);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(dest);
    } else {
        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
        TCGv_i64 left, middle, right, destleft, destright;

        left = tcg_temp_new_i64();
        middle = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        destleft = tcg_temp_new_i64();
        destright = tcg_temp_new_i64();

        if (a->imm < 8) {
            neon_load_reg64(right, a->vn);
            neon_load_reg64(middle, a->vn + 1);
            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
            neon_load_reg64(left, a->vm);
            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
        } else {
            neon_load_reg64(right, a->vn + 1);
            neon_load_reg64(middle, a->vm);
            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
            neon_load_reg64(left, a->vm + 1);
            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
        }

        neon_store_reg64(destright, a->vd);
        neon_store_reg64(destleft, a->vd + 1);

        tcg_temp_free_i64(destright);
        tcg_temp_free_i64(destleft);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(middle);
        tcg_temp_free_i64(left);
    }
    return true;
}

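/*
 * Worked example (a sketch): for the doubleword form with imm == 3,
 * the result is (Vm:Vn) >> 24, i.e. bytes 3..7 of Vn followed by
 * bytes 0..2 of Vm, which is exactly what
 * tcg_gen_extract2_i64(dest, right, left, 24) computes.
 */
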
static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
{
    int n;
    TCGv_i32 tmp, tmp2, tmp3, tmp4;
    TCGv_ptr ptr1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    n = a->len + 1;
    if ((a->vn + n) > 32) {
        /*
         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
         * helper function running off the end of the register file.
         */
        return false;
    }
    n <<= 3;
    if (a->op) {
        tmp = neon_load_reg(a->vd, 0);
    } else {
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
    }
    tmp2 = neon_load_reg(a->vm, 0);
    ptr1 = vfp_reg_ptr(true, a->vn);
    tmp4 = tcg_const_i32(n);
    gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
    tcg_temp_free_i32(tmp);
    if (a->op) {
        tmp = neon_load_reg(a->vd, 1);
    } else {
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
    }
    tmp3 = neon_load_reg(a->vm, 1);
    gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
    tcg_temp_free_i32(tmp4);
    tcg_temp_free_ptr(ptr1);
    neon_store_reg(a->vd, 0, tmp2);
    neon_store_reg(a->vd, 1, tmp3);
    tcg_temp_free_i32(tmp);
    return true;
}

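/*
 * Semantics sketch: each byte of Vm indexes the n-byte table that
 * starts at Vn. Out-of-range indices give 0 for VTBL, but leave the
 * destination byte unchanged for VTBX (a->op set), which is why the
 * helper is seeded with zero or with the old Vd value above.
 */
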
static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
                         neon_element_offset(a->vm, a->index, a->size),
                         a->q ? 16 : 8, a->q ? 16 : 8);
    return true;
}

static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
{
    int pass, half;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        TCGv_i32 tmp[2];

        for (half = 0; half < 2; half++) {
            tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
            switch (a->size) {
            case MO_8:
                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
                break;
            case MO_16:
                gen_swap_half(tmp[half], tmp[half]);
                break;
            case MO_32:
                break;
            default:
                g_assert_not_reached();
            }
        }
        neon_store_reg(a->vd, pass * 2, tmp[1]);
        neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);
    }
    return true;
}

static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
                              NeonGenWidenFn *widenfn,
                              NeonGenTwo64OpFn *opfn,
                              NeonGenTwo64OpFn *accfn)
{
    /*
     * Pairwise long operations: widen both halves of the pair,
     * combine the pairs with the opfn, and then possibly accumulate
     * into the destination with the accfn.
     */
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i32 tmp;
        TCGv_i64 rm0_64, rm1_64, rd_64;

        rm0_64 = tcg_temp_new_i64();
        rm1_64 = tcg_temp_new_i64();
        rd_64 = tcg_temp_new_i64();
        tmp = neon_load_reg(a->vm, pass * 2);
        widenfn(rm0_64, tmp);
        tcg_temp_free_i32(tmp);
        tmp = neon_load_reg(a->vm, pass * 2 + 1);
        widenfn(rm1_64, tmp);
        tcg_temp_free_i32(tmp);
        opfn(rd_64, rm0_64, rm1_64);
        tcg_temp_free_i64(rm0_64);
        tcg_temp_free_i64(rm1_64);

        if (accfn) {
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            neon_load_reg64(tmp64, a->vd + pass);
            accfn(rd_64, tmp64, rd_64);
            tcg_temp_free_i64(tmp64);
        }
        neon_store_reg64(rd_64, a->vd + pass);
        tcg_temp_free_i64(rd_64);
    }
    return true;
}

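/*
 * For example, VPADDL.S8 arrives here with widenfn ==
 * gen_helper_neon_widen_s8 and opfn == gen_helper_neon_paddl_u16:
 * each byte is widened to a 16-bit lane, and combining the two
 * widened halves via the paddl helper yields the sums of adjacent
 * byte pairs.
 */
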
static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
                             accfn[a->size]);
}

static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
                             accfn[a->size]);
}

typedef void ZipFn(TCGv_ptr, TCGv_ptr);

static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
                       ZipFn *fn)
{
    TCGv_ptr pd, pm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!fn) {
        /* Bad size or size/q combination */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    pd = vfp_reg_ptr(true, a->vd);
    pm = vfp_reg_ptr(true, a->vm);
    fn(pd, pm);
    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(pm);
    return true;
}

static bool trans_VUZP(DisasContext *s, arg_2misc *a)
{
    static ZipFn * const fn[2][4] = {
        {
            gen_helper_neon_unzip8,
            gen_helper_neon_unzip16,
            NULL,
            NULL,
        }, {
            gen_helper_neon_qunzip8,
            gen_helper_neon_qunzip16,
            gen_helper_neon_qunzip32,
            NULL,
        }
    };
    return do_zip_uzp(s, a, fn[a->q][a->size]);
}

static bool trans_VZIP(DisasContext *s, arg_2misc *a)
{
    static ZipFn * const fn[2][4] = {
        {
            gen_helper_neon_zip8,
            gen_helper_neon_zip16,
            NULL,
            NULL,
        }, {
            gen_helper_neon_qzip8,
            gen_helper_neon_qzip16,
            gen_helper_neon_qzip32,
            NULL,
        }
    };
    return do_zip_uzp(s, a, fn[a->q][a->size]);
}

static bool do_vmovn(DisasContext *s, arg_2misc *a,
                     NeonGenNarrowEnvFn *narrowfn)
{
    TCGv_i64 rm;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!narrowfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    neon_load_reg64(rm, a->vm);
    narrowfn(rd0, cpu_env, rm);
    neon_load_reg64(rm, a->vm + 1);
    narrowfn(rd1, cpu_env, rm);
    neon_store_reg(a->vd, 0, rd0);
    neon_store_reg(a->vd, 1, rd1);
    tcg_temp_free_i64(rm);
    return true;
}

#define DO_VMOVN(INSN, FUNC)                                    \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        static NeonGenNarrowEnvFn * const narrowfn[] = {        \
            FUNC##8,                                            \
            FUNC##16,                                           \
            FUNC##32,                                           \
            NULL,                                               \
        };                                                      \
        return do_vmovn(s, a, narrowfn[a->size]);               \
    }

DO_VMOVN(VMOVN, gen_neon_narrow_u)
DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)

static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 rm0, rm1;
    TCGv_i64 rd;
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd = tcg_temp_new_i64();

    rm0 = neon_load_reg(a->vm, 0);
    rm1 = neon_load_reg(a->vm, 1);

    widenfn(rd, rm0);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    neon_store_reg64(rd, a->vd);
    widenfn(rd, rm1);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    neon_store_reg64(rd, a->vd + 1);

    tcg_temp_free_i64(rd);
    tcg_temp_free_i32(rm0);
    tcg_temp_free_i32(rm1);
    return true;
}

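/*
 * This is the 2-reg-misc VSHLL, whose shift count is fixed at the
 * element size, so each widened element ends up as "elem << esize"
 * with an all-zero low half; the signedness of the widening
 * therefore does not matter, and the per-lane shift can be done as
 * one shift of the whole 64-bit value because the zero-extended
 * high halves cannot carry bits across lane boundaries.
 */
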
static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp = neon_load_reg(a->vm, 0);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = neon_load_reg(a->vm, 1);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp);
    tcg_temp_free_i32(tmp);
    tmp = neon_load_reg(a->vm, 2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = neon_load_reg(a->vm, 3);
    neon_store_reg(a->vd, 0, tmp2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    neon_store_reg(a->vd, 1, tmp3);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

    return true;
}

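/*
 * Packing note: each pair of f32 inputs becomes two f16 results in
 * one 32-bit chunk, with the lower-numbered element in bits [15:0],
 * so the four f32 elements of the q-sized source narrow into a
 * single d register.
 */
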
static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp3 = tcg_temp_new_i32();
    tmp = neon_load_reg(a->vm, 0);
    tmp2 = neon_load_reg(a->vm, 1);
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    neon_store_reg(a->vd, 0, tmp3);
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    neon_store_reg(a->vd, 1, tmp);
    /* neon_store_reg has consumed tmp3, so allocate a fresh temp */
    tmp3 = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    neon_store_reg(a->vd, 2, tmp3);
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    neon_store_reg(a->vd, 3, tmp2);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

    return true;
}

static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);

    return true;
}

#define DO_2MISC_VEC(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_2misc_vec(s, a, FN);                          \
    }

DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)

static bool trans_VMVN(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc_vec(s, a, tcg_gen_gvec_not);
}

#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
                           DATA, FUNC);                                 \
    }

#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
    }

WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)

#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
            return false;                                       \
        }                                                       \
        return do_2misc_vec(s, a, gen_##INSN);                  \
    }

DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)

static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
    int pass;

    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        fn(tmp, tmp);
        neon_store_reg(a->vd, pass, tmp);
    }

    return true;
}

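/*
 * do_2misc is the fallback for 2-reg-misc operations with no gvec
 * expansion: fn transforms one 32-bit chunk at a time, so a
 * d-register operand takes two passes and a q-register four.
 */
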
static bool trans_VREV32(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        tcg_gen_bswap32_i32,
        gen_swap_half,
        NULL,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VREV16(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc(s, a, gen_rev16);
}

static bool trans_VCLS(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_cls_s8,
        gen_helper_neon_cls_s16,
        gen_helper_neon_cls_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
{
    tcg_gen_clzi_i32(rd, rm, 32);
}

static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_clz_u8,
        gen_helper_neon_clz_u16,
        do_VCLZ_32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VCNT(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc(s, a, gen_helper_neon_cnt_u8);
}

static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x7fff : 0x7fffffff,
                      oprsz, maxsz);
}

static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
{
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    } else if (a->size != MO_32) {
        return false;
    }
    return do_2misc_vec(s, a, gen_VABS_F);
}

static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x8000 : 0x80000000,
                      oprsz, maxsz);
}

static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
{
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    } else if (a->size != MO_32) {
        return false;
    }
    return do_2misc_vec(s, a, gen_VNEG_F);
}

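/*
 * Float VABS and VNEG are pure bit operations (clear or flip the
 * sign bit), so they are implemented with integer AND/XOR immediates
 * and never raise FP exceptions or consult the FP status.
 */
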
static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    return do_2misc(s, a, gen_helper_recpe_u32);
}

static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    return do_2misc(s, a, gen_helper_rsqrte_u32);
}

#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC)                 \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
    {                                                   \
        FUNC(d, cpu_env, m);                            \
    }

WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)

static bool trans_VQABS(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_VQABS_s8,
        gen_VQABS_s16,
        gen_VQABS_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_VQNEG_s8,
        gen_VQNEG_s16,
        gen_VQNEG_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL, HFUNC, SFUNC, NULL,                                   \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
                           fns[vece]);                                  \
        tcg_temp_free_ptr(fpst);                                        \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }

DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)

DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)

static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }
    return trans_VRINTX_impl(s, a);
}

#define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL,                                                       \
            gen_helper_gvec_##OP##h,                                    \
            gen_helper_gvec_##OP##s,                                    \
            NULL,                                                       \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD);       \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
                           arm_rmode_to_sf(RMODE), fns[vece]);          \
        tcg_temp_free_ptr(fpst);                                        \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
            return false;                                               \
        }                                                               \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }

DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)

DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)

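/*
 * The explicit-rounding conversions and rounds above share a single
 * helper per operation: the rounding mode is passed through the
 * gvec 'data' immediate via arm_rmode_to_sf(), rather than having a
 * separate helper per mode.
 */
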
static bool trans_VSWP(DisasContext *s, arg_2misc *a)
{
    TCGv_i64 rm, rd;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size != 0) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd = tcg_temp_new_i64();
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        neon_load_reg64(rm, a->vm + pass);
        neon_load_reg64(rd, a->vd + pass);
        neon_store_reg64(rm, a->vd + pass);
        neon_store_reg64(rd, a->vm + pass);
    }
    tcg_temp_free_i64(rm);
    tcg_temp_free_i64(rd);

    return true;
}

static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 8);
    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
    tcg_gen_or_i32(rd, rd, tmp);

    tcg_gen_shri_i32(t1, t1, 8);
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);
}

static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 16);
    tcg_gen_andi_i32(tmp, t1, 0xffff);
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shri_i32(t1, t1, 16);
    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);
}

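/*
 * Worked example for the u8 case (a sketch): writing bytes low to
 * high, inputs t0 = {a0,a1,a2,a3} and t1 = {b0,b1,b2,b3} come back
 * as t0 = {b0,a0,b2,a2} and t1 = {b1,a1,b3,a3}: the even-indexed
 * bytes of both inputs interleaved in t0, and the odd-indexed ones
 * in t1.
 */
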
static bool trans_VTRN(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 tmp, tmp2;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->size == MO_32) {
        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
            tmp = neon_load_reg(a->vm, pass);
            tmp2 = neon_load_reg(a->vd, pass + 1);
            neon_store_reg(a->vm, pass, tmp2);
            neon_store_reg(a->vd, pass + 1, tmp);
        }
    } else {
        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
            tmp = neon_load_reg(a->vm, pass);
            tmp2 = neon_load_reg(a->vd, pass);
            if (a->size == MO_8) {
                gen_neon_trn_u8(tmp, tmp2);
            } else {
                gen_neon_trn_u16(tmp, tmp2);
            }
            neon_store_reg(a->vm, pass, tmp2);
            neon_store_reg(a->vd, pass, tmp);
        }
    }
    return true;
}