2 * ARM translation: AArch32 Neon instructions
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 * Copyright (c) 2020 Linaro, Ltd.
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
24 * This file is intended to be included from translate.c; it uses
25 * some macros and definitions provided by that file.
26 * It might be possible to convert it to a standalone .c file eventually.
static inline int plus1(DisasContext *s, int x) { return x + 1; }
static inline int rsub_64(DisasContext *s, int x) { return 64 - x; }
static inline int rsub_32(DisasContext *s, int x) { return 32 - x; }
static inline int rsub_16(DisasContext *s, int x) { return 16 - x; }
static inline int rsub_8(DisasContext *s, int x) { return 8 - x; }
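/*
 * These trivial helpers exist for the benefit of the generated decoder:
 * the .decode files reference them via !function transforms, e.g. using
 * rsub_N to recover a right-shift amount that the instruction word
 * encodes as N - shift.
 */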
52 /* Include the generated Neon decoder */
53 #include "decode-neon-dp.c.inc"
54 #include "decode-neon-ls.c.inc"
55 #include "decode-neon-shared.c.inc"
57 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
58 * where 0 is the least significant end of the register.
61 neon_element_offset(int reg, int element, MemOp size)
63 int element_size = 1 << size;
64 int ofs = element * element_size;
65 #ifdef HOST_WORDS_BIGENDIAN
66 /* Calculate the offset assuming fully little-endian,
67 * then XOR to account for the order of the 8-byte units.
69 if (element_size < 8) {
70 ofs ^= 8 - element_size;
73 return neon_reg_offset(reg, 0) + ofs;
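/*
 * For example, element 0 of a MO_16 access on a big-endian host ends up
 * at offset 0 ^ (8 - 2) == 6, which is where the least significant
 * 16 bits of the 64-bit unit live in host byte order.
 */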
76 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
78 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
82 tcg_gen_ld8u_i32(var, cpu_env, offset);
85 tcg_gen_ld16u_i32(var, cpu_env, offset);
88 tcg_gen_ld_i32(var, cpu_env, offset);
91 g_assert_not_reached();
95 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
97 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
101 tcg_gen_ld8u_i64(var, cpu_env, offset);
104 tcg_gen_ld16u_i64(var, cpu_env, offset);
107 tcg_gen_ld32u_i64(var, cpu_env, offset);
110 tcg_gen_ld_i64(var, cpu_env, offset);
113 g_assert_not_reached();
117 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
119 long offset = neon_element_offset(reg, ele, size);
123 tcg_gen_st8_i32(var, cpu_env, offset);
126 tcg_gen_st16_i32(var, cpu_env, offset);
129 tcg_gen_st_i32(var, cpu_env, offset);
132 g_assert_not_reached();
136 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
138 long offset = neon_element_offset(reg, ele, size);
142 tcg_gen_st8_i64(var, cpu_env, offset);
145 tcg_gen_st16_i64(var, cpu_env, offset);
148 tcg_gen_st32_i64(var, cpu_env, offset);
151 tcg_gen_st_i64(var, cpu_env, offset);
154 g_assert_not_reached();
158 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
162 gen_helper_gvec_3_ptr *fn_gvec_ptr;
164 if (!dc_isar_feature(aa32_vcma, s)
165 || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
169 /* UNDEF accesses to D16-D31 if they don't exist. */
170 if (!dc_isar_feature(aa32_simd_r32, s) &&
171 ((a->vd | a->vn | a->vm) & 0x10)) {
175 if ((a->vn | a->vm | a->vd) & a->q) {
179 if (!vfp_access_check(s)) {
183 opr_sz = (1 + a->q) * 8;
184 fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
185 fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
186 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
187 vfp_reg_offset(1, a->vn),
188 vfp_reg_offset(1, a->vm),
189 fpst, opr_sz, opr_sz, a->rot,
191 tcg_temp_free_ptr(fpst);
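/*
 * Note the ordering of checks used here and in most trans functions
 * below: ISA feature checks first, then the UNDEF checks (D16-D31
 * accesses, Q-register alignment), then vfp_access_check(), which may
 * raise an exception for disabled FP/SIMD, and only then code generation.
 */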
195 static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
199 gen_helper_gvec_3_ptr *fn_gvec_ptr;
201 if (!dc_isar_feature(aa32_vcma, s)
202 || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
206 /* UNDEF accesses to D16-D31 if they don't exist. */
207 if (!dc_isar_feature(aa32_simd_r32, s) &&
208 ((a->vd | a->vn | a->vm) & 0x10)) {
212 if ((a->vn | a->vm | a->vd) & a->q) {
216 if (!vfp_access_check(s)) {
220 opr_sz = (1 + a->q) * 8;
221 fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
222 fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
223 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
224 vfp_reg_offset(1, a->vn),
225 vfp_reg_offset(1, a->vm),
226 fpst, opr_sz, opr_sz, a->rot,
228 tcg_temp_free_ptr(fpst);
232 static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
235 gen_helper_gvec_3 *fn_gvec;
237 if (!dc_isar_feature(aa32_dp, s)) {
241 /* UNDEF accesses to D16-D31 if they don't exist. */
242 if (!dc_isar_feature(aa32_simd_r32, s) &&
243 ((a->vd | a->vn | a->vm) & 0x10)) {
247 if ((a->vn | a->vm | a->vd) & a->q) {
251 if (!vfp_access_check(s)) {
255 opr_sz = (1 + a->q) * 8;
256 fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
257 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
258 vfp_reg_offset(1, a->vn),
259 vfp_reg_offset(1, a->vm),
260 opr_sz, opr_sz, 0, fn_gvec);
264 static bool trans_VFML(DisasContext *s, arg_VFML *a)
268 if (!dc_isar_feature(aa32_fhm, s)) {
272 /* UNDEF accesses to D16-D31 if they don't exist. */
273 if (!dc_isar_feature(aa32_simd_r32, s) &&
282 if (!vfp_access_check(s)) {
286 opr_sz = (1 + a->q) * 8;
287 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
288 vfp_reg_offset(a->q, a->vn),
289 vfp_reg_offset(a->q, a->vm),
290 cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
291 gen_helper_gvec_fmlal_a32);
295 static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
297 gen_helper_gvec_3_ptr *fn_gvec_ptr;
301 if (!dc_isar_feature(aa32_vcma, s)) {
304 if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
308 /* UNDEF accesses to D16-D31 if they don't exist. */
309 if (!dc_isar_feature(aa32_simd_r32, s) &&
310 ((a->vd | a->vn | a->vm) & 0x10)) {
314 if ((a->vd | a->vn) & a->q) {
318 if (!vfp_access_check(s)) {
322 fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
323 : gen_helper_gvec_fcmlah_idx);
324 opr_sz = (1 + a->q) * 8;
325 fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
326 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
327 vfp_reg_offset(1, a->vn),
328 vfp_reg_offset(1, a->vm),
329 fpst, opr_sz, opr_sz,
330 (a->index << 2) | a->rot, fn_gvec_ptr);
331 tcg_temp_free_ptr(fpst);
335 static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
337 gen_helper_gvec_3 *fn_gvec;
341 if (!dc_isar_feature(aa32_dp, s)) {
345 /* UNDEF accesses to D16-D31 if they don't exist. */
346 if (!dc_isar_feature(aa32_simd_r32, s) &&
347 ((a->vd | a->vn) & 0x10)) {
351 if ((a->vd | a->vn) & a->q) {
355 if (!vfp_access_check(s)) {
359 fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
360 opr_sz = (1 + a->q) * 8;
361 fpst = fpstatus_ptr(FPST_STD);
362 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
363 vfp_reg_offset(1, a->vn),
364 vfp_reg_offset(1, a->rm),
365 opr_sz, opr_sz, a->index, fn_gvec);
366 tcg_temp_free_ptr(fpst);
370 static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
374 if (!dc_isar_feature(aa32_fhm, s)) {
378 /* UNDEF accesses to D16-D31 if they don't exist. */
379 if (!dc_isar_feature(aa32_simd_r32, s) &&
380 ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
388 if (!vfp_access_check(s)) {
392 opr_sz = (1 + a->q) * 8;
393 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
394 vfp_reg_offset(a->q, a->vn),
395 vfp_reg_offset(a->q, a->rm),
396 cpu_env, opr_sz, opr_sz,
397 (a->index << 2) | a->s, /* is_2 == 0 */
398 gen_helper_gvec_fmlal_idx_a32);
static struct {
    int nregs, interleave, spacing;
} const neon_ls_element_type[11] = {
420 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
426 base = load_reg(s, rn);
428 tcg_gen_addi_i32(base, base, stride);
431 index = load_reg(s, rm);
432 tcg_gen_add_i32(base, base, index);
433 tcg_temp_free_i32(index);
435 store_reg(s, rn, base);
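/*
 * Writeback follows the architected post-indexing for VLDn/VSTn:
 * Rm == 15 means no writeback, Rm == 13 means writeback of the
 * transfer size (the 'stride' argument), and any other Rm is a
 * register post-index.
 */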
439 static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
441 /* Neon load/store multiple structures */
442 int nregs, interleave, spacing, reg, n;
443 MemOp endian = s->be_data;
444 int mmu_idx = get_mem_index(s);
449 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
453 /* UNDEF accesses to D16-D31 if they don't exist */
454 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
460 /* Catch UNDEF cases for bad values of align field */
461 switch (a->itype & 0xc) {
475 nregs = neon_ls_element_type[a->itype].nregs;
476 interleave = neon_ls_element_type[a->itype].interleave;
477 spacing = neon_ls_element_type[a->itype].spacing;
478 if (size == 3 && (interleave | spacing) != 1) {
482 if (!vfp_access_check(s)) {
486 /* For our purposes, bytes are always little-endian. */
491 * Consecutive little-endian elements from a single register
492 * can be promoted to a larger little-endian operation.
494 if (interleave == 1 && endian == MO_LE) {
497 tmp64 = tcg_temp_new_i64();
498 addr = tcg_temp_new_i32();
499 tmp = tcg_const_i32(1 << size);
500 load_reg_var(s, addr, a->rn);
501 for (reg = 0; reg < nregs; reg++) {
502 for (n = 0; n < 8 >> size; n++) {
504 for (xs = 0; xs < interleave; xs++) {
505 int tt = a->vd + reg + spacing * xs;
508 gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
509 neon_store_element64(tt, n, size, tmp64);
511 neon_load_element64(tmp64, tt, n, size);
512 gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
514 tcg_gen_add_i32(addr, addr, tmp);
518 tcg_temp_free_i32(addr);
519 tcg_temp_free_i32(tmp);
520 tcg_temp_free_i64(tmp64);
522 gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
526 static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
528 /* Neon load single structure to all lanes */
529 int reg, stride, vec_size;
532 int nregs = a->n + 1;
535 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
539 /* UNDEF accesses to D16-D31 if they don't exist */
540 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
545 if (nregs != 4 || a->a == 0) {
548 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
551 if (nregs == 1 && a->a == 1 && size == 0) {
554 if (nregs == 3 && a->a == 1) {
558 if (!vfp_access_check(s)) {
563 * VLD1 to all lanes: T bit indicates how many Dregs to write.
564 * VLD2/3/4 to all lanes: T bit indicates register stride.
566 stride = a->t ? 2 : 1;
567 vec_size = nregs == 1 ? stride * 8 : 8;
569 tmp = tcg_temp_new_i32();
570 addr = tcg_temp_new_i32();
571 load_reg_var(s, addr, a->rn);
572 for (reg = 0; reg < nregs; reg++) {
573 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
575 if ((vd & 1) && vec_size == 16) {
577 * We cannot write 16 bytes at once because the
578 * destination is unaligned.
580 tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
582 tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
583 neon_reg_offset(vd, 0), 8, 8);
585 tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
586 vec_size, vec_size, tmp);
588 tcg_gen_addi_i32(addr, addr, 1 << size);
591 tcg_temp_free_i32(tmp);
592 tcg_temp_free_i32(addr);
594 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
599 static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
601 /* Neon load/store single structure to one lane */
603 int nregs = a->n + 1;
607 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
611 /* UNDEF accesses to D16-D31 if they don't exist */
612 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
616 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
619 if (((a->align & (1 << a->size)) != 0) ||
620 (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
625 if ((a->align & 1) != 0) {
630 if (a->size == 2 && (a->align & 2) != 0) {
635 if ((a->size == 2) && ((a->align & 3) == 3)) {
642 if ((vd + a->stride * (nregs - 1)) > 31) {
644 * Attempts to write off the end of the register file are
645 * UNPREDICTABLE; we choose to UNDEF because otherwise we would
646 * access off the end of the array that holds the register data.
651 if (!vfp_access_check(s)) {
655 tmp = tcg_temp_new_i32();
656 addr = tcg_temp_new_i32();
657 load_reg_var(s, addr, a->rn);
659 * TODO: if we implemented alignment exceptions, we should check
660 * addr against the alignment encoded in a->align here.
662 for (reg = 0; reg < nregs; reg++) {
664 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
665 s->be_data | a->size);
666 neon_store_element(vd, a->reg_idx, a->size, tmp);
668 neon_load_element(tmp, vd, a->reg_idx, a->size);
669 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
670 s->be_data | a->size);
673 tcg_gen_addi_i32(addr, addr, 1 << a->size);
675 tcg_temp_free_i32(addr);
676 tcg_temp_free_i32(tmp);
678 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
683 static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
685 int vec_size = a->q ? 16 : 8;
686 int rd_ofs = neon_reg_offset(a->vd, 0);
687 int rn_ofs = neon_reg_offset(a->vn, 0);
688 int rm_ofs = neon_reg_offset(a->vm, 0);
690 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
694 /* UNDEF accesses to D16-D31 if they don't exist. */
695 if (!dc_isar_feature(aa32_simd_r32, s) &&
696 ((a->vd | a->vn | a->vm) & 0x10)) {
700 if ((a->vn | a->vm | a->vd) & a->q) {
704 if (!vfp_access_check(s)) {
708 fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
712 #define DO_3SAME(INSN, FUNC) \
713 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
715 return do_3same(s, a, FUNC); \
718 DO_3SAME(VADD, tcg_gen_gvec_add)
719 DO_3SAME(VSUB, tcg_gen_gvec_sub)
720 DO_3SAME(VAND, tcg_gen_gvec_and)
721 DO_3SAME(VBIC, tcg_gen_gvec_andc)
722 DO_3SAME(VORR, tcg_gen_gvec_or)
723 DO_3SAME(VORN, tcg_gen_gvec_orc)
724 DO_3SAME(VEOR, tcg_gen_gvec_xor)
725 DO_3SAME(VSHL_S, gen_gvec_sshl)
726 DO_3SAME(VSHL_U, gen_gvec_ushl)
727 DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
728 DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
729 DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
730 DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
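/*
 * As an illustration, DO_3SAME(VADD, tcg_gen_gvec_add) expands to:
 *
 *   static bool trans_VADD_3s(DisasContext *s, arg_3same *a)
 *   {
 *       return do_3same(s, a, tcg_gen_gvec_add);
 *   }
 */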
732 /* These insns are all gvec_bitsel but with the inputs in various orders. */
733 #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
734 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
735 uint32_t rn_ofs, uint32_t rm_ofs, \
736 uint32_t oprsz, uint32_t maxsz) \
738 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
740 DO_3SAME(INSN, gen_##INSN##_3s)
742 DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
743 DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
744 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
746 #define DO_3SAME_NO_SZ_3(INSN, FUNC) \
747 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
749 if (a->size == 3) { \
752 return do_3same(s, a, FUNC); \
755 DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
756 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
757 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
758 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
759 DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
760 DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
761 DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
762 DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
763 DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
764 DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
765 DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
766 DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
768 #define DO_3SAME_CMP(INSN, COND) \
769 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
770 uint32_t rn_ofs, uint32_t rm_ofs, \
771 uint32_t oprsz, uint32_t maxsz) \
773 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
775 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
777 DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
778 DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
779 DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
780 DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
781 DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
783 #define WRAP_OOL_FN(WRAPNAME, FUNC) \
784 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
785 uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
787 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
790 WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
792 static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
797 return do_3same(s, a, gen_VMUL_p_3s);
800 #define DO_VQRDMLAH(INSN, FUNC) \
801 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
803 if (!dc_isar_feature(aa32_rdm, s)) { \
806 if (a->size != 1 && a->size != 2) { \
809 return do_3same(s, a, FUNC); \
812 DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
813 DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
815 #define DO_SHA1(NAME, FUNC) \
816 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
817 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
819 if (!dc_isar_feature(aa32_sha1, s)) { \
822 return do_3same(s, a, gen_##NAME##_3s); \
825 DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
826 DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
827 DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
828 DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
830 #define DO_SHA2(NAME, FUNC) \
831 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
832 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
834 if (!dc_isar_feature(aa32_sha2, s)) { \
837 return do_3same(s, a, gen_##NAME##_3s); \
840 DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
841 DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
842 DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
844 #define DO_3SAME_64(INSN, FUNC) \
845 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
846 uint32_t rn_ofs, uint32_t rm_ofs, \
847 uint32_t oprsz, uint32_t maxsz) \
849 static const GVecGen3 op = { .fni8 = FUNC }; \
850 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
852 DO_3SAME(INSN, gen_##INSN##_3s)
854 #define DO_3SAME_64_ENV(INSN, FUNC) \
855 static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
857 FUNC(d, cpu_env, n, m); \
859 DO_3SAME_64(INSN, gen_##INSN##_elt)
861 DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
862 DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
863 DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
864 DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
865 DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
866 DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
868 #define DO_3SAME_32(INSN, FUNC) \
869 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
870 uint32_t rn_ofs, uint32_t rm_ofs, \
871 uint32_t oprsz, uint32_t maxsz) \
873 static const GVecGen3 ops[4] = { \
874 { .fni4 = gen_helper_neon_##FUNC##8 }, \
875 { .fni4 = gen_helper_neon_##FUNC##16 }, \
876 { .fni4 = gen_helper_neon_##FUNC##32 }, \
879 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
881 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
886 return do_3same(s, a, gen_##INSN##_3s); \
890 * Some helper functions need to be passed the cpu_env. In order
891 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
892 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
893 * and which call a NeonGenTwoOpEnvFn().
895 #define WRAP_ENV_FN(WRAPNAME, FUNC) \
896 static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
898 FUNC(d, cpu_env, n, m); \
901 #define DO_3SAME_32_ENV(INSN, FUNC) \
902 WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
903 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
904 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
905 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
906 uint32_t rn_ofs, uint32_t rm_ofs, \
907 uint32_t oprsz, uint32_t maxsz) \
909 static const GVecGen3 ops[4] = { \
910 { .fni4 = gen_##INSN##_tramp8 }, \
911 { .fni4 = gen_##INSN##_tramp16 }, \
912 { .fni4 = gen_##INSN##_tramp32 }, \
915 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
917 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
922 return do_3same(s, a, gen_##INSN##_3s); \
925 DO_3SAME_32(VHADD_S, hadd_s)
926 DO_3SAME_32(VHADD_U, hadd_u)
927 DO_3SAME_32(VHSUB_S, hsub_s)
928 DO_3SAME_32(VHSUB_U, hsub_u)
929 DO_3SAME_32(VRHADD_S, rhadd_s)
930 DO_3SAME_32(VRHADD_U, rhadd_u)
931 DO_3SAME_32(VRSHL_S, rshl_s)
932 DO_3SAME_32(VRSHL_U, rshl_u)
934 DO_3SAME_32_ENV(VQSHL_S, qshl_s)
935 DO_3SAME_32_ENV(VQSHL_U, qshl_u)
936 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
937 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
939 static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
941 /* Operations handled pairwise 32 bits at a time */
942 TCGv_i32 tmp, tmp2, tmp3;
944 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
948 /* UNDEF accesses to D16-D31 if they don't exist. */
949 if (!dc_isar_feature(aa32_simd_r32, s) &&
950 ((a->vd | a->vn | a->vm) & 0x10)) {
958 if (!vfp_access_check(s)) {
962 assert(a->q == 0); /* enforced by decode patterns */
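/*
 * The pairwise semantics: the low 32-bit word of Dd is produced by
 * combining the two words of Dn, and the high word by combining the
 * two words of Dm (with fn handling the packed sub-word elements).
 */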
965 * Note that we have to be careful not to clobber the source operands
966 * in the "vm == vd" case by storing the result of the first pass too
967 * early. Since Q is 0 there are always just two passes, so instead
968 * of a complicated loop over each pass we just unroll.
970 tmp = neon_load_reg(a->vn, 0);
971 tmp2 = neon_load_reg(a->vn, 1);
973 tcg_temp_free_i32(tmp2);
975 tmp3 = neon_load_reg(a->vm, 0);
976 tmp2 = neon_load_reg(a->vm, 1);
977 fn(tmp3, tmp3, tmp2);
978 tcg_temp_free_i32(tmp2);
980 neon_store_reg(a->vd, 0, tmp);
981 neon_store_reg(a->vd, 1, tmp3);
985 #define DO_3SAME_PAIR(INSN, func) \
986 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
988 static NeonGenTwoOpFn * const fns[] = { \
989 gen_helper_neon_##func##8, \
990 gen_helper_neon_##func##16, \
991 gen_helper_neon_##func##32, \
996 return do_3same_pair(s, a, fns[a->size]); \
999 /* 32-bit pairwise ops end up the same as the elementwise versions. */
1000 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
1001 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
1002 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
1003 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
1004 #define gen_helper_neon_padd_u32 tcg_gen_add_i32
1006 DO_3SAME_PAIR(VPMAX_S, pmax_s)
1007 DO_3SAME_PAIR(VPMIN_S, pmin_s)
1008 DO_3SAME_PAIR(VPMAX_U, pmax_u)
1009 DO_3SAME_PAIR(VPMIN_U, pmin_u)
1010 DO_3SAME_PAIR(VPADD, padd_u)
1012 #define DO_3SAME_VQDMULH(INSN, FUNC) \
1013 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
1014 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
1015 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
1016 uint32_t rn_ofs, uint32_t rm_ofs, \
1017 uint32_t oprsz, uint32_t maxsz) \
1019 static const GVecGen3 ops[2] = { \
1020 { .fni4 = gen_##INSN##_tramp16 }, \
1021 { .fni4 = gen_##INSN##_tramp32 }, \
1023 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
1025 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
1027 if (a->size != 1 && a->size != 2) { \
1030 return do_3same(s, a, gen_##INSN##_3s); \
1033 DO_3SAME_VQDMULH(VQDMULH, qdmulh)
1034 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
1036 #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \
1037 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
1038 uint32_t rn_ofs, uint32_t rm_ofs, \
1039 uint32_t oprsz, uint32_t maxsz) \
1041 TCGv_ptr fpst = fpstatus_ptr(FPST); \
1042 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
1043 oprsz, maxsz, 0, FUNC); \
1044 tcg_temp_free_ptr(fpst); \
1047 #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \
1048 WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \
1049 WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \
1050 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1052 if (a->size != 0) { \
1053 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
1056 return do_3same(s, a, gen_##INSN##_fp16_3s); \
1058 return do_3same(s, a, gen_##INSN##_fp32_3s); \
1062 DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
1063 DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
1064 DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
1065 DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
1066 DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
1067 DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
1068 DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
1069 DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
1070 DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
1071 DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
1072 DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
1073 DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
1074 DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
1075 DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
1076 DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
1077 DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
1078 DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
1080 WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
1081 WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
1082 WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
1083 WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
1085 static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
1087 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1092 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1095 return do_3same(s, a, gen_VMAXNM_fp16_3s);
1097 return do_3same(s, a, gen_VMAXNM_fp32_3s);
1100 static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
1102 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1107 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1110 return do_3same(s, a, gen_VMINNM_fp16_3s);
1112 return do_3same(s, a, gen_VMINNM_fp32_3s);
1115 static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
1116 gen_helper_gvec_3_ptr *fn)
1118 /* FP pairwise operations */
1121 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1125 /* UNDEF accesses to D16-D31 if they don't exist. */
1126 if (!dc_isar_feature(aa32_simd_r32, s) &&
1127 ((a->vd | a->vn | a->vm) & 0x10)) {
1131 if (!vfp_access_check(s)) {
1135 assert(a->q == 0); /* enforced by decode patterns */
1138 fpstatus = fpstatus_ptr(a->size != 0 ? FPST_STD_F16 : FPST_STD);
1139 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
1140 vfp_reg_offset(1, a->vn),
1141 vfp_reg_offset(1, a->vm),
1142 fpstatus, 8, 8, 0, fn);
1143 tcg_temp_free_ptr(fpstatus);
1149 * For all the functions using this macro, size == 1 means fp16,
 * which requires the FP16 arithmetic extension (aa32_fp16_arith).
1152 #define DO_3S_FP_PAIR(INSN,FUNC) \
1153 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1155 if (a->size != 0) { \
1156 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
1159 return do_3same_fp_pair(s, a, FUNC##h); \
1161 return do_3same_fp_pair(s, a, FUNC##s); \
1164 DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
1165 DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
1166 DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
1168 static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
1170 /* Handle a 2-reg-shift insn which can be vectorized. */
1171 int vec_size = a->q ? 16 : 8;
1172 int rd_ofs = neon_reg_offset(a->vd, 0);
1173 int rm_ofs = neon_reg_offset(a->vm, 0);
1175 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1179 /* UNDEF accesses to D16-D31 if they don't exist. */
1180 if (!dc_isar_feature(aa32_simd_r32, s) &&
1181 ((a->vd | a->vm) & 0x10)) {
1185 if ((a->vm | a->vd) & a->q) {
1189 if (!vfp_access_check(s)) {
1193 fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
1197 #define DO_2SH(INSN, FUNC) \
1198 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1200 return do_vector_2sh(s, a, FUNC); \
1203 DO_2SH(VSHL, tcg_gen_gvec_shli)
1204 DO_2SH(VSLI, gen_gvec_sli)
1205 DO_2SH(VSRI, gen_gvec_sri)
1206 DO_2SH(VSRA_S, gen_gvec_ssra)
1207 DO_2SH(VSRA_U, gen_gvec_usra)
1208 DO_2SH(VRSHR_S, gen_gvec_srshr)
1209 DO_2SH(VRSHR_U, gen_gvec_urshr)
1210 DO_2SH(VRSRA_S, gen_gvec_srsra)
1211 DO_2SH(VRSRA_U, gen_gvec_ursra)
1213 static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
1215 /* Signed shift out of range results in all-sign-bits */
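/* e.g. VSHR.S8 #8 becomes an arithmetic shift by 7, which gives the same result */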
1216 a->shift = MIN(a->shift, (8 << a->size) - 1);
1217 return do_vector_2sh(s, a, tcg_gen_gvec_sari);
1220 static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
1221 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1223 tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
1226 static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
1228 /* Shift out of range is architecturally valid and results in zero. */
1229 if (a->shift >= (8 << a->size)) {
1230 return do_vector_2sh(s, a, gen_zero_rd_2sh);
1232 return do_vector_2sh(s, a, tcg_gen_gvec_shri);
1236 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
1237 NeonGenTwo64OpEnvFn *fn)
1240 * 2-reg-and-shift operations, size == 3 case, where the
1241 * function needs to be passed cpu_env.
1246 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1250 /* UNDEF accesses to D16-D31 if they don't exist. */
1251 if (!dc_isar_feature(aa32_simd_r32, s) &&
1252 ((a->vd | a->vm) & 0x10)) {
1256 if ((a->vm | a->vd) & a->q) {
1260 if (!vfp_access_check(s)) {
1265 * To avoid excessive duplication of ops we implement shift
1266 * by immediate using the variable shift operations.
1268 constimm = tcg_const_i64(dup_const(a->size, a->shift));
1270 for (pass = 0; pass < a->q + 1; pass++) {
1271 TCGv_i64 tmp = tcg_temp_new_i64();
1273 neon_load_reg64(tmp, a->vm + pass);
1274 fn(tmp, cpu_env, tmp, constimm);
1275 neon_store_reg64(tmp, a->vd + pass);
1276 tcg_temp_free_i64(tmp);
1278 tcg_temp_free_i64(constimm);
1282 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
1283 NeonGenTwoOpEnvFn *fn)
1286 * 2-reg-and-shift operations, size < 3 case, where the
1287 * helper needs to be passed cpu_env.
1292 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1296 /* UNDEF accesses to D16-D31 if they don't exist. */
1297 if (!dc_isar_feature(aa32_simd_r32, s) &&
1298 ((a->vd | a->vm) & 0x10)) {
1302 if ((a->vm | a->vd) & a->q) {
1306 if (!vfp_access_check(s)) {
1311 * To avoid excessive duplication of ops we implement shift
1312 * by immediate using the variable shift operations.
1314 constimm = tcg_const_i32(dup_const(a->size, a->shift));
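/*
 * dup_const() replicates the shift count into every lane of the 32-bit
 * value, because the variable-shift helpers expect a per-lane shift
 * count packed the same way as the data (e.g. four byte-sized counts
 * for the 8-bit helpers).
 */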
1316 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
1317 TCGv_i32 tmp = neon_load_reg(a->vm, pass);
1318 fn(tmp, cpu_env, tmp, constimm);
1319 neon_store_reg(a->vd, pass, tmp);
1321 tcg_temp_free_i32(constimm);
1325 #define DO_2SHIFT_ENV(INSN, FUNC) \
1326 static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
1328 return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
1330 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1332 static NeonGenTwoOpEnvFn * const fns[] = { \
1333 gen_helper_neon_##FUNC##8, \
1334 gen_helper_neon_##FUNC##16, \
1335 gen_helper_neon_##FUNC##32, \
1337 assert(a->size < ARRAY_SIZE(fns)); \
1338 return do_2shift_env_32(s, a, fns[a->size]); \
1341 DO_2SHIFT_ENV(VQSHLU, qshlu_s)
1342 DO_2SHIFT_ENV(VQSHL_U, qshl_u)
1343 DO_2SHIFT_ENV(VQSHL_S, qshl_s)
1345 static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
1346 NeonGenTwo64OpFn *shiftfn,
1347 NeonGenNarrowEnvFn *narrowfn)
1349 /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
1350 TCGv_i64 constimm, rm1, rm2;
1353 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1357 /* UNDEF accesses to D16-D31 if they don't exist. */
1358 if (!dc_isar_feature(aa32_simd_r32, s) &&
1359 ((a->vd | a->vm) & 0x10)) {
1367 if (!vfp_access_check(s)) {
1372 * This is always a right shift, and the shiftfn is always a
1373 * left-shift helper, which thus needs the negated shift count.
1375 constimm = tcg_const_i64(-a->shift);
1376 rm1 = tcg_temp_new_i64();
1377 rm2 = tcg_temp_new_i64();
1379 /* Load both inputs first to avoid potential overwrite if rm == rd */
1380 neon_load_reg64(rm1, a->vm);
1381 neon_load_reg64(rm2, a->vm + 1);
1383 shiftfn(rm1, rm1, constimm);
1384 rd = tcg_temp_new_i32();
1385 narrowfn(rd, cpu_env, rm1);
1386 neon_store_reg(a->vd, 0, rd);
1388 shiftfn(rm2, rm2, constimm);
1389 rd = tcg_temp_new_i32();
1390 narrowfn(rd, cpu_env, rm2);
1391 neon_store_reg(a->vd, 1, rd);
1393 tcg_temp_free_i64(rm1);
1394 tcg_temp_free_i64(rm2);
1395 tcg_temp_free_i64(constimm);
1400 static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
1401 NeonGenTwoOpFn *shiftfn,
1402 NeonGenNarrowEnvFn *narrowfn)
1404 /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
1405 TCGv_i32 constimm, rm1, rm2, rm3, rm4;
1409 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1413 /* UNDEF accesses to D16-D31 if they don't exist. */
1414 if (!dc_isar_feature(aa32_simd_r32, s) &&
1415 ((a->vd | a->vm) & 0x10)) {
1423 if (!vfp_access_check(s)) {
1428 * This is always a right shift, and the shiftfn is always a
1429 * left-shift helper, which thus needs the negated shift count
1430 * duplicated into each lane of the immediate value.
1433 imm = (uint16_t)(-a->shift);
1439 constimm = tcg_const_i32(imm);
1441 /* Load all inputs first to avoid potential overwrite */
1442 rm1 = neon_load_reg(a->vm, 0);
1443 rm2 = neon_load_reg(a->vm, 1);
1444 rm3 = neon_load_reg(a->vm + 1, 0);
1445 rm4 = neon_load_reg(a->vm + 1, 1);
1446 rtmp = tcg_temp_new_i64();
1448 shiftfn(rm1, rm1, constimm);
1449 shiftfn(rm2, rm2, constimm);
1451 tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
1452 tcg_temp_free_i32(rm2);
1454 narrowfn(rm1, cpu_env, rtmp);
1455 neon_store_reg(a->vd, 0, rm1);
1457 shiftfn(rm3, rm3, constimm);
1458 shiftfn(rm4, rm4, constimm);
1459 tcg_temp_free_i32(constimm);
1461 tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
1462 tcg_temp_free_i32(rm4);
1464 narrowfn(rm3, cpu_env, rtmp);
1465 tcg_temp_free_i64(rtmp);
1466 neon_store_reg(a->vd, 1, rm3);
1470 #define DO_2SN_64(INSN, FUNC, NARROWFUNC) \
1471 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1473 return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \
1475 #define DO_2SN_32(INSN, FUNC, NARROWFUNC) \
1476 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1478 return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
1481 static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1483 tcg_gen_extrl_i64_i32(dest, src);
1486 static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1488 gen_helper_neon_narrow_u16(dest, src);
1491 static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1493 gen_helper_neon_narrow_u8(dest, src);
1496 DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
1497 DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
1498 DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
1500 DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
1501 DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
1502 DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
1504 DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
1505 DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
1506 DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
1508 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
1509 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
1510 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
1511 DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
1512 DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
1513 DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
1515 DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
1516 DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
1517 DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
1519 DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
1520 DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
1521 DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
1523 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
1524 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
1525 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
1527 static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
1528 NeonGenWidenFn *widenfn, bool u)
1532 uint64_t widen_mask = 0;
1534 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1538 /* UNDEF accesses to D16-D31 if they don't exist. */
1539 if (!dc_isar_feature(aa32_simd_r32, s) &&
1540 ((a->vd | a->vm) & 0x10)) {
1548 if (!vfp_access_check(s)) {
1553 * This is a widen-and-shift operation. The shift is always less
1554 * than the width of the source type, so after widening the input
1555 * vector we can simply shift the whole 64-bit widened register,
1556 * and then clear the potential overflow bits resulting from left
1557 * bits of the narrow input appearing as right bits of the left
1558 * neighbour narrow input. Calculate a mask of bits to clear.
1560 if ((a->shift != 0) && (a->size < 2 || u)) {
1561 int esize = 8 << a->size;
1562 widen_mask = MAKE_64BIT_MASK(0, esize);
1563 widen_mask >>= esize - a->shift;
1564 widen_mask = dup_const(a->size + 1, widen_mask);
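/*
 * For example, VSHLL.U8 #3 gives widen_mask = 0x0007000700070007: after
 * the 64-bit left shift, the low 3 bits of each 16-bit lane hold bits
 * that slid across from the element below, and they are cleared by the
 * andi with ~widen_mask.
 */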
1567 rm0 = neon_load_reg(a->vm, 0);
1568 rm1 = neon_load_reg(a->vm, 1);
1569 tmp = tcg_temp_new_i64();
1572 tcg_temp_free_i32(rm0);
1573 if (a->shift != 0) {
1574 tcg_gen_shli_i64(tmp, tmp, a->shift);
1575 tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1577 neon_store_reg64(tmp, a->vd);
1580 tcg_temp_free_i32(rm1);
1581 if (a->shift != 0) {
1582 tcg_gen_shli_i64(tmp, tmp, a->shift);
1583 tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1585 neon_store_reg64(tmp, a->vd + 1);
1586 tcg_temp_free_i64(tmp);
1590 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
1592 static NeonGenWidenFn * const widenfn[] = {
1593 gen_helper_neon_widen_s8,
1594 gen_helper_neon_widen_s16,
1595 tcg_gen_ext_i32_i64,
1597 return do_vshll_2sh(s, a, widenfn[a->size], false);
1600 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
1602 static NeonGenWidenFn * const widenfn[] = {
1603 gen_helper_neon_widen_u8,
1604 gen_helper_neon_widen_u16,
1605 tcg_gen_extu_i32_i64,
1607 return do_vshll_2sh(s, a, widenfn[a->size], true);
1610 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
1611 gen_helper_gvec_2_ptr *fn)
1613 /* FP operations in 2-reg-and-shift group */
1614 int vec_size = a->q ? 16 : 8;
1615 int rd_ofs = neon_reg_offset(a->vd, 0);
1616 int rm_ofs = neon_reg_offset(a->vm, 0);
1619 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1624 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1629 /* UNDEF accesses to D16-D31 if they don't exist. */
1630 if (!dc_isar_feature(aa32_simd_r32, s) &&
1631 ((a->vd | a->vm) & 0x10)) {
1635 if ((a->vm | a->vd) & a->q) {
1639 if (!vfp_access_check(s)) {
1643 fpst = fpstatus_ptr(a->size ? FPST_STD_F16 : FPST_STD);
1644 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
1645 tcg_temp_free_ptr(fpst);
1649 #define DO_FP_2SH(INSN, FUNC) \
1650 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1652 return do_fp_2sh(s, a, FUNC); \
1655 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
1656 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
1657 DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
1658 DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
1660 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
1661 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
1662 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
1663 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
1665 static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
1668 * Expand the encoded constant.
1669 * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
1670 * We choose to not special-case this and will behave as if a
1671 * valid constant encoding of 0 had been given.
1672 * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
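 * As an example, cmode = 12, op = 0, imm = 0x12 expands to
 * (0x12 << 8) | 0xff = 0x12ff, which dup_const(MO_32, ...) then
 * replicates to 0x000012ff000012ff.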
1691 imm = (imm << 8) | (imm << 24);
1694 imm = (imm << 8) | 0xff;
1697 imm = (imm << 16) | 0xffff;
1702 * This is the only case where the top and bottom 32 bits
1703 * of the encoded constant differ.
1708 for (n = 0; n < 8; n++) {
1709 if (imm & (1 << n)) {
1710 imm64 |= (0xffULL << (n * 8));
1715 imm |= (imm << 8) | (imm << 16) | (imm << 24);
1718 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
1719 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
1725 return dup_const(MO_32, imm);
1728 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
1732 int reg_ofs, vec_size;
1734 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1738 /* UNDEF accesses to D16-D31 if they don't exist. */
1739 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1747 if (!vfp_access_check(s)) {
1751 reg_ofs = neon_reg_offset(a->vd, 0);
1752 vec_size = a->q ? 16 : 8;
1753 imm = asimd_imm_const(a->imm, a->cmode, a->op);
1755 fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
1759 static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
1760 int64_t c, uint32_t oprsz, uint32_t maxsz)
1762 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
1765 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
1767 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1770 if ((a->cmode & 1) && a->cmode < 12) {
1771 /* for op=1, the imm will be inverted, so BIC becomes AND. */
1772 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1774 /* There is one unallocated cmode/op combination in this space */
1775 if (a->cmode == 15 && a->op == 1) {
1780 return do_1reg_imm(s, a, fn);
1783 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
1784 NeonGenWidenFn *widenfn,
1785 NeonGenTwo64OpFn *opfn,
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
1789 TCGv_i64 rn0_64, rn1_64, rm_64;
1792 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1796 /* UNDEF accesses to D16-D31 if they don't exist. */
1797 if (!dc_isar_feature(aa32_simd_r32, s) &&
1798 ((a->vd | a->vn | a->vm) & 0x10)) {
1802 if (!widenfn || !opfn) {
1803 /* size == 3 case, which is an entirely different insn group */
1807 if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
1811 if (!vfp_access_check(s)) {
1815 rn0_64 = tcg_temp_new_i64();
1816 rn1_64 = tcg_temp_new_i64();
1817 rm_64 = tcg_temp_new_i64();
1820 neon_load_reg64(rn0_64, a->vn);
1822 TCGv_i32 tmp = neon_load_reg(a->vn, 0);
1823 widenfn(rn0_64, tmp);
1824 tcg_temp_free_i32(tmp);
1826 rm = neon_load_reg(a->vm, 0);
1829 tcg_temp_free_i32(rm);
1830 opfn(rn0_64, rn0_64, rm_64);
1833 * Load second pass inputs before storing the first pass result, to
1834 * avoid incorrect results if a narrow input overlaps with the result.
1837 neon_load_reg64(rn1_64, a->vn + 1);
1839 TCGv_i32 tmp = neon_load_reg(a->vn, 1);
1840 widenfn(rn1_64, tmp);
1841 tcg_temp_free_i32(tmp);
1843 rm = neon_load_reg(a->vm, 1);
1845 neon_store_reg64(rn0_64, a->vd);
1848 tcg_temp_free_i32(rm);
1849 opfn(rn1_64, rn1_64, rm_64);
1850 neon_store_reg64(rn1_64, a->vd + 1);
1852 tcg_temp_free_i64(rn0_64);
1853 tcg_temp_free_i64(rn1_64);
1854 tcg_temp_free_i64(rm_64);
1859 #define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
1860 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
1862 static NeonGenWidenFn * const widenfn[] = { \
1863 gen_helper_neon_widen_##S##8, \
1864 gen_helper_neon_widen_##S##16, \
1865 tcg_gen_##EXT##_i32_i64, \
1868 static NeonGenTwo64OpFn * const addfn[] = { \
1869 gen_helper_neon_##OP##l_u16, \
1870 gen_helper_neon_##OP##l_u32, \
1871 tcg_gen_##OP##_i64, \
1874 return do_prewiden_3d(s, a, widenfn[a->size], \
1875 addfn[a->size], SRC1WIDE); \
1878 DO_PREWIDEN(VADDL_S, s, ext, add, false)
1879 DO_PREWIDEN(VADDL_U, u, extu, add, false)
1880 DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
1881 DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
1882 DO_PREWIDEN(VADDW_S, s, ext, add, true)
1883 DO_PREWIDEN(VADDW_U, u, extu, add, true)
1884 DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
1885 DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
1887 static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
1888 NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
1890 /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
1891 TCGv_i64 rn_64, rm_64;
1894 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1898 /* UNDEF accesses to D16-D31 if they don't exist. */
1899 if (!dc_isar_feature(aa32_simd_r32, s) &&
1900 ((a->vd | a->vn | a->vm) & 0x10)) {
1904 if (!opfn || !narrowfn) {
1905 /* size == 3 case, which is an entirely different insn group */
1909 if ((a->vn | a->vm) & 1) {
1913 if (!vfp_access_check(s)) {
1917 rn_64 = tcg_temp_new_i64();
1918 rm_64 = tcg_temp_new_i64();
1919 rd0 = tcg_temp_new_i32();
1920 rd1 = tcg_temp_new_i32();
1922 neon_load_reg64(rn_64, a->vn);
1923 neon_load_reg64(rm_64, a->vm);
1925 opfn(rn_64, rn_64, rm_64);
1927 narrowfn(rd0, rn_64);
1929 neon_load_reg64(rn_64, a->vn + 1);
1930 neon_load_reg64(rm_64, a->vm + 1);
1932 opfn(rn_64, rn_64, rm_64);
1934 narrowfn(rd1, rn_64);
1936 neon_store_reg(a->vd, 0, rd0);
1937 neon_store_reg(a->vd, 1, rd1);
1939 tcg_temp_free_i64(rn_64);
1940 tcg_temp_free_i64(rm_64);
1945 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
1946 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
1948 static NeonGenTwo64OpFn * const addfn[] = { \
1949 gen_helper_neon_##OP##l_u16, \
1950 gen_helper_neon_##OP##l_u32, \
1951 tcg_gen_##OP##_i64, \
1954 static NeonGenNarrowFn * const narrowfn[] = { \
1955 gen_helper_neon_##NARROWTYPE##_high_u8, \
1956 gen_helper_neon_##NARROWTYPE##_high_u16, \
1960 return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
1963 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
1965 tcg_gen_addi_i64(rn, rn, 1u << 31);
1966 tcg_gen_extrh_i64_i32(rd, rn);
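/*
 * Adding 1 << 31 before extracting the high half implements the rounding
 * required by VRADDHN/VRSUBHN in the size == 2 case (64-bit intermediate
 * narrowed to its high 32 bits); the 8- and 16-bit cases use the
 * narrow_round_high_u8/u16 helpers instead.
 */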
1969 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
1970 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
1971 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
1972 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
1974 static bool do_long_3d(DisasContext *s, arg_3diff *a,
1975 NeonGenTwoOpWidenFn *opfn,
1976 NeonGenTwo64OpFn *accfn)
1979 * 3-regs different lengths, long operations.
1980 * These perform an operation on two inputs that returns a double-width
1981 * result, and then possibly perform an accumulation operation of
1982 * that result into the double-width destination.
1984 TCGv_i64 rd0, rd1, tmp;
1987 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1991 /* UNDEF accesses to D16-D31 if they don't exist. */
1992 if (!dc_isar_feature(aa32_simd_r32, s) &&
1993 ((a->vd | a->vn | a->vm) & 0x10)) {
1998 /* size == 3 case, which is an entirely different insn group */
2006 if (!vfp_access_check(s)) {
2010 rd0 = tcg_temp_new_i64();
2011 rd1 = tcg_temp_new_i64();
2013 rn = neon_load_reg(a->vn, 0);
2014 rm = neon_load_reg(a->vm, 0);
2016 tcg_temp_free_i32(rn);
2017 tcg_temp_free_i32(rm);
2019 rn = neon_load_reg(a->vn, 1);
2020 rm = neon_load_reg(a->vm, 1);
2022 tcg_temp_free_i32(rn);
2023 tcg_temp_free_i32(rm);
2025 /* Don't store results until after all loads: they might overlap */
2027 tmp = tcg_temp_new_i64();
2028 neon_load_reg64(tmp, a->vd);
2029 accfn(tmp, tmp, rd0);
2030 neon_store_reg64(tmp, a->vd);
2031 neon_load_reg64(tmp, a->vd + 1);
2032 accfn(tmp, tmp, rd1);
2033 neon_store_reg64(tmp, a->vd + 1);
2034 tcg_temp_free_i64(tmp);
2036 neon_store_reg64(rd0, a->vd);
2037 neon_store_reg64(rd1, a->vd + 1);
2040 tcg_temp_free_i64(rd0);
2041 tcg_temp_free_i64(rd1);
2046 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
2048 static NeonGenTwoOpWidenFn * const opfn[] = {
2049 gen_helper_neon_abdl_s16,
2050 gen_helper_neon_abdl_s32,
2051 gen_helper_neon_abdl_s64,
2055 return do_long_3d(s, a, opfn[a->size], NULL);
2058 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
2060 static NeonGenTwoOpWidenFn * const opfn[] = {
2061 gen_helper_neon_abdl_u16,
2062 gen_helper_neon_abdl_u32,
2063 gen_helper_neon_abdl_u64,
2067 return do_long_3d(s, a, opfn[a->size], NULL);
2070 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
2072 static NeonGenTwoOpWidenFn * const opfn[] = {
2073 gen_helper_neon_abdl_s16,
2074 gen_helper_neon_abdl_s32,
2075 gen_helper_neon_abdl_s64,
2078 static NeonGenTwo64OpFn * const addfn[] = {
2079 gen_helper_neon_addl_u16,
2080 gen_helper_neon_addl_u32,
2085 return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2088 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
2090 static NeonGenTwoOpWidenFn * const opfn[] = {
2091 gen_helper_neon_abdl_u16,
2092 gen_helper_neon_abdl_u32,
2093 gen_helper_neon_abdl_u64,
2096 static NeonGenTwo64OpFn * const addfn[] = {
2097 gen_helper_neon_addl_u16,
2098 gen_helper_neon_addl_u32,
2103 return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2106 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2108 TCGv_i32 lo = tcg_temp_new_i32();
2109 TCGv_i32 hi = tcg_temp_new_i32();
2111 tcg_gen_muls2_i32(lo, hi, rn, rm);
2112 tcg_gen_concat_i32_i64(rd, lo, hi);
2114 tcg_temp_free_i32(lo);
2115 tcg_temp_free_i32(hi);
2118 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2120 TCGv_i32 lo = tcg_temp_new_i32();
2121 TCGv_i32 hi = tcg_temp_new_i32();
2123 tcg_gen_mulu2_i32(lo, hi, rn, rm);
2124 tcg_gen_concat_i32_i64(rd, lo, hi);
2126 tcg_temp_free_i32(lo);
2127 tcg_temp_free_i32(hi);
2130 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
2132 static NeonGenTwoOpWidenFn * const opfn[] = {
2133 gen_helper_neon_mull_s8,
2134 gen_helper_neon_mull_s16,
2139 return do_long_3d(s, a, opfn[a->size], NULL);
2142 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
2144 static NeonGenTwoOpWidenFn * const opfn[] = {
2145 gen_helper_neon_mull_u8,
2146 gen_helper_neon_mull_u16,
2151 return do_long_3d(s, a, opfn[a->size], NULL);
2154 #define DO_VMLAL(INSN,MULL,ACC) \
2155 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
2157 static NeonGenTwoOpWidenFn * const opfn[] = { \
2158 gen_helper_neon_##MULL##8, \
2159 gen_helper_neon_##MULL##16, \
2163 static NeonGenTwo64OpFn * const accfn[] = { \
2164 gen_helper_neon_##ACC##l_u16, \
2165 gen_helper_neon_##ACC##l_u32, \
2166 tcg_gen_##ACC##_i64, \
2169 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
2172 DO_VMLAL(VMLAL_S,mull_s,add)
2173 DO_VMLAL(VMLAL_U,mull_u,add)
2174 DO_VMLAL(VMLSL_S,mull_s,sub)
2175 DO_VMLAL(VMLSL_U,mull_u,sub)
2177 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2179 gen_helper_neon_mull_s16(rd, rn, rm);
2180 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
2183 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2185 gen_mull_s32(rd, rn, rm);
2186 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
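/*
 * The doubling in VQDMULL is implemented as a saturating add of the
 * widened product to itself (rd + rd), which also sets QC if the
 * doubled product saturates.
 */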
2189 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
2191 static NeonGenTwoOpWidenFn * const opfn[] = {
2198 return do_long_3d(s, a, opfn[a->size], NULL);
2201 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2203 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2206 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2208 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2211 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
2213 static NeonGenTwoOpWidenFn * const opfn[] = {
2219 static NeonGenTwo64OpFn * const accfn[] = {
2226 return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2229 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2231 gen_helper_neon_negl_u32(rm, rm);
2232 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2235 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2237 tcg_gen_neg_i64(rm, rm);
2238 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2241 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
2243 static NeonGenTwoOpWidenFn * const opfn[] = {
2249 static NeonGenTwo64OpFn * const accfn[] = {
2256 return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2259 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
2261 gen_helper_gvec_3 *fn_gvec;
2263 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2267 /* UNDEF accesses to D16-D31 if they don't exist. */
2268 if (!dc_isar_feature(aa32_simd_r32, s) &&
2269 ((a->vd | a->vn | a->vm) & 0x10)) {
2279 fn_gvec = gen_helper_neon_pmull_h;
2282 if (!dc_isar_feature(aa32_pmull, s)) {
2285 fn_gvec = gen_helper_gvec_pmull_q;
2291 if (!vfp_access_check(s)) {
2295 tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
2296 neon_reg_offset(a->vn, 0),
2297 neon_reg_offset(a->vm, 0),
2298 16, 16, 0, fn_gvec);
2302 static void gen_neon_dup_low16(TCGv_i32 var)
2304 TCGv_i32 tmp = tcg_temp_new_i32();
2305 tcg_gen_ext16u_i32(var, var);
2306 tcg_gen_shli_i32(tmp, var, 16);
2307 tcg_gen_or_i32(var, var, tmp);
2308 tcg_temp_free_i32(tmp);
2311 static void gen_neon_dup_high16(TCGv_i32 var)
2313 TCGv_i32 tmp = tcg_temp_new_i32();
2314 tcg_gen_andi_i32(var, var, 0xffff0000);
2315 tcg_gen_shri_i32(tmp, var, 16);
2316 tcg_gen_or_i32(var, var, tmp);
2317 tcg_temp_free_i32(tmp);
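/*
 * In neon_get_scalar() below, 'reg' packs the scalar encoding: for
 * 16-bit scalars bits [2:0] select the D register, bit 3 the 16-bit
 * half within the 32-bit word and bit 4 the word; for 32-bit scalars
 * bits [3:0] select the D register and bit 4 the word.
 */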
2320 static inline TCGv_i32 neon_get_scalar(int size, int reg)
2324 tmp = neon_load_reg(reg & 7, reg >> 4);
2326 gen_neon_dup_high16(tmp);
2328 gen_neon_dup_low16(tmp);
2331 tmp = neon_load_reg(reg & 15, reg >> 4);
2336 static bool do_2scalar(DisasContext *s, arg_2scalar *a,
2337 NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
2340 * Two registers and a scalar: perform an operation between
2341 * the input elements and the scalar, and then possibly
 * perform an accumulation operation of that result into the destination.
2348 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2352 /* UNDEF accesses to D16-D31 if they don't exist. */
2353 if (!dc_isar_feature(aa32_simd_r32, s) &&
2354 ((a->vd | a->vn | a->vm) & 0x10)) {
2359 /* Bad size (including size == 3, which is a different insn group) */
2363 if (a->q && ((a->vd | a->vn) & 1)) {
2367 if (!vfp_access_check(s)) {
2371 scalar = neon_get_scalar(a->size, a->vm);
2373 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2374 TCGv_i32 tmp = neon_load_reg(a->vn, pass);
2375 opfn(tmp, tmp, scalar);
2377 TCGv_i32 rd = neon_load_reg(a->vd, pass);
2378 accfn(tmp, rd, tmp);
2379 tcg_temp_free_i32(rd);
2381 neon_store_reg(a->vd, pass, tmp);
2383 tcg_temp_free_i32(scalar);
2387 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
2389 static NeonGenTwoOpFn * const opfn[] = {
2391 gen_helper_neon_mul_u16,
2396 return do_2scalar(s, a, opfn[a->size], NULL);
2399 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
2401 static NeonGenTwoOpFn * const opfn[] = {
2403 gen_helper_neon_mul_u16,
2407 static NeonGenTwoOpFn * const accfn[] = {
2409 gen_helper_neon_add_u16,
2414 return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2417 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
2419 static NeonGenTwoOpFn * const opfn[] = {
2421 gen_helper_neon_mul_u16,
2425 static NeonGenTwoOpFn * const accfn[] = {
2427 gen_helper_neon_sub_u16,
2432 return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2435 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
2436 gen_helper_gvec_3_ptr *fn)
2438 /* Two registers and a scalar, using gvec */
2439 int vec_size = a->q ? 16 : 8;
2440 int rd_ofs = neon_reg_offset(a->vd, 0);
2441 int rn_ofs = neon_reg_offset(a->vn, 0);
2446 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2450 /* UNDEF accesses to D16-D31 if they don't exist. */
2451 if (!dc_isar_feature(aa32_simd_r32, s) &&
2452 ((a->vd | a->vn | a->vm) & 0x10)) {
2457 /* Bad size (including size == 3, which is a different insn group) */
2461 if (a->q && ((a->vd | a->vn) & 1)) {
2465 if (!vfp_access_check(s)) {
2469 /* a->vm is M:Vm, which encodes both register and index */
2470 idx = extract32(a->vm, a->size + 2, 2);
2471 a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_reg_offset(a->vm, 0);

    fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
                       vec_size, vec_size, idx, fn);
    tcg_temp_free_ptr(fpstatus);

#define DO_VMUL_F_2sc(NAME, FUNC) \
    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \
        static gen_helper_gvec_3_ptr * const opfn[] = { \
            gen_helper_##FUNC##_h, \
            gen_helper_##FUNC##_s, \
        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
        return do_2scalar_fp_vec(s, a, opfn[a->size]); \

DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)

WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)

static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpFn * const opfn[] = {
    return do_2scalar(s, a, opfn[a->size], NULL);

static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpFn * const opfn[] = {
    return do_2scalar(s, a, opfn[a->size], NULL);

static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
     * performs a kind of fused op-then-accumulate using a helper
     * function that takes all of rd, rn and the scalar at once.
     */
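    /*
     * Illustrative note (assuming the architected VQRDMLAH semantics):
     * the accumulation with rd is folded in before the single final
     * saturation, which is not the same as a VQRDMULH followed by a
     * separate saturating add.
     */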
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    if (!dc_isar_feature(aa32_rdm, s)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
    /* Bad size (including size == 3, which is a different insn group) */
    if (a->q && ((a->vd | a->vn) & 1)) {
    if (!vfp_access_check(s)) {
    scalar = neon_get_scalar(a->size, a->vm);
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 rn = neon_load_reg(a->vn, pass);
        TCGv_i32 rd = neon_load_reg(a->vd, pass);
        opfn(rd, cpu_env, rn, scalar, rd);
        tcg_temp_free_i32(rn);
        neon_store_reg(a->vd, pass, rd);
    tcg_temp_free_i32(scalar);

static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenThreeOpEnvFn *opfn[] = {
        gen_helper_neon_qrdmlah_s16,
        gen_helper_neon_qrdmlah_s32,
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);

static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenThreeOpEnvFn *opfn[] = {
        gen_helper_neon_qrdmlsh_s16,
        gen_helper_neon_qrdmlsh_s32,
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);

static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
                            NeonGenTwoOpWidenFn *opfn,
                            NeonGenTwo64OpFn *accfn)
{
    /*
     * Two registers and a scalar, long operations: perform an
     * operation on the input elements and the scalar which produces
     * a double-width result, and then possibly perform an accumulation
     * operation of that result into the destination.
     */
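    /*
     * Illustrative example (assuming the usual Neon semantics):
     * VMLAL.S16 Qd, Dn, Dm[x] widens each 16-bit product Dn[i] * Dm[x]
     * to 32 bits and adds it into the corresponding 32-bit lane of Qd.
     */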
    TCGv_i32 scalar, rn;
    TCGv_i64 rn0_64, rn1_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
    /* Bad size (including size == 3, which is a different insn group) */
    if (!vfp_access_check(s)) {
    scalar = neon_get_scalar(a->size, a->vm);

    /* Load all inputs before writing any outputs, in case of overlap */
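    /*
     * Illustrative example: for VMULL.S16 Q0, D1, D0[0] the
     * destination Q0 overlaps the source D1, so writing the result
     * before both input halves have been read would corrupt the
     * second input.
     */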
    rn = neon_load_reg(a->vn, 0);
    rn0_64 = tcg_temp_new_i64();
    opfn(rn0_64, rn, scalar);
    tcg_temp_free_i32(rn);

    rn = neon_load_reg(a->vn, 1);
    rn1_64 = tcg_temp_new_i64();
    opfn(rn1_64, rn, scalar);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(scalar);

        TCGv_i64 t64 = tcg_temp_new_i64();
        neon_load_reg64(t64, a->vd);
        accfn(t64, t64, rn0_64);
        neon_store_reg64(t64, a->vd);
        neon_load_reg64(t64, a->vd + 1);
        accfn(t64, t64, rn1_64);
        neon_store_reg64(t64, a->vd + 1);
        tcg_temp_free_i64(t64);
        neon_store_reg64(rn0_64, a->vd);
        neon_store_reg64(rn1_64, a->vd + 1);
    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);

static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_s16,
    return do_2scalar_long(s, a, opfn[a->size], NULL);

static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_u16,
    return do_2scalar_long(s, a, opfn[a->size], NULL);

#define DO_VMLAL_2SC(INSN, MULL, ACC) \
    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
        static NeonGenTwoOpWidenFn * const opfn[] = { \
            gen_helper_neon_##MULL##16, \
        static NeonGenTwo64OpFn * const accfn[] = { \
            gen_helper_neon_##ACC##l_u32, \
            tcg_gen_##ACC##_i64, \
        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \

DO_VMLAL_2SC(VMLAL_S, mull_s, add)
DO_VMLAL_2SC(VMLAL_U, mull_u, add)
DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
DO_VMLAL_2SC(VMLSL_U, mull_u, sub)

static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
    return do_2scalar_long(s, a, opfn[a->size], NULL);

static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
    static NeonGenTwo64OpFn * const accfn[] = {
    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);

static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
    static NeonGenTwoOpWidenFn * const opfn[] = {
    static NeonGenTwo64OpFn * const accfn[] = {
    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);

static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
    if ((a->vn | a->vm | a->vd) & a->q) {
    if (a->imm > 7 && !a->q) {
    if (!vfp_access_check(s)) {
        /* Extract 64 bits from <Vm:Vn> */
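        /*
         * Illustrative example (assuming the usual VEXT semantics):
         * VEXT.8 Dd, Dn, Dm, #3 places Dn bytes 3..7 in Dd bytes 0..4
         * and Dm bytes 0..2 in Dd bytes 5..7, i.e. the low 64 bits of
         * (Dm:Dn) >> 24.
         */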
        TCGv_i64 left, right, dest;

        left = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        neon_load_reg64(right, a->vn);
        neon_load_reg64(left, a->vm);
        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
        neon_store_reg64(dest, a->vd);

        tcg_temp_free_i64(left);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(dest);
        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
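        /*
         * Illustrative note: the two sub-cases below cover imm < 8,
         * where the low half of the result starts inside Vn:Vn+1, and
         * imm >= 8, where it starts inside Vn+1:Vm.
         */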
        TCGv_i64 left, middle, right, destleft, destright;

        left = tcg_temp_new_i64();
        middle = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        destleft = tcg_temp_new_i64();
        destright = tcg_temp_new_i64();

            neon_load_reg64(right, a->vn);
            neon_load_reg64(middle, a->vn + 1);
            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
            neon_load_reg64(left, a->vm);
            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
            neon_load_reg64(right, a->vn + 1);
            neon_load_reg64(middle, a->vm);
            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
            neon_load_reg64(left, a->vm + 1);
            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);

        neon_store_reg64(destright, a->vd);
        neon_store_reg64(destleft, a->vd + 1);

        tcg_temp_free_i64(destright);
        tcg_temp_free_i64(destleft);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(middle);
        tcg_temp_free_i64(left);

static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
    TCGv_i32 tmp, tmp2, tmp3, tmp4;
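    /*
     * Illustrative note (assuming the usual VTBL/VTBX semantics): each
     * byte of Vm indexes into the table formed by the consecutive
     * registers starting at Vn; an out-of-range index yields zero for
     * VTBL but leaves the destination byte unchanged for VTBX, which
     * is why Vd is loaded as the fallback value below.
     */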
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
    if (!vfp_access_check(s)) {
    if ((a->vn + n) > 32) {
        /*
         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
         * helper function running off the end of the register file.
         */
        tmp = neon_load_reg(a->vd, 0);
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
    tmp2 = neon_load_reg(a->vm, 0);
    ptr1 = vfp_reg_ptr(true, a->vn);
    tmp4 = tcg_const_i32(n);
    gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
    tcg_temp_free_i32(tmp);

        tmp = neon_load_reg(a->vd, 1);
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
    tmp3 = neon_load_reg(a->vm, 1);
    gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
    tcg_temp_free_i32(tmp4);
    tcg_temp_free_ptr(ptr1);
    neon_store_reg(a->vd, 0, tmp2);
    neon_store_reg(a->vd, 1, tmp3);
    tcg_temp_free_i32(tmp);

static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if (!vfp_access_check(s)) {
    tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
                         neon_element_offset(a->vm, a->index, a->size),
                         a->q ? 16 : 8, a->q ? 16 : 8);

static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        for (half = 0; half < 2; half++) {
            tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
                gen_swap_half(tmp[half], tmp[half]);
                g_assert_not_reached();
        neon_store_reg(a->vd, pass * 2, tmp[1]);
        neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);

static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
                              NeonGenWidenFn *widenfn,
                              NeonGenTwo64OpFn *opfn,
                              NeonGenTwo64OpFn *accfn)
{
    /*
     * Pairwise long operations: widen both halves of the pair,
     * combine the pairs with the opfn, and then possibly accumulate
     * into the destination with the accfn.
     */
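    /*
     * Illustrative example (assuming the usual Neon semantics):
     * VPADDL.S8 Dd, Dm sign-extends each byte of Dm to 16 bits and
     * adds adjacent pairs, producing four 16-bit results in Dd.
     */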
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 rm0_64, rm1_64, rd_64;

        rm0_64 = tcg_temp_new_i64();
        rm1_64 = tcg_temp_new_i64();
        rd_64 = tcg_temp_new_i64();
        tmp = neon_load_reg(a->vm, pass * 2);
        widenfn(rm0_64, tmp);
        tcg_temp_free_i32(tmp);
        tmp = neon_load_reg(a->vm, pass * 2 + 1);
        widenfn(rm1_64, tmp);
        tcg_temp_free_i32(tmp);
        opfn(rd_64, rm0_64, rm1_64);
        tcg_temp_free_i64(rm0_64);
        tcg_temp_free_i64(rm1_64);

            TCGv_i64 tmp64 = tcg_temp_new_i64();
            neon_load_reg64(tmp64, a->vd + pass);
            accfn(rd_64, tmp64, rd_64);
            tcg_temp_free_i64(tmp64);
        neon_store_reg64(rd_64, a->vd + pass);
        tcg_temp_free_i64(rd_64);

static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);

static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);

static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],

static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],

typedef void ZipFn(TCGv_ptr, TCGv_ptr);

static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    /* Bad size or size/q combination */
    if (!vfp_access_check(s)) {
    pd = vfp_reg_ptr(true, a->vd);
    pm = vfp_reg_ptr(true, a->vm);
    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(pm);

static bool trans_VUZP(DisasContext *s, arg_2misc *a)
    static ZipFn * const fn[2][4] = {
            gen_helper_neon_unzip8,
            gen_helper_neon_unzip16,
            gen_helper_neon_qunzip8,
            gen_helper_neon_qunzip16,
            gen_helper_neon_qunzip32,
    return do_zip_uzp(s, a, fn[a->q][a->size]);

static bool trans_VZIP(DisasContext *s, arg_2misc *a)
    static ZipFn * const fn[2][4] = {
            gen_helper_neon_zip8,
            gen_helper_neon_zip16,
            gen_helper_neon_qzip8,
            gen_helper_neon_qzip16,
            gen_helper_neon_qzip32,
    return do_zip_uzp(s, a, fn[a->q][a->size]);

static bool do_vmovn(DisasContext *s, arg_2misc *a,
                     NeonGenNarrowEnvFn *narrowfn)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if (!vfp_access_check(s)) {
    rm = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    neon_load_reg64(rm, a->vm);
    narrowfn(rd0, cpu_env, rm);
    neon_load_reg64(rm, a->vm + 1);
    narrowfn(rd1, cpu_env, rm);
    neon_store_reg(a->vd, 0, rd0);
    neon_store_reg(a->vd, 1, rd1);
    tcg_temp_free_i64(rm);

#define DO_VMOVN(INSN, FUNC) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        static NeonGenNarrowEnvFn * const narrowfn[] = { \
        return do_vmovn(s, a, narrowfn[a->size]); \

DO_VMOVN(VMOVN, gen_neon_narrow_u)
DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)

static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if (!vfp_access_check(s)) {
    rd = tcg_temp_new_i64();

    rm0 = neon_load_reg(a->vm, 0);
    rm1 = neon_load_reg(a->vm, 1);
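    /*
     * Illustrative note: this is the 2-reg-misc form of VSHLL, whose
     * shift amount is fixed at the element width, so each widened half
     * is shifted left by 8 << a->size bits below.
     */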
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    neon_store_reg64(rd, a->vd);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    neon_store_reg64(rd, a->vd + 1);

    tcg_temp_free_i64(rd);
    tcg_temp_free_i32(rm0);
    tcg_temp_free_i32(rm1);

static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vm & 1) || (a->size != 1)) {
    if (!vfp_access_check(s)) {
    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp = neon_load_reg(a->vm, 0);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = neon_load_reg(a->vm, 1);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp);
    tcg_temp_free_i32(tmp);
    tmp = neon_load_reg(a->vm, 2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = neon_load_reg(a->vm, 3);
    neon_store_reg(a->vd, 0, tmp2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    neon_store_reg(a->vd, 1, tmp3);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd & 1) || (a->size != 1)) {
    if (!vfp_access_check(s)) {
    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp3 = tcg_temp_new_i32();
    tmp = neon_load_reg(a->vm, 0);
    tmp2 = neon_load_reg(a->vm, 1);
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    neon_store_reg(a->vd, 0, tmp3);
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    neon_store_reg(a->vd, 1, tmp);
    tmp3 = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    neon_store_reg(a->vd, 2, tmp3);
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    neon_store_reg(a->vd, 3, tmp2);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);

#define DO_2MISC_VEC(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        return do_2misc_vec(s, a, FN); \

DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)

static bool trans_VMVN(DisasContext *s, arg_2misc *a)
    return do_2misc_vec(s, a, tcg_gen_gvec_not);

#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \

#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \

WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)

#define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \
        return do_2misc_vec(s, a, gen_##INSN); \

DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)

static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
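    /*
     * Illustrative note: the loop below loads one 32-bit chunk of Vm
     * per pass, applies fn to it and stores the result to Vd; for
     * VCLS.S8, for example, each pass handles four 8-bit lanes packed
     * into a single i32.
     */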
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        neon_store_reg(a->vd, pass, tmp);

static bool trans_VREV32(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
        tcg_gen_bswap32_i32,
    return do_2misc(s, a, fn[a->size]);

static bool trans_VREV16(DisasContext *s, arg_2misc *a)
    return do_2misc(s, a, gen_rev16);

static bool trans_VCLS(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_cls_s8,
        gen_helper_neon_cls_s16,
        gen_helper_neon_cls_s32,
    return do_2misc(s, a, fn[a->size]);

static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
    tcg_gen_clzi_i32(rd, rm, 32);

static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_clz_u8,
        gen_helper_neon_clz_u16,
    return do_2misc(s, a, fn[a->size]);

static bool trans_VCNT(DisasContext *s, arg_2misc *a)
    return do_2misc(s, a, gen_helper_neon_cnt_u8);

static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x7fff : 0x7fffffff,

static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
    } else if (a->size != MO_32) {
    return do_2misc_vec(s, a, gen_VABS_F);

static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x8000 : 0x80000000,

static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
    } else if (a->size != MO_32) {
    return do_2misc_vec(s, a, gen_VNEG_F);

static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
    return do_2misc(s, a, gen_helper_recpe_u32);

static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
    return do_2misc(s, a, gen_helper_rsqrte_u32);

#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \
        FUNC(d, cpu_env, m); \

WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)

static bool trans_VQABS(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
    return do_2misc(s, a, fn[a->size]);

static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
    static NeonGenOneOpFn * const fn[] = {
    return do_2misc(s, a, fn[a->size]);

#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
                           uint32_t oprsz, uint32_t maxsz) \
        static gen_helper_gvec_2_ptr * const fns[4] = { \
            NULL, HFUNC, SFUNC, NULL, \
        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \
        tcg_temp_free_ptr(fpst); \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        if (a->size == MO_16) { \
            if (!dc_isar_feature(aa32_fp16_arith, s)) { \
        } else if (a->size != MO_32) { \
        return do_2misc_vec(s, a, gen_##INSN); \

DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)

DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)

static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
    return trans_VRINTX_impl(s, a);

#define DO_VEC_RMODE(INSN, RMODE, OP) \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
                           uint32_t oprsz, uint32_t maxsz) \
        static gen_helper_gvec_2_ptr * const fns[4] = { \
            gen_helper_gvec_##OP##h, \
            gen_helper_gvec_##OP##s, \
        fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \
                           arm_rmode_to_sf(RMODE), fns[vece]); \
        tcg_temp_free_ptr(fpst); \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
        if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \
        if (a->size == MO_16) { \
            if (!dc_isar_feature(aa32_fp16_arith, s)) { \
        } else if (a->size != MO_32) { \
        return do_2misc_vec(s, a, gen_##INSN); \

DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)

DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)

static bool trans_VSWP(DisasContext *s, arg_2misc *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
    rm = tcg_temp_new_i64();
    rd = tcg_temp_new_i64();
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        neon_load_reg64(rm, a->vm + pass);
        neon_load_reg64(rd, a->vd + pass);
        neon_store_reg64(rm, a->vd + pass);
        neon_store_reg64(rd, a->vm + pass);
    tcg_temp_free_i64(rm);
    tcg_temp_free_i64(rd);

    return true;
}
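
/*
 * Illustrative worked example for the transpose helpers below: with
 * input bytes (least significant first) t0 = {b0, b1, b2, b3} and
 * t1 = {c0, c1, c2, c3}, gen_neon_trn_u8 leaves
 *   t0 = {c0, b0, c2, b2} and t1 = {c1, b1, c3, b3},
 * i.e. the even- and odd-numbered lanes interleaved, which is one step
 * of VTRN.8; gen_neon_trn_u16 does the same with 16-bit lanes.
 */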
static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 8);
    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
    tcg_gen_or_i32(rd, rd, tmp);

    tcg_gen_shri_i32(t1, t1, 8);
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);

static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 16);
    tcg_gen_andi_i32(tmp, t1, 0xffff);
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shri_i32(t1, t1, 16);
    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);

static bool trans_VTRN(DisasContext *s, arg_2misc *a)
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
    if ((a->vd | a->vm) & a->q) {
    if (!vfp_access_check(s)) {
        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
            tmp = neon_load_reg(a->vm, pass);
            tmp2 = neon_load_reg(a->vd, pass + 1);
            neon_store_reg(a->vm, pass, tmp2);
            neon_store_reg(a->vd, pass + 1, tmp);
        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
            tmp = neon_load_reg(a->vm, pass);
            tmp2 = neon_load_reg(a->vd, pass);
                gen_neon_trn_u8(tmp, tmp2);
                gen_neon_trn_u16(tmp, tmp2);
            neon_store_reg(a->vm, pass, tmp2);
            neon_store_reg(a->vd, pass, tmp);