gcc/config/arm/neon.md

   1 ;; ARM NEON coprocessor Machine Description
   2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
   3 ;; Written by CodeSourcery.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21
  22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
  23 ;; type attribute definitions.
  /* Declares the vqh_mnem insn attribute.  Its permitted values are vadd,
     vmin and vmax, and the default value is vadd.  */
  24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
  25
  /* Move pattern for VDX modes.  Enabled under TARGET_NEON when at least one
     of the two operands is a register.  Alternative 2 (the Dn constraint)
     builds its assembler template at output time; every other alternative is
     dispatched by the switch at the end of the body.  */
  26 (define_insn "*neon_mov<mode>"
  27   [(set (match_operand:VDX 0 "nonimmediate_operand"
  28           "=w,Un,w, w,  ?r,?w,?r, ?Us")
  29         (match_operand:VDX 1 "general_operand"
  30           " w,w, Dn,Uni, w, r, Usi,r"))]
  31   "TARGET_NEON
  32    && (register_operand (operands[0], <MODE>mode)
  33        || register_operand (operands[1], <MODE>mode))"
  34 {
  35   if (which_alternative == 2)
  36     {
  37       int width, is_valid;
           /* Static storage, because the buffer is returned below.  */
  38       static char templ[40];
  39
           /* operands[1] and width are passed by address, presumably so the
              helper can rewrite the constant and report the element width;
              confirm against neon_immediate_valid_for_move.  */
  40       is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
  41         &operands[1], &width);
  42
  43       gcc_assert (is_valid != 0);
  44
           /* A width of zero selects the fixed vmov.f32 template; any other
              width is printed into the vmov.i template.  */
  45       if (width == 0)
  46         return "vmov.f32\t%P0, %1  @ <mode>";
  47       else
  48         sprintf (templ, "vmov.i%d\t%%P0, %%x1  @ <mode>", width);
  49
  50       return templ;
  51     }
  52
  53   switch (which_alternative)
  54     {
  55     case 0: return "vmov\t%P0, %P1  @ <mode>";
  56     case 1: case 3: return output_move_neon (operands);
           /* Alternative 2 already returned from the block above.  */
  57     case 2: gcc_unreachable ();
  58     case 4: return "vmov\t%Q0, %R0, %P1  @ <mode>";
  59     case 5: return "vmov\t%P0, %Q1, %R1  @ <mode>";
  60     default: return output_move_double (operands, true, NULL);
  61     }
  62 }
  63  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
  64                     neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
  65                     neon_load1_2reg, neon_store1_2reg")
  66   (set_attr "length" "4,4,4,4,4,4,8,8")
  67   (set_attr "arm_pool_range"     "*,*,*,1020,*,*,1020,*")
  68   (set_attr "thumb2_pool_range"     "*,*,*,1018,*,*,1018,*")
  69   (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*")])
  70
  /* Move pattern for VQXMOV modes.  Enabled under TARGET_NEON when at least
     one of the two operands is a register.  Alternative 2 (the Dn constraint)
     builds its assembler template at output time; alternatives 4 and 5 emit
     two vmov instructions; the remaining alternatives either return a fixed
     template or defer to output_move_neon / output_move_quad.  */
  71 (define_insn "*neon_mov<mode>"
  72   [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
  73           "=w,Un,w, w,  ?r,?w,?r,?r,  ?Us")
  74         (match_operand:VQXMOV 1 "general_operand"
  75           " w,w, Dn,Uni, w, r, r, Usi, r"))]
  76   "TARGET_NEON
  77    && (register_operand (operands[0], <MODE>mode)
  78        || register_operand (operands[1], <MODE>mode))"
  79 {
  80   if (which_alternative == 2)
  81     {
  82       int width, is_valid;
           /* Static storage, because the buffer is returned below.  */
  83       static char templ[40];
  84
           /* operands[1] and width are passed by address, presumably so the
              helper can rewrite the constant and report the element width;
              confirm against neon_immediate_valid_for_move.  */
  85       is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
  86         &operands[1], &width);
  87
  88       gcc_assert (is_valid != 0);
  89
           /* A width of zero selects the fixed vmov.f32 template; any other
              width is printed into the vmov.i template.  */
  90       if (width == 0)
  91         return "vmov.f32\t%q0, %1  @ <mode>";
  92       else
  93         sprintf (templ, "vmov.i%d\t%%q0, %%1  @ <mode>", width);
  94
  95       return templ;
  96     }
  97
  98   switch (which_alternative)
  99     {
 100     case 0: return "vmov\t%q0, %q1  @ <mode>";
 101     case 1: case 3: return output_move_neon (operands);
           /* Alternative 2 already returned from the block above.  */
 102     case 2: gcc_unreachable ();
 103     case 4: return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";
 104     case 5: return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";
 105     default: return output_move_quad (operands);
 106     }
 107 }
 108   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
 109                      neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
 110                      mov_reg,neon_load1_4reg,neon_store1_4reg")
 111    (set_attr "length" "4,8,4,8,8,8,16,8,16")
 112    (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
 113    (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
 114    (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
 115
 116 /* We define these mov expanders to match the standard mov$a optab to prevent
 117    the mid-end from trying to do a subreg for these modes which is the most
 118    inefficient way to expand the move.  Also, big-endian subregs aren't
 119    allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
 120    Without these RTL generation patterns the mid-end would attempt to take a
 121    sub-reg and may ICE if it can't.  */
 122
 /* Expander for TImode moves, available under TARGET_NEON.  While new
    pseudos can still be created, the source is forced into a register
    whenever the destination is not a REG.  */
 123 (define_expand "movti"
 124   [(set (match_operand:TI 0 "nonimmediate_operand" "")
 125         (match_operand:TI 1 "general_operand" ""))]
 126   "TARGET_NEON"
 127 {
 128   if (can_create_pseudo_p ())
 129     {
 130       if (!REG_P (operands[0]))
 131         operands[1] = force_reg (TImode, operands[1]);
 132     }
 133 })
 134
 /* Expander for moves in the VSTRUCT modes, available under TARGET_NEON.
    While new pseudos can still be created, the source is forced into a
    register whenever the destination is not a REG.  */
 135 (define_expand "mov<mode>"
 136   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
 137         (match_operand:VSTRUCT 1 "general_operand" ""))]
 138   "TARGET_NEON"
 139 {
 140   if (can_create_pseudo_p ())
 141     {
 142       if (!REG_P (operands[0]))
 143         operands[1] = force_reg (<MODE>mode, operands[1]);
 144     }
 145 })
 146
 /* Expander for moves in the VH modes, available under TARGET_NEON.  Both
    operands use the s_register_operand predicate.  While new pseudos can
    still be created, the source is forced into a register whenever the
    destination is not a REG.  */
 147 (define_expand "mov<mode>"
 148   [(set (match_operand:VH 0 "s_register_operand")
 149         (match_operand:VH 1 "s_register_operand"))]
 150   "TARGET_NEON"
 151 {
 152   if (can_create_pseudo_p ())
 153     {
 154       if (!REG_P (operands[0]))
 155         operands[1] = force_reg (<MODE>mode, operands[1]);
 156     }
 157 })
 158
 /* Move pattern for the VSTRUCT modes.  Enabled under TARGET_NEON when at
    least one operand is a register.  The register-to-register alternative
    returns the # template (presumably so that it is split later; the
    define_splits for EI, OI, CI and XI in this file look like the intended
    consumers, to be confirmed against the VSTRUCT iterator).  The memory
    alternatives defer to output_move_neon.  The length attribute is computed
    per insn by arm_attr_length_move_neon.  */
 159 (define_insn "*neon_mov<mode>"
 160   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
 161         (match_operand:VSTRUCT 1 "general_operand"      " w,w, Ut"))]
 162   "TARGET_NEON
 163    && (register_operand (operands[0], <MODE>mode)
 164        || register_operand (operands[1], <MODE>mode))"
 165 {
 166   switch (which_alternative)
 167     {
 168     case 0: return "#";
 169     case 1: case 2: return output_move_neon (operands);
 170     default: gcc_unreachable ();
 171     }
 172 }
 173   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
 174    (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
 175
 /* After reload, split a register-to-register EI move into two moves: a
    TImode piece at the base register number and a DImode piece at the base
    register number plus 4.  The pieces are handed to neon_disambiguate_copy
    together with the operand array; presumably it fills operands 0-3 in an
    order that is safe when source and destination overlap (confirm against
    its definition).  */
 176 (define_split
 177   [(set (match_operand:EI 0 "s_register_operand" "")
 178         (match_operand:EI 1 "s_register_operand" ""))]
 179   "TARGET_NEON && reload_completed"
 180   [(set (match_dup 0) (match_dup 1))
 181    (set (match_dup 2) (match_dup 3))]
 182 {
 183   int rdest = REGNO (operands[0]);
 184   int rsrc = REGNO (operands[1]);
 185   rtx dest[2], src[2];
 186
 187   dest[0] = gen_rtx_REG (TImode, rdest);
 188   src[0] = gen_rtx_REG (TImode, rsrc);
 189   dest[1] = gen_rtx_REG (DImode, rdest + 4);
 190   src[1] = gen_rtx_REG (DImode, rsrc + 4);
 191
 192   neon_disambiguate_copy (operands, dest, src, 2);
 193 })
 194
 /* After reload, split a register-to-register OI move into two TImode
    moves, at the base register number and at the base plus 4.  The pieces
    are handed to neon_disambiguate_copy together with the operand array;
    presumably it fills operands 0-3 in an order that is safe when source and
    destination overlap (confirm against its definition).  */
 195 (define_split
 196   [(set (match_operand:OI 0 "s_register_operand" "")
 197         (match_operand:OI 1 "s_register_operand" ""))]
 198   "TARGET_NEON && reload_completed"
 199   [(set (match_dup 0) (match_dup 1))
 200    (set (match_dup 2) (match_dup 3))]
 201 {
 202   int rdest = REGNO (operands[0]);
 203   int rsrc = REGNO (operands[1]);
 204   rtx dest[2], src[2];
 205
 206   dest[0] = gen_rtx_REG (TImode, rdest);
 207   src[0] = gen_rtx_REG (TImode, rsrc);
 208   dest[1] = gen_rtx_REG (TImode, rdest + 4);
 209   src[1] = gen_rtx_REG (TImode, rsrc + 4);
 210
 211   neon_disambiguate_copy (operands, dest, src, 2);
 212 })
 213
 /* After reload, split a register-to-register CI move into three TImode
    moves, at the base register number and at the base plus 4 and plus 8.
    The pieces are handed to neon_disambiguate_copy together with the operand
    array; presumably it fills operands 0-5 in an order that is safe when
    source and destination overlap (confirm against its definition).  */
 214 (define_split
 215   [(set (match_operand:CI 0 "s_register_operand" "")
 216         (match_operand:CI 1 "s_register_operand" ""))]
 217   "TARGET_NEON && reload_completed"
 218   [(set (match_dup 0) (match_dup 1))
 219    (set (match_dup 2) (match_dup 3))
 220    (set (match_dup 4) (match_dup 5))]
 221 {
 222   int rdest = REGNO (operands[0]);
 223   int rsrc = REGNO (operands[1]);
 224   rtx dest[3], src[3];
 225
 226   dest[0] = gen_rtx_REG (TImode, rdest);
 227   src[0] = gen_rtx_REG (TImode, rsrc);
 228   dest[1] = gen_rtx_REG (TImode, rdest + 4);
 229   src[1] = gen_rtx_REG (TImode, rsrc + 4);
 230   dest[2] = gen_rtx_REG (TImode, rdest + 8);
 231   src[2] = gen_rtx_REG (TImode, rsrc + 8);
 232
 233   neon_disambiguate_copy (operands, dest, src, 3);
 234 })
 235
 /* After reload, split a register-to-register XI move into four TImode
    moves, at the base register number and at the base plus 4, 8 and 12.
    The pieces are handed to neon_disambiguate_copy together with the operand
    array; presumably it fills operands 0-7 in an order that is safe when
    source and destination overlap (confirm against its definition).  */
 236 (define_split
 237   [(set (match_operand:XI 0 "s_register_operand" "")
 238         (match_operand:XI 1 "s_register_operand" ""))]
 239   "TARGET_NEON && reload_completed"
 240   [(set (match_dup 0) (match_dup 1))
 241    (set (match_dup 2) (match_dup 3))
 242    (set (match_dup 4) (match_dup 5))
 243    (set (match_dup 6) (match_dup 7))]
 244 {
 245   int rdest = REGNO (operands[0]);
 246   int rsrc = REGNO (operands[1]);
 247   rtx dest[4], src[4];
 248
 249   dest[0] = gen_rtx_REG (TImode, rdest);
 250   src[0] = gen_rtx_REG (TImode, rsrc);
 251   dest[1] = gen_rtx_REG (TImode, rdest + 4);
 252   src[1] = gen_rtx_REG (TImode, rsrc + 4);
 253   dest[2] = gen_rtx_REG (TImode, rdest + 8);
 254   src[2] = gen_rtx_REG (TImode, rsrc + 8);
 255   dest[3] = gen_rtx_REG (TImode, rdest + 12);
 256   src[3] = gen_rtx_REG (TImode, rsrc + 12);
 257
 258   neon_disambiguate_copy (operands, dest, src, 4);
 259 })
 260
 /* Expander for misaligned moves in the VDQX modes, wrapping the source in
    UNSPEC_MISALIGNED_ACCESS.  Available only for little-endian NEON targets
    with unaligned_access set.  The body first guarantees that at least one
    operand is a register, then legitimizes the address of whichever operand
    is not the register side.  */
 261 (define_expand "movmisalign<mode>"
 262   [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
 263         (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
 264                      UNSPEC_MISALIGNED_ACCESS))]
 265   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 266 {
 267   rtx adjust_mem;
 268   /* This pattern is not permitted to fail during expansion: if both arguments
 269      are non-registers (e.g. memory := constant, which can be created by the
 270      auto-vectorizer), force operand 1 into a register.  */
 271   if (!s_register_operand (operands[0], <MODE>mode)
 272       && !s_register_operand (operands[1], <MODE>mode))
 273     operands[1] = force_reg (<MODE>mode, operands[1]);
 274
       /* Pick the operand whose address may need fixing: the source when the
          destination is a register, otherwise the destination.  */
 275   if (s_register_operand (operands[0], <MODE>mode))
 276     adjust_mem = operands[1];
 277   else
 278     adjust_mem = operands[0];
 279
 280   /* Legitimize address.  */
 281   if (!neon_vector_mem_operand (adjust_mem, 2, true))
 282     XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
 283
 284 })
 285
 /* Misaligned store for VDX modes: emits vst1 with element size
    <V_sz_elem>, storing register operand 1 (printed with %P) to memory
    operand 0.  Little-endian NEON with unaligned_access only.  */
 286 (define_insn "*movmisalign<mode>_neon_store"
 287   [(set (match_operand:VDX 0 "neon_permissive_struct_operand"   "=Um")
 288         (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
 289                     UNSPEC_MISALIGNED_ACCESS))]
 290   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 291   "vst1.<V_sz_elem>\t{%P1}, %A0"
 292   [(set_attr "type" "neon_store1_1reg<q>")])
 293
 /* Misaligned load for VDX modes: emits vld1 with element size
    <V_sz_elem>, loading register operand 0 (printed with %P) from memory
    operand 1.  Little-endian NEON with unaligned_access only.  */
 294 (define_insn "*movmisalign<mode>_neon_load"
 295   [(set (match_operand:VDX 0 "s_register_operand"                       "=w")
 296         (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
 297                                                                         " Um")]
 298                     UNSPEC_MISALIGNED_ACCESS))]
 299   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 300   "vld1.<V_sz_elem>\t{%P0}, %A1"
 301   [(set_attr "type" "neon_load1_1reg<q>")])
 302
 /* Misaligned store for VQX modes: emits vst1 with element size
    <V_sz_elem>, storing register operand 1 (printed with %q) to memory
    operand 0.  Little-endian NEON with unaligned_access only.  */
 303 (define_insn "*movmisalign<mode>_neon_store"
 304   [(set (match_operand:VQX 0 "neon_permissive_struct_operand"  "=Um")
 305         (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
 306                     UNSPEC_MISALIGNED_ACCESS))]
 307   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 308   "vst1.<V_sz_elem>\t{%q1}, %A0"
 309   [(set_attr "type" "neon_store1_1reg<q>")])
 310
 /* Misaligned load for VQX modes: emits vld1 with element size
    <V_sz_elem>, loading register operand 0 (printed with %q) from memory
    operand 1.  Little-endian NEON with unaligned_access only.  */
 311 (define_insn "*movmisalign<mode>_neon_load"
 312   [(set (match_operand:VQX 0 "s_register_operand"                       "=w")
 313         (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
 314                                                                         " Um")]
 315                     UNSPEC_MISALIGNED_ACCESS))]
 316   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 317   "vld1.<V_sz_elem>\t{%q0}, %A1"
 318   [(set_attr "type" "neon_load1_1reg<q>")])
 319
 /* Insert scalar operand 1 into one lane of a VD_LANE vector.  Operand 3 is
    the incoming vector and is tied to the output register.  Operand 2 is the
    vec_merge selector; the lane index is recovered as the position of its
    lowest set bit.  Alternative 0 loads the lane from memory with vld1;
    alternative 1 moves it from a core register with vmov.  */
 320 (define_insn "vec_set<mode>_internal"
 321   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
 322         (vec_merge:VD_LANE
 323           (vec_duplicate:VD_LANE
 324             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
 325           (match_operand:VD_LANE 3 "s_register_operand" "0,0")
 326           (match_operand:SI 2 "immediate_operand" "i,i")))]
 327   "TARGET_NEON"
 328 {
       /* Convert the selector mask to a lane number.  */
 329   int elt = ffs ((int) INTVAL (operands[2])) - 1;
       /* On big-endian targets the lane number is mirrored within the
          vector.  */
 330   if (BYTES_BIG_ENDIAN)
 331     elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
 332   operands[2] = GEN_INT (elt);
 333
 334   if (which_alternative == 0)
 335     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
 336   else
 337     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
 338 }
 339   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
 340
 /* Insert scalar operand 1 into one lane of a VQ2 vector.  Operand 3 is the
    incoming vector and is tied to the output register.  The lane index is
    recovered from the lowest set bit of operand 2, then mapped onto one half
    of the register: operand 0 is rewritten as a <V_HALF>mode register and
    the instruction addresses a lane inside that half.  */
 341 (define_insn "vec_set<mode>_internal"
 342   [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
 343         (vec_merge:VQ2
 344           (vec_duplicate:VQ2
 345             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
 346           (match_operand:VQ2 3 "s_register_operand" "0,0")
 347           (match_operand:SI 2 "immediate_operand" "i,i")))]
 348   "TARGET_NEON"
 349 {
       /* Convert the selector mask to a lane number.  */
 350   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
 351   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
       /* elt is the lane within a half; hi is the register-number offset of
          the half that holds it (0 for the first half, 2 for the second).  */
 352   int elt = elem % half_elts;
 353   int hi = (elem / half_elts) * 2;
 354   int regno = REGNO (operands[0]);
 355
       /* On big-endian targets the lane number is mirrored within the
          half.  */
 356   if (BYTES_BIG_ENDIAN)
 357     elt = half_elts - 1 - elt;
 358
 359   operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
 360   operands[2] = GEN_INT (elt);
 361
 362   if (which_alternative == 0)
 363     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
 364   else
 365     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
 366 }
 367   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
 368 )
 369
 /* Insert a DImode scalar into one element of a V2DI vector.  The element
    index is recovered from the lowest set bit of operand 2, and operand 0 is
    rewritten as the DImode register at the base register number plus twice
    that index.  Alternative 0 loads it with vld1.64; alternative 1 moves it
    from a core register pair with vmov.  */
 370 (define_insn "vec_setv2di_internal"
 371   [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
 372         (vec_merge:V2DI
 373           (vec_duplicate:V2DI
 374             (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
 375           (match_operand:V2DI 3 "s_register_operand" "0,0")
 376           (match_operand:SI 2 "immediate_operand" "i,i")))]
 377   "TARGET_NEON"
 378 {
       /* Convert the selector mask to an element number.  */
 379   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
 380   int regno = REGNO (operands[0]) + 2 * elem;
 381
 382   operands[0] = gen_rtx_REG (DImode, regno);
 383
 384   if (which_alternative == 0)
 385     return "vld1.64\t%P0, %A1";
 386   else
 387     return "vmov\t%P0, %Q1, %R1";
 388 }
 389   [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
 390 )
 391
 /* Expander for setting one element of a VDQ vector.  Operand 2 is the
    element index; it is turned into a single-bit mask (1 shifted left by the
    index) and passed to the matching vec_set<mode>_internal pattern, with
    operand 0 used both as destination and as the incoming vector.  */
 392 (define_expand "vec_set<mode>"
 393   [(match_operand:VDQ 0 "s_register_operand" "")
 394    (match_operand:<V_elem> 1 "s_register_operand" "")
 395    (match_operand:SI 2 "immediate_operand" "")]
 396   "TARGET_NEON"
 397 {
 398   HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
 399   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
 400                                          GEN_INT (elem), operands[0]));
 401   DONE;
 402 })
 403
 /* Extract lane operand 2 of a VD_LANE vector into operand 0.  Alternative
    0 stores the lane to memory with vst1; alternative 1 moves it to a core
    register with vmov.  */
 404 (define_insn "vec_extract<mode><V_elem_l>"
 405   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
 406         (vec_select:<V_elem>
 407           (match_operand:VD_LANE 1 "s_register_operand" "w,w")
 408           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
 409   "TARGET_NEON"
 410 {
       /* On big-endian targets the lane number is mirrored within the
          vector.  */
 411   if (BYTES_BIG_ENDIAN)
 412     {
 413       int elt = INTVAL (operands[2]);
 414       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
 415       operands[2] = GEN_INT (elt);
 416     }
 417
 418   if (which_alternative == 0)
 419     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
 420   else
 421     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
 422 }
 423   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
 424 )
 425
 /* Extract lane operand 2 of a VQ2 vector into operand 0.  The lane index is
    mapped onto one half of the register: operand 1 is rewritten as a
    <V_HALF>mode register and the instruction addresses a lane inside that
    half.  Alternative 0 stores the lane with vst1; alternative 1 moves it to
    a core register with vmov.  */
 426 (define_insn "vec_extract<mode><V_elem_l>"
 427   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
 428         (vec_select:<V_elem>
 429           (match_operand:VQ2 1 "s_register_operand" "w,w")
 430           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
 431   "TARGET_NEON"
 432 {
 433   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
       /* elt is the lane within a half; hi is the register-number offset of
          the half that holds it (0 for the first half, 2 for the second).  */
 434   int elt = INTVAL (operands[2]) % half_elts;
 435   int hi = (INTVAL (operands[2]) / half_elts) * 2;
 436   int regno = REGNO (operands[1]);
 437
       /* On big-endian targets the lane number is mirrored within the
          half.  */
 438   if (BYTES_BIG_ENDIAN)
 439     elt = half_elts - 1 - elt;
 440
 441   operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
 442   operands[2] = GEN_INT (elt);
 443
 444   if (which_alternative == 0)
 445     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
 446   else
 447     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
 448 }
 449   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
 450 )
 451
 /* Extract element operand 2 of a V2DI vector into DImode operand 0.
    Operand 1 is rewritten as the DImode register at the base register number
    plus twice the element index.  Alternative 0 stores it with vst1.64;
    alternative 1 moves it to a core register pair with vmov.  */
 452 (define_insn "vec_extractv2didi"
 453   [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
 454         (vec_select:DI
 455           (match_operand:V2DI 1 "s_register_operand" "w,w")
 456           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
 457   "TARGET_NEON"
 458 {
 459   int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
 460
 461   operands[1] = gen_rtx_REG (DImode, regno);
 462
 463   if (which_alternative == 0)
 464     return "vst1.64\t{%P1}, %A0  @ v2di";
 465   else
 466     return "vmov\t%Q0, %R0, %P1  @ v2di";
 467 }
 468   [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
 469 )
 470
 /* Expander for initializing a VDQ vector register (operand 0) from
    operand 1.  All of the work is delegated to neon_expand_vector_init.  */
 471 (define_expand "vec_init<mode><V_elem_l>"
 472   [(match_operand:VDQ 0 "s_register_operand" "")
 473    (match_operand 1 "" "")]
 474   "TARGET_NEON"
 475 {
 476   neon_expand_vector_init (operands[0], operands[1]);
 477   DONE;
 478 })
 479
 480 ;; Doubleword and quadword arithmetic.
 481
 482 ;; NOTE: some other instructions also support 64-bit integer
 483 ;; element size, which we could potentially use for "long long" operations.
 484
 /* Vector addition for VDQ modes, emitted as vadd.  Float modes are only
    matched when flag_unsafe_math_optimizations is set.  The type attribute
    is neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.  */
 485 (define_insn "*add<mode>3_neon"
 486   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 487         (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
 488                   (match_operand:VDQ 2 "s_register_operand" "w")))]
 489   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 490   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 491   [(set (attr "type")
 492       (if_then_else (match_test "<Is_float_mode>")
 493                     (const_string "neon_fp_addsub_s<q>")
 494                     (const_string "neon_add<q>")))]
 495 )
 496
 497 ;; As with SFmode, full support for HFmode vector arithmetic is only available
 498 ;; when -funsafe-math-optimizations is enabled.
 499
 /* Named vector addition pattern for VH modes, emitted as vadd.  Requires
    TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.  */
 500 (define_insn "add<mode>3"
 501   [(set
 502     (match_operand:VH 0 "s_register_operand" "=w")
 503     (plus:VH
 504      (match_operand:VH 1 "s_register_operand" "w")
 505      (match_operand:VH 2 "s_register_operand" "w")))]
 506  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 507  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 508  [(set (attr "type")
 509    (if_then_else (match_test "<Is_float_mode>")
 510     (const_string "neon_fp_addsub_s<q>")
 511     (const_string "neon_add<q>")))]
 512 )
 513
 /* Vector addition for VH modes, emitted as vadd.  Same RTL and template as
    add<mode>3 for VH, but the condition is TARGET_NEON_FP16INST alone, with
    no dependence on flag_unsafe_math_optimizations.  */
 514 (define_insn "add<mode>3_fp16"
 515   [(set
 516     (match_operand:VH 0 "s_register_operand" "=w")
 517     (plus:VH
 518      (match_operand:VH 1 "s_register_operand" "w")
 519      (match_operand:VH 2 "s_register_operand" "w")))]
 520  "TARGET_NEON_FP16INST"
 521  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 522  [(set (attr "type")
 523    (if_then_else (match_test "<Is_float_mode>")
 524     (const_string "neon_fp_addsub_s<q>")
 525     (const_string "neon_add<q>")))]
 526 )
 527
 /* DImode addition under TARGET_NEON, clobbering the condition-code
    register.  Alternatives 0 and 3 (all operands in w registers) emit
    vadd.i64; the arch attribute assigns them to neon_for_64bits and
    avoid_neon_for_64bits respectively.  Every other alternative returns the
    # template (presumably split into core-register instructions elsewhere;
    the splitter is not visible in this part of the file).  */
 528 (define_insn "adddi3_neon"
 529   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
 530         (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
 531                  (match_operand:DI 2 "arm_adddi_operand"     "w,r,0,w,r,Dd,Dd")))
 532    (clobber (reg:CC CC_REGNUM))]
 533   "TARGET_NEON"
 534 {
 535   switch (which_alternative)
 536     {
 537     case 0: /* fall through */
 538     case 3: return "vadd.i64\t%P0, %P1, %P2";
 539     case 1: return "#";
 540     case 2: return "#";
 541     case 4: return "#";
 542     case 5: return "#";
 543     case 6: return "#";
 544     default: gcc_unreachable ();
 545     }
 546 }
 547   [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
 548                      multiple,multiple,multiple")
 549    (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
 550    (set_attr "length" "*,8,8,*,8,8,8")
 551    (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
 552 )
 553
 /* Vector subtraction for VDQ modes, emitted as vsub.  Float modes are only
    matched when flag_unsafe_math_optimizations is set.  The type attribute
    is neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.  */
 554 (define_insn "*sub<mode>3_neon"
 555   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 556         (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
 557                    (match_operand:VDQ 2 "s_register_operand" "w")))]
 558   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 559   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 560   [(set (attr "type")
 561       (if_then_else (match_test "<Is_float_mode>")
 562                     (const_string "neon_fp_addsub_s<q>")
 563                     (const_string "neon_sub<q>")))]
 564 )
 565
 /* Named vector subtraction pattern for VH modes, emitted as vsub.
    Requires TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
    NOTE(review): the type attribute is neon_sub<q>, whereas the VH addition
    patterns select between neon_fp_addsub_s<q> and neon_add<q> via
    <Is_float_mode>; confirm whether the FP type was intended here.  */
 566 (define_insn "sub<mode>3"
 567  [(set
 568    (match_operand:VH 0 "s_register_operand" "=w")
 569    (minus:VH
 570     (match_operand:VH 1 "s_register_operand" "w")
 571     (match_operand:VH 2 "s_register_operand" "w")))]
 572  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 573  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 574  [(set_attr "type" "neon_sub<q>")]
 575 )
 576
 /* Vector subtraction for VH modes, emitted as vsub.  Same RTL and template
    as sub<mode>3 for VH, but the condition is TARGET_NEON_FP16INST alone.
    NOTE(review): the type attribute is neon_sub<q>, whereas the VH addition
    patterns select between neon_fp_addsub_s<q> and neon_add<q> via
    <Is_float_mode>; confirm whether the FP type was intended here.  */
 577 (define_insn "sub<mode>3_fp16"
 578  [(set
 579    (match_operand:VH 0 "s_register_operand" "=w")
 580    (minus:VH
 581     (match_operand:VH 1 "s_register_operand" "w")
 582     (match_operand:VH 2 "s_register_operand" "w")))]
 583  "TARGET_NEON_FP16INST"
 584  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 585  [(set_attr "type" "neon_sub<q>")]
 586 )
 587
 /* DImode subtraction under TARGET_NEON, clobbering the condition-code
    register.  Alternatives 0 and 4 (all operands in w registers) emit
    vsub.i64; the arch attribute assigns them to neon_for_64bits and
    avoid_neon_for_64bits respectively.  Alternatives 1 to 3 use core
    registers and emit a subs/sbc pair on the low and high words, with a
    length of 8.  */
 588 (define_insn "subdi3_neon"
 589   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
 590         (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
 591                   (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
 592    (clobber (reg:CC CC_REGNUM))]
 593   "TARGET_NEON"
 594 {
 595   switch (which_alternative)
 596     {
 597     case 0: /* fall through */
 598     case 4: return "vsub.i64\t%P0, %P1, %P2";
 599     case 1: /* fall through */
 600     case 2: /* fall through */
 601     case 3: return  "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
 602     default: gcc_unreachable ();
 603     }
 604 }
 605   [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
 606    (set_attr "conds" "*,clob,clob,clob,*")
 607    (set_attr "length" "*,8,8,8,*")
 608    (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
 609 )
 610
 /* Vector multiplication for VDQW modes, emitted as vmul.  Float modes are
    only matched when flag_unsafe_math_optimizations is set.  The type
    attribute is neon_fp_mul_s<q> for float modes and
    neon_mul_<V_elem_ch><q> otherwise.  */
 611 (define_insn "*mul<mode>3_neon"
 612   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 613         (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
 614                    (match_operand:VDQW 2 "s_register_operand" "w")))]
 615   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 616   "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 617   [(set (attr "type")
 618       (if_then_else (match_test "<Is_float_mode>")
 619                     (const_string "neon_fp_mul_s<q>")
 620                     (const_string "neon_mul_<V_elem_ch><q>")))]
 621 )
 622
 623 /* Perform division using multiply-by-reciprocal.
 624    Reciprocal is calculated using Newton-Raphson method.
 625    Enabled with -funsafe-math-optimizations -freciprocal-math
 626    and disabled for -Os since it increases code size.  */
 627
 /* Expand a VCVTF vector division as operand 1 multiplied by an
    approximation of the reciprocal of operand 2.  The reciprocal starts from
    the vrecpe estimate and is refined by two vrecps/multiply steps.

    The expander emits floating-point vector multiplies through
    gen_mul<mode>3, and the multiply insn in this file only matches float
    modes when flag_unsafe_math_optimizations is set.  The condition
    therefore requires that flag as well as flag_reciprocal_math; otherwise
    the emitted multiplies could not be recognized.  The expansion is
    disabled when optimizing for size.  */
 628 (define_expand "div<mode>3"
 629   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
 630         (div:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
 631                   (match_operand:VCVTF 2 "s_register_operand" "w")))]
 632   "TARGET_NEON && !optimize_size
 633    && flag_unsafe_math_optimizations && flag_reciprocal_math"
 634   {
 635     rtx rec = gen_reg_rtx (<MODE>mode);
 636     rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
 637
 638     /* Reciprocal estimate.  */
 639     emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
 640
 641     /* Perform 2 iterations of newton-raphson method.  */
 642     for (int i = 0; i < 2; i++)
 643       {
 644         emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
 645         emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
 646       }
 647
 648     /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
 649     emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
 650     DONE;
 651   }
 652 )
 653
 654
 /* Multiply-accumulate for VDQW modes: operand 0 = operand 2 * operand 3 +
    operand 1, emitted as vmla.  The accumulator (operand 1) is tied to the
    output register.  Float modes are only matched when
    flag_unsafe_math_optimizations is set.  */
 655 (define_insn "mul<mode>3add<mode>_neon"
 656   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 657         (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
 658                             (match_operand:VDQW 3 "s_register_operand" "w"))
 659                   (match_operand:VDQW 1 "s_register_operand" "0")))]
 660   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 661   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
 662   [(set (attr "type")
 663       (if_then_else (match_test "<Is_float_mode>")
 664                     (const_string "neon_fp_mla_s<q>")
 665                     (const_string "neon_mla_<V_elem_ch><q>")))]
 666 )
 667
 /* Multiply-accumulate for VH modes: operand 0 = operand 2 * operand 3 +
    operand 1, emitted as vmla.f16.  The accumulator (operand 1) is tied to
    the output register.  Requires TARGET_NEON_FP16INST; the condition also
    tests <Is_float_mode> against flag_unsafe_math_optimizations.  */
 668 (define_insn "mul<mode>3add<mode>_neon"
 669   [(set (match_operand:VH 0 "s_register_operand" "=w")
 670         (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
 671                           (match_operand:VH 3 "s_register_operand" "w"))
 672                   (match_operand:VH 1 "s_register_operand" "0")))]
 673   "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 674   "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
 675   [(set_attr "type" "neon_fp_mla_s<q>")]
 676 )
 677
 /* Multiply-subtract for VDQW modes: operand 0 = operand 1 - operand 2 *
    operand 3, emitted as vmls.  The accumulator (operand 1) is tied to the
    output register.  Float modes are only matched when
    flag_unsafe_math_optimizations is set.  */
 678 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
 679   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 680         (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
 681                     (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
 682                                (match_operand:VDQW 3 "s_register_operand" "w"))))]
 683   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 684   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
 685   [(set (attr "type")
 686       (if_then_else (match_test "<Is_float_mode>")
 687                     (const_string "neon_fp_mla_s<q>")
 688                     (const_string "neon_mla_<V_elem_ch><q>")))]
 689 )
 690
 691 ;; Fused multiply-accumulate
 692 ;; We define each insn twice here:
 693 ;;    1: with flag_unsafe_math_optimizations for the widening multiply phase
 694 ;;       to be able to use when converting to FMA.
 695 ;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.
 /* Fused multiply-add for VCVTF modes: operand 0 = operand 1 * operand 2 +
    operand 3, emitted as vfma.  The addend (operand 3) is tied to the output
    register.  Requires TARGET_NEON, TARGET_FMA and
    flag_unsafe_math_optimizations.  */
 696 (define_insn "fma<VCVTF:mode>4"
 697   [(set (match_operand:VCVTF 0 "register_operand" "=w")
 698         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
 699                  (match_operand:VCVTF 2 "register_operand" "w")
 700                  (match_operand:VCVTF 3 "register_operand" "0")))]
 701   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
 702   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 703   [(set_attr "type" "neon_fp_mla_s<q>")]
 704 )
 705
 /* Fused multiply-add for VCVTF modes with the same RTL and vfma template
    as fma<VCVTF:mode>4, but enabled by TARGET_NEON and TARGET_FMA alone,
    without flag_unsafe_math_optimizations.  */
 706 (define_insn "fma<VCVTF:mode>4_intrinsic"
 707   [(set (match_operand:VCVTF 0 "register_operand" "=w")
 708         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
 709                  (match_operand:VCVTF 2 "register_operand" "w")
 710                  (match_operand:VCVTF 3 "register_operand" "0")))]
 711   "TARGET_NEON && TARGET_FMA"
 712   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 713   [(set_attr "type" "neon_fp_mla_s<q>")]
 714 )
 715
 /* Fused multiply-add for VH modes: operand 0 = operand 1 * operand 2 +
    operand 3, emitted as vfma.  The addend (operand 3) is tied to the output
    register.  Requires TARGET_NEON_FP16INST and
    flag_unsafe_math_optimizations.  */
 716 (define_insn "fma<VH:mode>4"
 717  [(set (match_operand:VH 0 "register_operand" "=w")
 718    (fma:VH
 719     (match_operand:VH 1 "register_operand" "w")
 720     (match_operand:VH 2 "register_operand" "w")
 721     (match_operand:VH 3 "register_operand" "0")))]
 722  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 723  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 724  [(set_attr "type" "neon_fp_mla_s<q>")]
 725 )
 726
 /* Fused multiply-add for VH modes with the same RTL and vfma template as
    fma<VH:mode>4, but enabled by TARGET_NEON_FP16INST alone, without
    flag_unsafe_math_optimizations.  */
 727 (define_insn "fma<VH:mode>4_intrinsic"
 728  [(set (match_operand:VH 0 "register_operand" "=w")
 729    (fma:VH
 730     (match_operand:VH 1 "register_operand" "w")
 731     (match_operand:VH 2 "register_operand" "w")
 732     (match_operand:VH 3 "register_operand" "0")))]
 733  "TARGET_NEON_FP16INST"
 734  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 735  [(set_attr "type" "neon_fp_mla_s<q>")]
 736 )
 737
 /* Fused multiply-subtract for VCVTF modes: operand 0 = (-operand 1) *
    operand 2 + operand 3, emitted as vfms.  The addend (operand 3) is tied
    to the output register.  Requires TARGET_NEON, TARGET_FMA and
    flag_unsafe_math_optimizations.  */
 738 (define_insn "*fmsub<VCVTF:mode>4"
 739   [(set (match_operand:VCVTF 0 "register_operand" "=w")
 740         (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
 741                    (match_operand:VCVTF 2 "register_operand" "w")
 742                    (match_operand:VCVTF 3 "register_operand" "0")))]
 743   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
 744   "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 745   [(set_attr "type" "neon_fp_mla_s<q>")]
 746 )
 747
 /* Fused multiply-subtract for VCVTF modes with the same RTL and vfms
    template as *fmsub<VCVTF:mode>4, but enabled by TARGET_NEON and
    TARGET_FMA alone, without flag_unsafe_math_optimizations.  */
 748 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
 749  [(set (match_operand:VCVTF 0 "register_operand" "=w")
 750    (fma:VCVTF
 751     (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
 752     (match_operand:VCVTF 2 "register_operand" "w")
 753     (match_operand:VCVTF 3 "register_operand" "0")))]
 754  "TARGET_NEON && TARGET_FMA"
 755  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 756  [(set_attr "type" "neon_fp_mla_s<q>")]
 757 )
 758
 /* Fused multiply-subtract for VH modes: operand 0 = (-operand 1) *
    operand 2 + operand 3, emitted as vfms.  The addend (operand 3) is tied
    to the output register.  Requires TARGET_NEON_FP16INST.  */
 759 (define_insn "fmsub<VH:mode>4_intrinsic"
 760  [(set (match_operand:VH 0 "register_operand" "=w")
 761    (fma:VH
 762     (neg:VH (match_operand:VH 1 "register_operand" "w"))
 763     (match_operand:VH 2 "register_operand" "w")
 764     (match_operand:VH 3 "register_operand" "0")))]
 765  "TARGET_NEON_FP16INST"
 766  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 767  [(set_attr "type" "neon_fp_mla_s<q>")]
 768 )
 769
 /* Rounding patterns for VCVTF modes, one per NEON_VRINT unspec.  Each
    emits vrint with the <nvrint_variant> suffix on .f32 elements.  Requires
    TARGET_NEON and TARGET_VFP5.  */
 770 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
 771   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
 772         (unspec:VCVTF [(match_operand:VCVTF 1
 773                          "s_register_operand" "w")]
 774                 NEON_VRINT))]
 775   "TARGET_NEON && TARGET_VFP5"
 776   "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
 777   [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
 778 )
 779
 /* Float-to-integer conversion patterns for VCVTF modes, one per NEON_VCVT
    unspec and FIXUORS code.  Each emits vcvt with the <nvrint_variant>
    suffix, converting .f32 elements to <su>32 elements in a
    <V_cmp_result>-mode destination.  Requires TARGET_NEON and TARGET_VFP5;
    marked not predicable.  */
 780 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
 781   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
 782         (FIXUORS:<V_cmp_result> (unspec:VCVTF
 783                                [(match_operand:VCVTF 1 "register_operand" "w")]
 784                                NEON_VCVT)))]
 785   "TARGET_NEON && TARGET_VFP5"
 786   "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
 787   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
 788    (set_attr "predicable" "no")]
 789 )
 790
 /* Bitwise OR for VDQ modes.  Alternative 0 takes two registers and emits
    vorr directly.  Alternative 1 takes an immediate (Dl constraint) with the
    first source tied to the output, and gets its template from
    neon_output_logic_immediate.  */
 791 (define_insn "ior<mode>3"
 792   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
 793         (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
 794                  (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
 795   "TARGET_NEON"
 796 {
 797   switch (which_alternative)
 798     {
 799     case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
 800     case 1: return neon_output_logic_immediate ("vorr", &operands[2],
 801                      <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
 802     default: gcc_unreachable ();
 803     }
 804 }
 805   [(set_attr "type" "neon_logic<q>")]
 806 )
 807
 808 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
 809 ;; vorr. We support the pseudo-instruction vand instead, because that
 810 ;; corresponds to the canonical form the middle-end expects to use for
 811 ;; immediate bitwise-ANDs.
 812
 813 (define_insn "and<mode>3"
       ;; Bitwise AND on the VDQ modes.  Alternative 0 takes two registers and
       ;; emits vand directly.  Alternative 1 takes a "DL" operand 2 with
       ;; operand 1 tied to the destination, and obtains its output text from
       ;; neon_output_logic_immediate (called with 1 as the fourth argument,
       ;; where ior<mode>3 passes 0).
 814   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
 815         (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
 816                  (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
 817   "TARGET_NEON"
 818 {
 819   switch (which_alternative)
 820     {
 821     case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
 822     case 1: return neon_output_logic_immediate ("vand", &operands[2],
 823                      <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
 824     default: gcc_unreachable ();
 825     }
 826 }
 827   [(set_attr "type" "neon_logic<q>")]
 828 )
 829
 830 (define_insn "orn<mode>3_neon"
       ;; op0 = op1 | ~op2 on the VDQ modes, emitted as one vorn.  Note that
       ;; the complemented operand is numbered 2 although it appears first in
       ;; the RTL, so the assembler operands come out in %0, %1, %2 order.
 831   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 832         (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
 833                  (match_operand:VDQ 1 "s_register_operand" "w")))]
 834   "TARGET_NEON"
 835   "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 836   [(set_attr "type" "neon_logic<q>")]
 837 )
 838
 839 ;; TODO: investigate whether we should disable
 840 ;; this and bicdi3_neon for the A8 in line with the other
 841 ;; changes above.
 842 (define_insn_and_split "orndi3_neon"
       ;; DImode op0 = op1 | ~op2.  Alternative 0 keeps all operands in "w"
       ;; registers and emits one vorn.  Alternatives 1-3 use core registers,
       ;; output "#", and are split once reload has completed and operand 0
       ;; is not a VFP register.  Per the "arch" attribute, alternative 1 is
       ;; for "a" and alternatives 2-3 are for "t2"; the "length" attribute
       ;; gives 16 bytes for alternative 1 and 8 for alternatives 2-3.
 843   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
 844         (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
 845                 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
 846   "TARGET_NEON"
 847   "@
 848    vorn\t%P0, %P1, %P2
 849    #
 850    #
 851    #"
 852   "reload_completed &&
 853    (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
 854   [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
 855    (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
 856   "
 857   {
 858     if (TARGET_THUMB2)
 859       {
             /* Rewrite operands 0-2 as their SImode low parts and place the
                matching high parts in operands 3-5, so that the two SImode
                sets in the split template above handle one half each.  */
 860         operands[3] = gen_highpart (SImode, operands[0]);
 861         operands[0] = gen_lowpart (SImode, operands[0]);
 862         operands[4] = gen_highpart (SImode, operands[2]);
 863         operands[2] = gen_lowpart (SImode, operands[2]);
 864         operands[5] = gen_highpart (SImode, operands[1]);
 865         operands[1] = gen_lowpart (SImode, operands[1]);
 866       }
 867     else
 868       {
             /* Not Thumb-2: bypass the split template.  Emit a DImode
                complement of operand 2 into operand 0, then a DImode OR of
                operand 1 with that result, and finish with DONE.  */
 869         emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
 870         emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
 871         DONE;
 872       }
 873   }"
 874   [(set_attr "type" "neon_logic,multiple,multiple,multiple")
 875    (set_attr "length" "*,16,8,8")
 876    (set_attr "arch" "any,a,t2,t2")]
 877 )
 878
 879 (define_insn "bic<mode>3_neon"
       ;; op0 = op1 & ~op2 on the VDQ modes, emitted as one vbic.  As in
       ;; orn<mode>3_neon, the complemented operand is numbered 2.
 880   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 881         (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
 882                  (match_operand:VDQ 1 "s_register_operand" "w")))]
 883   "TARGET_NEON"
 884   "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 885   [(set_attr "type" "neon_logic<q>")]
 886 )
 887
 888 ;; Compare to *anddi_notdi_di.
 889 (define_insn "bicdi3_neon"
       ;; DImode op0 = op1 & ~op2.  Alternative 0 keeps all operands in "w"
       ;; registers and emits one vbic.  Alternatives 1-2 use core registers
       ;; and output "#" with a length of 8 bytes.
       ;; NOTE(review): this is a plain define_insn, so the splitter for the
       ;; "#" alternatives is not part of this pattern; presumably it is
       ;; provided elsewhere -- confirm.
 890   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
 891         (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
 892                 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
 893   "TARGET_NEON"
 894   "@
 895    vbic\t%P0, %P1, %P2
 896    #
 897    #"
 898   [(set_attr "type" "neon_logic,multiple,multiple")
 899    (set_attr "length" "*,8,8")]
 900 )
 901
 902 (define_insn "xor<mode>3"
       ;; Bitwise XOR of two VDQ registers, emitted as one veor.  Unlike
       ;; ior<mode>3 and and<mode>3 there is no immediate alternative.
 903   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 904         (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
 905                  (match_operand:VDQ 2 "s_register_operand" "w")))]
 906   "TARGET_NEON"
 907   "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 908   [(set_attr "type" "neon_logic<q>")]
 909 )
 910
 911 (define_insn "one_cmpl<mode>2"
       ;; Bitwise complement of a VDQ register, emitted as one vmvn.
 912   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 913         (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
 914   "TARGET_NEON"
 915   "vmvn\t%<V_reg>0, %<V_reg>1"
 916   [(set_attr "type" "neon_move<q>")]
 917 )
 918
 919 (define_insn "abs<mode>2"
       ;; Absolute value on the VDQW modes, emitted as vabs.<V_s_elem>.
       ;; The "type" attribute is chosen per mode: neon_fp_abs_s<q> when
       ;; <Is_float_mode> tests true, neon_abs<q> otherwise.
 920   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 921         (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
 922   "TARGET_NEON"
 923   "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 924   [(set (attr "type")
 925       (if_then_else (match_test "<Is_float_mode>")
 926                     (const_string "neon_fp_abs_s<q>")
 927                     (const_string "neon_abs<q>")))]
 928 )
 929
 930 (define_insn "neg<mode>2"
       ;; Negation on the VDQW modes, emitted as vneg.<V_s_elem>.
       ;; The "type" attribute is chosen per mode: neon_fp_neg_s<q> when
       ;; <Is_float_mode> tests true, neon_neg<q> otherwise.
 931   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 932         (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
 933   "TARGET_NEON"
 934   "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 935   [(set (attr "type")
 936       (if_then_else (match_test "<Is_float_mode>")
 937                     (const_string "neon_fp_neg_s<q>")
 938                     (const_string "neon_neg<q>")))]
 939 )
 940
 941 (define_insn "negdi2_neon"
       ;; DImode negation that is always output as "#"; the two define_splits
       ;; that follow match this shape.  Alternatives 0-1 use "w" registers
       ;; and alternatives 2-3 core registers.  Only alternative 1 requests a
       ;; real DImode scratch ("&w"); the others use "X".  Alternative 0
       ;; instead marks the destination earlyclobber.  The condition-code
       ;; register is clobbered in every alternative.
 942   [(set (match_operand:DI 0 "s_register_operand"         "=&w, w,r,&r")
 943         (neg:DI (match_operand:DI 1 "s_register_operand" "  w, w,0, r")))
 944    (clobber (match_scratch:DI 2                          "= X,&w,X, X"))
 945    (clobber (reg:CC CC_REGNUM))]
 946   "TARGET_NEON"
 947   "#"
 948   [(set_attr "length" "8")
 949    (set_attr "type" "multiple")]
 950 )
 951
 952 ; Split negdi2_neon for vfp registers
 953 (define_split
       ;; Applies after reload when the destination is a VFP register.
       ;; The result is computed as 0 - op1: operand 2 is first set to zero,
       ;; then subtracted from.  When operand 2 is not a register (the
       ;; scratch was not allocated), the destination itself is used to hold
       ;; the zero.
 954   [(set (match_operand:DI 0 "s_register_operand" "")
 955         (neg:DI (match_operand:DI 1 "s_register_operand" "")))
 956    (clobber (match_scratch:DI 2 ""))
 957    (clobber (reg:CC CC_REGNUM))]
 958   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
 959   [(set (match_dup 2) (const_int 0))
 960    (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
 961               (clobber (reg:CC CC_REGNUM))])]
 962   {
 963     if (!REG_P (operands[2]))
 964       operands[2] = operands[0];
 965   }
 966 )
 967
 968 ; Split negdi2_neon for core registers
 969 (define_split
       ;; Applies after reload when the destination satisfies
       ;; arm_general_register_operand.  The replacement is the same DImode
       ;; negation with the CC clobber but without the scratch clobber.
 970   [(set (match_operand:DI 0 "s_register_operand" "")
 971         (neg:DI (match_operand:DI 1 "s_register_operand" "")))
 972    (clobber (match_scratch:DI 2 ""))
 973    (clobber (reg:CC CC_REGNUM))]
 974   "TARGET_32BIT && reload_completed
 975    && arm_general_register_operand (operands[0], DImode)"
 976   [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
 977               (clobber (reg:CC CC_REGNUM))])]
 978   ""
 979 )
 980
 981 (define_insn "<absneg_str><mode>2"
       ;; One pattern per ABSNEG code over the VH modes, emitted as
       ;; v<absneg_str>.<V_s_elem>.  Gated on TARGET_NEON_FP16INST.
 982   [(set (match_operand:VH 0 "s_register_operand" "=w")
 983     (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
 984  "TARGET_NEON_FP16INST"
 985  "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 986  [(set_attr "type" "neon_abs<q>")]
 987 )
 988
 989 (define_expand "neon_v<absneg_str><mode>"
       ;; Named expander that simply forwards both operands to the
       ;; <absneg_str><mode>2 pattern for the VH modes.
 990  [(set
 991    (match_operand:VH 0 "s_register_operand")
 992    (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
 993  "TARGET_NEON_FP16INST"
 994 {
 995   emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
 996   DONE;
 997 })
 998
 999 (define_insn "neon_v<fp16_rnd_str><mode>"
       ;; One pattern per FP16_RND unspec over the VH modes; the mnemonic
       ;; comes from <fp16_rnd_insn>.  Gated on TARGET_NEON_FP16INST.
1000   [(set (match_operand:VH 0 "s_register_operand" "=w")
1001     (unspec:VH
1002      [(match_operand:VH 1 "s_register_operand" "w")]
1003      FP16_RND))]
1004  "TARGET_NEON_FP16INST"
1005  "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
1006  [(set_attr "type" "neon_fp_round_s<q>")]
1007 )
1008
1009 (define_insn "neon_vrsqrte<mode>"
       ;; UNSPEC_VRSQRTE of one VH register, emitted as vrsqrte.f16.
       ;; Gated on TARGET_NEON_FP16INST.
1010   [(set (match_operand:VH 0 "s_register_operand" "=w")
1011     (unspec:VH
1012      [(match_operand:VH 1 "s_register_operand" "w")]
1013      UNSPEC_VRSQRTE))]
1014   "TARGET_NEON_FP16INST"
1015   "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
1016  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
1017 )
1018
1019 (define_insn "*umin<mode>3_neon"
       ;; Unsigned minimum of two VDQIW registers, emitted as
       ;; vmin.<V_u_elem>.
1020   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1021         (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1022                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
1023   "TARGET_NEON"
1024   "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1025   [(set_attr "type" "neon_minmax<q>")]
1026 )
1027
1028 (define_insn "*umax<mode>3_neon"
       ;; Unsigned maximum of two VDQIW registers, emitted as
       ;; vmax.<V_u_elem>.
1029   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1030         (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1031                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
1032   "TARGET_NEON"
1033   "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1034   [(set_attr "type" "neon_minmax<q>")]
1035 )
1036
1037 (define_insn "*smin<mode>3_neon"
       ;; smin of two VDQW registers, emitted as vmin.<V_s_elem>.  The "type"
       ;; attribute is neon_fp_minmax_s<q> when <Is_float_mode> tests true
       ;; and neon_minmax<q> otherwise.
1038   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1039         (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1040                    (match_operand:VDQW 2 "s_register_operand" "w")))]
1041   "TARGET_NEON"
1042   "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1043   [(set (attr "type")
1044       (if_then_else (match_test "<Is_float_mode>")
1045                     (const_string "neon_fp_minmax_s<q>")
1046                     (const_string "neon_minmax<q>")))]
1047 )
1048
1049 (define_insn "*smax<mode>3_neon"
       ;; smax of two VDQW registers, emitted as vmax.<V_s_elem>.  The "type"
       ;; attribute is neon_fp_minmax_s<q> when <Is_float_mode> tests true
       ;; and neon_minmax<q> otherwise.
1050   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1051         (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1052                    (match_operand:VDQW 2 "s_register_operand" "w")))]
1053   "TARGET_NEON"
1054   "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1055   [(set (attr "type")
1056       (if_then_else (match_test "<Is_float_mode>")
1057                     (const_string "neon_fp_minmax_s<q>")
1058                     (const_string "neon_minmax<q>")))]
1059 )
1060
1061 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1062 ; generic vectorizer code.  It ends up creating a V2DI constructor with
1063 ; SImode elements.
1064
1065 (define_insn "vashl<mode>3"
       ;; Left shift on the VDQIW modes.  Alternative 0 shifts by a register
       ;; and emits vshl.<V_s_elem> directly.  Alternative 1 shifts by a "Dn"
       ;; operand and obtains its output text from
       ;; neon_output_shift_immediate, called with 'i' as the sign character
       ;; and true as the final argument.  The "type" attribute lists one
       ;; value per alternative.
1066   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1067         (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1068                       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1069   "TARGET_NEON"
1070   {
1071     switch (which_alternative)
1072       {
1073         case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1074         case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1075                                                     <MODE>mode,
1076                                                     VALID_NEON_QREG_MODE (<MODE>mode),
1077                                                     true);
1078         default: gcc_unreachable ();
1079       }
1080   }
1081   [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1082 )
1083
1084 (define_insn "vashr<mode>3_imm"
       ;; Arithmetic right shift of a VDQIW register by a "Dn" operand that
       ;; satisfies imm_for_neon_rshift_operand.  The output text comes from
       ;; neon_output_shift_immediate with mnemonic "vshr", sign character
       ;; 's' and false as the final argument.
1085   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1086         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1087                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1088   "TARGET_NEON"
1089   {
1090     return neon_output_shift_immediate ("vshr", 's', &operands[2],
1091                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1092                                         false);
1093   }
1094   [(set_attr "type" "neon_shift_imm<q>")]
1095 )
1096
1097 (define_insn "vlshr<mode>3_imm"
       ;; Logical right shift of a VDQIW register by a "Dn" operand that
       ;; satisfies imm_for_neon_rshift_operand.  Identical to
       ;; vashr<mode>3_imm except for the lshiftrt code and the 'u' sign
       ;; character passed to neon_output_shift_immediate.
1098   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1099         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1100                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1101   "TARGET_NEON"
1102   {
1103     return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1104                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1105                                         false);
1106   }
1107   [(set_attr "type" "neon_shift_imm<q>")]
1108 )
1109
1110 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1111 ; negative amount, with signed operands. This is essentially the same as
1112 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1113 ; with negative shift amounts.
1114
1115 (define_insn "ashl<mode>3_signed"
       ;; UNSPEC_ASHIFT_SIGNED of two VDQI registers, emitted as
       ;; vshl.<V_s_elem>.  The vashr<mode>3 expander emits this with a
       ;; negated shift-count register.
1116   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1117         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1118                       (match_operand:VDQI 2 "s_register_operand" "w")]
1119                      UNSPEC_ASHIFT_SIGNED))]
1120   "TARGET_NEON"
1121   "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1122   [(set_attr "type" "neon_shift_reg<q>")]
1123 )
1124
1125 ; Used for implementing logical shift-right, which is a left-shift by a negative
1126 ; amount, with unsigned operands.
1127
1128 (define_insn "ashl<mode>3_unsigned"
       ;; UNSPEC_ASHIFT_UNSIGNED of two VDQI registers, emitted as
       ;; vshl.<V_u_elem>.  The vlshr<mode>3 expander emits this with a
       ;; negated shift-count register.
1129   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1130         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1131                       (match_operand:VDQI 2 "s_register_operand" "w")]
1132                      UNSPEC_ASHIFT_UNSIGNED))]
1133   "TARGET_NEON"
1134   "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1135   [(set_attr "type" "neon_shift_reg<q>")]
1136 )
1137
1138 (define_expand "vashr<mode>3"
       ;; Arithmetic right shift on the VDQIW modes.  A register shift count
       ;; is negated into a fresh pseudo and passed to ashl<mode>3_signed;
       ;; any other accepted operand 2 goes to vashr<mode>3_imm.
1139   [(set (match_operand:VDQIW 0 "s_register_operand" "")
1140         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1141                         (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1142   "TARGET_NEON"
1143 {
1144   if (s_register_operand (operands[2], <MODE>mode))
1145     {
1146       rtx neg = gen_reg_rtx (<MODE>mode);
1147       emit_insn (gen_neg<mode>2 (neg, operands[2]));
1148       emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1149     }
1150   else
1151     emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1152   DONE;
1153 })
1154
1155 (define_expand "vlshr<mode>3"
       ;; Logical right shift on the VDQIW modes.  A register shift count is
       ;; negated into a fresh pseudo and passed to ashl<mode>3_unsigned;
       ;; any other accepted operand 2 goes to vlshr<mode>3_imm.
1156   [(set (match_operand:VDQIW 0 "s_register_operand" "")
1157         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1158                         (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1159   "TARGET_NEON"
1160 {
1161   if (s_register_operand (operands[2], <MODE>mode))
1162     {
1163       rtx neg = gen_reg_rtx (<MODE>mode);
1164       emit_insn (gen_neg<mode>2 (neg, operands[2]));
1165       emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1166     }
1167   else
1168     emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1169   DONE;
1170 })
1171
1172 ;; 64-bit shifts
1173
1174 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1175 ;; leaving the upper half uninitialized.  This is OK since the shift
1176 ;; instruction only looks at the low 8 bits anyway.  To avoid confusing
1177 ;; data flow analysis however, we pretend the full register is set
1178 ;; using an unspec.
1179 (define_insn "neon_load_count"
       ;; Two alternatives for the SImode source: a "Um" operand, loaded with
       ;; vld1.32 into lane 0 of the destination, or a core register, moved
       ;; into lane 0 with vmov.32.  The DImode result is wrapped in
       ;; UNSPEC_LOAD_COUNT.
1180   [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1181         (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1182                    UNSPEC_LOAD_COUNT))]
1183   "TARGET_NEON"
1184   "@
1185    vld1.32\t{%P0[0]}, %A1
1186    vmov.32\t%P0[0], %1"
1187   [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1188 )
1189
1190 (define_insn "ashldi3_neon_noclobber"
       ;; DImode left shift in "w" registers without any clobbers, valid only
       ;; after reload.  Alternative 0 shifts by an immediate, which the insn
       ;; condition restricts to the range 0..63; alternative 1 shifts by a
       ;; "w" register.  Both emit vshl.u64.  The ashldi3_neon splitter emits
       ;; this pattern.
1191   [(set (match_operand:DI 0 "s_register_operand"            "=w,w")
1192         (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1193                    (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1194   "TARGET_NEON && reload_completed
1195    && (!CONST_INT_P (operands[2])
1196        || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1197   "@
1198    vshl.u64\t%P0, %P1, %2
1199    vshl.u64\t%P0, %P1, %P2"
1200   [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1201 )
1202
1203 (define_insn_and_split "ashldi3_neon"
       ;; DImode left shift by an SImode amount; always output as "#" and
       ;; split after reload.  Of the seven alternatives, 0, 1, 5 and 6 put
       ;; the result in a "w" register and 2-4 in core registers.  Scratches
       ;; 3 and 4 (SImode core registers) are requested only by alternative
       ;; 2.  Scratch 5 (a DImode "w" register) is requested only by
       ;; alternatives 0 and 5, the "w" alternatives whose shift amount is
       ;; "rUm" rather than an immediate.  The "arch" attribute assigns
       ;; alternatives 0-1 to neon_for_64bits and 5-6 to
       ;; avoid_neon_for_64bits.
1204   [(set (match_operand:DI 0 "s_register_operand"            "= w, w, &r, r, &r, ?w,?w")
1205         (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w, w")
1206                    (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm, i")))
1207    (clobber (match_scratch:SI 3                             "= X, X, &r, X,  X,  X, X"))
1208    (clobber (match_scratch:SI 4                             "= X, X, &r, X,  X,  X, X"))
1209    (clobber (match_scratch:DI 5                             "=&w, X,  X, X,  X, &w, X"))
1210    (clobber (reg:CC_C CC_REGNUM))]
1211   "TARGET_NEON"
1212   "#"
1213   "TARGET_NEON && reload_completed"
1214   [(const_int 0)]
1215   "
1216   {
1217     if (IS_VFP_REGNUM (REGNO (operands[0])))
1218       {
1219         if (CONST_INT_P (operands[2]))
1220           {
                 /* A constant amount below 1 becomes a plain DImode move;
                    an amount above 63 is replaced by 63.  */
1221             if (INTVAL (operands[2]) < 1)
1222               {
1223                 emit_insn (gen_movdi (operands[0], operands[1]));
1224                 DONE;
1225               }
1226             else if (INTVAL (operands[2]) > 63)
1227               operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1228           }
1229         else
1230           {
                 /* Variable amount: load it into the DImode scratch and
                    shift by that register instead.  */
1231             emit_insn (gen_neon_load_count (operands[5], operands[2]));
1232             operands[2] = operands[5];
1233           }
1234
1235         /* Ditch the unnecessary clobbers.  */
1236         emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1237                                                operands[2]));
1238       }
1239     else
1240       {
1241         /* The shift expanders support either full overlap or no overlap.  */
1242         gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1243                     || REGNO (operands[0]) == REGNO (operands[1]));
1244
             /* Core-register destination: hand the whole shift, including
                both SImode scratches, to the shared helper.  */
1245         arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1246                                        operands[2], operands[3], operands[4]);
1247       }
1248     DONE;
1249   }"
1250   [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1251    (set_attr "opt" "*,*,speed,speed,speed,*,*")
1252    (set_attr "type" "multiple")]
1253 )
1254
1255 ; The shift amount needs to be negated for right-shifts
1256 (define_insn "signed_shift_di3_neon"
       ;; DImode UNSPEC_ASHIFT_SIGNED of two "w" registers, emitted as
       ;; vshl.s64.  Valid only after reload.
1257   [(set (match_operand:DI 0 "s_register_operand"             "=w")
1258         (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1259                     (match_operand:DI 2 "s_register_operand" " w")]
1260                    UNSPEC_ASHIFT_SIGNED))]
1261   "TARGET_NEON && reload_completed"
1262   "vshl.s64\t%P0, %P1, %P2"
1263   [(set_attr "type" "neon_shift_reg")]
1264 )
1265
1266 ; The shift amount needs to be negated for right-shifts
1267 (define_insn "unsigned_shift_di3_neon"
       ;; DImode UNSPEC_ASHIFT_UNSIGNED of two "w" registers, emitted as
       ;; vshl.u64.  Valid only after reload.
1268   [(set (match_operand:DI 0 "s_register_operand"             "=w")
1269         (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1270                     (match_operand:DI 2 "s_register_operand" " w")]
1271                    UNSPEC_ASHIFT_UNSIGNED))]
1272   "TARGET_NEON && reload_completed"
1273   "vshl.u64\t%P0, %P1, %P2"
1274   [(set_attr "type" "neon_shift_reg")]
1275 )
1276
1277 (define_insn "ashrdi3_neon_imm_noclobber"
       ;; DImode arithmetic right shift of a "w" register by a constant,
       ;; emitted as vshr.s64.  Valid only after reload, and only for
       ;; shift amounts in the range 1..64.
1278   [(set (match_operand:DI 0 "s_register_operand"              "=w")
1279         (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1280                      (match_operand:DI 2 "const_int_operand"  " i")))]
1281   "TARGET_NEON && reload_completed
1282    && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1283   "vshr.s64\t%P0, %P1, %2"
1284   [(set_attr "type" "neon_shift_imm")]
1285 )
1286
1287 (define_insn "lshrdi3_neon_imm_noclobber"
       ;; DImode logical right shift of a "w" register by a constant,
       ;; emitted as vshr.u64.  Valid only after reload, and only for
       ;; shift amounts in the range 1..64.
1288   [(set (match_operand:DI 0 "s_register_operand"              "=w")
1289         (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1290                      (match_operand:DI 2 "const_int_operand"  " i")))]
1291   "TARGET_NEON && reload_completed
1292    && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1293   "vshr.u64\t%P0, %P1, %2"
1294   [(set_attr "type" "neon_shift_imm")]
1295 )
1296
1297 ;; ashrdi3_neon
1298 ;; lshrdi3_neon
1299 (define_insn_and_split "<shift>di3_neon"
       ;; DImode right shifts (one pattern per RSHIFTS code) by an SImode
       ;; amount; always output as "#" and split after reload.  Of the seven
       ;; alternatives, 0, 1, 5 and 6 put the result in a "w" register and
       ;; 2-4 in core registers.  Scratch 3 is a core register that may share
       ;; operand 2's register in alternatives 0 and 5 ("2r"), and an
       ;; earlyclobber core register in alternative 2.  Scratch 4 is
       ;; requested only by alternative 2, and scratch 5 (a DImode "w"
       ;; register) only by alternatives 0 and 5.
1300   [(set (match_operand:DI 0 "s_register_operand"             "= w, w, &r, r, &r,?w,?w")
1301         (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
1302                     (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
1303    (clobber (match_scratch:SI 3                              "=2r, X, &r, X,  X,2r, X"))
1304    (clobber (match_scratch:SI 4                              "= X, X, &r, X,  X, X, X"))
1305    (clobber (match_scratch:DI 5                              "=&w, X,  X, X, X,&w, X"))
1306    (clobber (reg:CC CC_REGNUM))]
1307   "TARGET_NEON"
1308   "#"
1309   "TARGET_NEON && reload_completed"
1310   [(const_int 0)]
1311   "
1312   {
1313     if (IS_VFP_REGNUM (REGNO (operands[0])))
1314       {
1315         if (CONST_INT_P (operands[2]))
1316           {
                 /* A constant amount below 1 becomes a plain DImode move;
                    an amount above 64 is replaced by 64.  */
1317             if (INTVAL (operands[2]) < 1)
1318               {
1319                 emit_insn (gen_movdi (operands[0], operands[1]));
1320                 DONE;
1321               }
1322             else if (INTVAL (operands[2]) > 64)
1323               operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1324
1325             /* Ditch the unnecessary clobbers.  */
1326             emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1327                                                           operands[1],
1328                                                           operands[2]));
1329           }
1330         else
1331           {
1332             /* We must use a negative left-shift.  */
                 /* Negate the amount into SImode scratch 3, load that into
                    DImode scratch 5, then shift by scratch 5.  */
1333             emit_insn (gen_negsi2 (operands[3], operands[2]));
1334             emit_insn (gen_neon_load_count (operands[5], operands[3]));
1335             emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1336                                                        operands[5]));
1337           }
1338       }
1339     else
1340       {
1341         /* The shift expanders support either full overlap or no overlap.  */
1342         gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1343                     || REGNO (operands[0]) == REGNO (operands[1]));
1344
1345         /* This clobbers CC (ASHIFTRT by register only).  */
1346         arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1347                                        operands[2], operands[3], operands[4]);
1348       }
1349
1350     DONE;
1351   }"
1352   [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1353    (set_attr "opt" "*,*,speed,speed,speed,*,*")
1354    (set_attr "type" "multiple")]
1355 )
1356
1357 ;; Widening operations
1358
1359 (define_expand "widen_ssum<mode>3"
       ;; op0 = sign_extend (op1) + op2 for the VQI modes, with the result
       ;; and addend in <V_double_width>.  Expanded as two accumulating
       ;; instructions, one for each half of operand 1.
1360   [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1361         (plus:<V_double_width>
1362          (sign_extend:<V_double_width>
1363           (match_operand:VQI 1 "s_register_operand" ""))
1364          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1365   "TARGET_NEON"
1366   {
1367     machine_mode mode = GET_MODE (operands[1]);
1368     rtx p1, p2;
1369
         /* Lane-selection operands for the two halves, passed to the lo
            and hi patterns respectively.  */
1370     p1  = arm_simd_vect_par_cnst_half (mode, false);
1371     p2  = arm_simd_vect_par_cnst_half (mode, true);
1372
         /* Both patterns accumulate in place, so start from the addend.  */
1373     if (operands[0] != operands[2])
1374       emit_move_insn (operands[0], operands[2]);
1375
1376     emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1377                                                          operands[1],
1378                                                          p1,
1379                                                          operands[0]));
1380     emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1381                                                          operands[1],
1382                                                          p2,
1383                                                          operands[0]));
1384     DONE;
1385   }
1386 )
1387
1388 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
       ;; op0 = sign_extend (lanes of op1 selected by op2) + op3, with op3
       ;; tied to the destination.  Operand 2 must satisfy
       ;; vect_par_constant_low.  Emitted as vaddw.<V_s_elem> on %e1, or on
       ;; %f1 when BYTES_BIG_ENDIAN.
       ;; Operand 1 carries no "%" modifier: "%" would declare it commutative
       ;; with operand 2, which is the constant lane selection and can never
       ;; be interchanged with the vector register.
1389   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1390         (plus:<V_double_width>
1391          (sign_extend:<V_double_width>
1392           (vec_select:<V_HALF>
1393            (match_operand:VQI 1 "s_register_operand" "w")
1394            (match_operand:VQI 2 "vect_par_constant_low" "")))
1395          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1396   "TARGET_NEON"
1397 {
1398   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1399     "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1400 }
1401   [(set_attr "type" "neon_add_widen")])
1402
1403 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
       ;; op0 = sign_extend (lanes of op1 selected by op2) + op3, with op3
       ;; tied to the destination.  Operand 2 must satisfy
       ;; vect_par_constant_high.  Emitted as vaddw.<V_s_elem> on %f1, or on
       ;; %e1 when BYTES_BIG_ENDIAN.
       ;; Operand 1 carries no "%" modifier: "%" would declare it commutative
       ;; with operand 2, which is the constant lane selection and can never
       ;; be interchanged with the vector register.
1404   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1405         (plus:<V_double_width>
1406          (sign_extend:<V_double_width>
1407           (vec_select:<V_HALF>
1408                          (match_operand:VQI 1 "s_register_operand" "w")
1409                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1410          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1411   "TARGET_NEON"
1412 {
1413   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1414     "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1415 }
1416   [(set_attr "type" "neon_add_widen")])
1417
1418 (define_insn "widen_ssum<mode>3"
       ;; op0 = sign_extend (op1) + op2 for the VW modes, with the result and
       ;; addend in <V_widen>.  Emitted as one vaddw.<V_s_elem>, with the
       ;; addend as the first source and operand 1 as the second.
       ;; Operand 1 carries no "%" modifier: it is the narrow, sign-extended
       ;; input while operand 2 is the wide addend, so the two differ in mode
       ;; and role and must not be declared interchangeable.
1419   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1420         (plus:<V_widen>
1421          (sign_extend:<V_widen>
1422           (match_operand:VW 1 "s_register_operand" "w"))
1423          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1424   "TARGET_NEON"
1425   "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1426   [(set_attr "type" "neon_add_widen")]
1427 )
1428
1429 (define_expand "widen_usum<mode>3"
       ;; op0 = zero_extend (op1) + op2 for the VQI modes, with the result
       ;; and addend in <V_double_width>.  Expanded as two accumulating
       ;; instructions, one for each half of operand 1.
1430   [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1431         (plus:<V_double_width>
1432          (zero_extend:<V_double_width>
1433           (match_operand:VQI 1 "s_register_operand" ""))
1434          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1435   "TARGET_NEON"
1436   {
1437     machine_mode mode = GET_MODE (operands[1]);
1438     rtx p1, p2;
1439
         /* Lane-selection operands for the two halves, passed to the lo
            and hi patterns respectively.  */
1440     p1  = arm_simd_vect_par_cnst_half (mode, false);
1441     p2  = arm_simd_vect_par_cnst_half (mode, true);
1442
         /* Both patterns accumulate in place, so start from the addend.  */
1443     if (operands[0] != operands[2])
1444       emit_move_insn (operands[0], operands[2]);
1445
1446     emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1447                                                          operands[1],
1448                                                          p1,
1449                                                          operands[0]));
1450     emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1451                                                          operands[1],
1452                                                          p2,
1453                                                          operands[0]));
1454     DONE;
1455   }
1456 )
1457
1458 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
       ;; op0 = zero_extend (lanes of op1 selected by op2) + op3, with op3
       ;; tied to the destination.  Operand 2 must satisfy
       ;; vect_par_constant_low.  Emitted as vaddw.<V_u_elem> on %e1, or on
       ;; %f1 when BYTES_BIG_ENDIAN.
       ;; Operand 1 carries no "%" modifier: "%" would declare it commutative
       ;; with operand 2, which is the constant lane selection and can never
       ;; be interchanged with the vector register.
1459   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1460         (plus:<V_double_width>
1461          (zero_extend:<V_double_width>
1462           (vec_select:<V_HALF>
1463            (match_operand:VQI 1 "s_register_operand" "w")
1464            (match_operand:VQI 2 "vect_par_constant_low" "")))
1465          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1466   "TARGET_NEON"
1467 {
1468   return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1469     "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1470 }
1471   [(set_attr "type" "neon_add_widen")])
1472
1473 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
       ;; op0 = zero_extend (lanes of op1 selected by op2) + op3, with op3
       ;; tied to the destination.  Operand 2 must satisfy
       ;; vect_par_constant_high.  Emitted as vaddw.<V_u_elem> on %f1, or on
       ;; %e1 when BYTES_BIG_ENDIAN.
       ;; Operand 1 carries no "%" modifier: "%" would declare it commutative
       ;; with operand 2, which is the constant lane selection and can never
       ;; be interchanged with the vector register.
1474   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1475         (plus:<V_double_width>
1476          (zero_extend:<V_double_width>
1477           (vec_select:<V_HALF>
1478                          (match_operand:VQI 1 "s_register_operand" "w")
1479                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1480          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1481   "TARGET_NEON"
1482 {
1483  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1484     "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1485 }
1486   [(set_attr "type" "neon_add_widen")])
1487
1488 (define_insn "widen_usum<mode>3"
       ;; op0 = zero_extend (op1) + op2 for the VW modes, with the result and
       ;; addend in <V_widen>.  Emitted as one vaddw.<V_u_elem>, with the
       ;; addend as the first source and operand 1 as the second.
       ;; Operand 1 carries no "%" modifier: it is the narrow, zero-extended
       ;; input while operand 2 is the wide addend, so the two differ in mode
       ;; and role and must not be declared interchangeable.
1489   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1490         (plus:<V_widen> (zero_extend:<V_widen>
1491                           (match_operand:VW 1 "s_register_operand" "w"))
1492                         (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1493   "TARGET_NEON"
1494   "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1495   [(set_attr "type" "neon_add_widen")]
1496 )
1497
1498 ;; Helpers for quad-word reduction operations
1499
1500 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1501 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1502 ; N/2-element vector.
1503
1504 (define_insn "quad_halves_<code>v4si"
       ;; Applies a VQH_OPS code to lanes 0-1 and lanes 2-3 of the V4SI
       ;; operand 1, giving a V2SI result.  The mnemonic comes from
       ;; <VQH_mnem> and its element type from <VQH_sign>32; the sources are
       ;; printed as %e1 and %f1.
1505   [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1506         (VQH_OPS:V2SI
1507           (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1508                            (parallel [(const_int 0) (const_int 1)]))
1509           (vec_select:V2SI (match_dup 1)
1510                            (parallel [(const_int 2) (const_int 3)]))))]
1511   "TARGET_NEON"
1512   "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1513   [(set_attr "vqh_mnem" "<VQH_mnem>")
1514    (set_attr "type" "neon_reduc_<VQH_type>_q")]
1515 )
1516
1517 (define_insn "quad_halves_<code>v4sf"
       ;; Applies a VQHS_OPS code to lanes 0-1 and lanes 2-3 of the V4SF
       ;; operand 1, giving a V2SF result, emitted as <VQH_mnem>.f32.
       ;; Additionally requires flag_unsafe_math_optimizations.
1518   [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1519         (VQHS_OPS:V2SF
1520           (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1521                            (parallel [(const_int 0) (const_int 1)]))
1522           (vec_select:V2SF (match_dup 1)
1523                            (parallel [(const_int 2) (const_int 3)]))))]
1524   "TARGET_NEON && flag_unsafe_math_optimizations"
1525   "<VQH_mnem>.f32\t%P0, %e1, %f1"
1526   [(set_attr "vqh_mnem" "<VQH_mnem>")
1527    (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1528 )
1529
1530 (define_insn "quad_halves_<code>v8hi"
       ;; Applies a VQH_OPS code to lanes 0-3 and lanes 4-7 of the V8HI
       ;; operand 1, giving a V4HI result.  The mnemonic comes from
       ;; <VQH_mnem> and its element type from <VQH_sign>16; the sources are
       ;; printed as %e1 and %f1.
       ;; Operand 0 is write-only ("=w"), matching the v4si and v4sf
       ;; variants: the RTL never reads the destination.
1531   [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1532         (VQH_OPS:V4HI
1533           (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1534                            (parallel [(const_int 0) (const_int 1)
1535                                       (const_int 2) (const_int 3)]))
1536           (vec_select:V4HI (match_dup 1)
1537                            (parallel [(const_int 4) (const_int 5)
1538                                       (const_int 6) (const_int 7)]))))]
1539   "TARGET_NEON"
1540   "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1541   [(set_attr "vqh_mnem" "<VQH_mnem>")
1542    (set_attr "type" "neon_reduc_<VQH_type>_q")]
1543 )
1544
1545 (define_insn "quad_halves_<code>v16qi"
       ;; Applies a VQH_OPS code to lanes 0-7 and lanes 8-15 of the V16QI
       ;; operand 1, giving a V8QI result.  The mnemonic comes from
       ;; <VQH_mnem> and its element type from <VQH_sign>8; the sources are
       ;; printed as %e1 and %f1.
       ;; Operand 0 is write-only ("=w"), matching the v4si and v4sf
       ;; variants: the RTL never reads the destination.
1546   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1547         (VQH_OPS:V8QI
1548           (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1549                            (parallel [(const_int 0) (const_int 1)
1550                                       (const_int 2) (const_int 3)
1551                                       (const_int 4) (const_int 5)
1552                                       (const_int 6) (const_int 7)]))
1553           (vec_select:V8QI (match_dup 1)
1554                            (parallel [(const_int 8) (const_int 9)
1555                                       (const_int 10) (const_int 11)
1556                                       (const_int 12) (const_int 13)
1557                                       (const_int 14) (const_int 15)]))))]
1558   "TARGET_NEON"
1559   "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1560   [(set_attr "vqh_mnem" "<VQH_mnem>")
1561    (set_attr "type" "neon_reduc_<VQH_type>_q")]
1562 )
1563
;; Expander: store the half-width vector operand 1 into one half of the
;; 128-bit vector operand 0.  The destination half is addressed as a
;; <V_HALF>mode subreg of operand 0 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode); the move is emitted with emit_move_insn
;; and nothing else in operand 0 is assigned here.
1564 (define_expand "move_hi_quad_<mode>"
1565  [(match_operand:ANY128 0 "s_register_operand" "")
1566   (match_operand:<V_HALF> 1 "s_register_operand" "")]
1567  "TARGET_NEON"
1568 {
1569   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1570                                        GET_MODE_SIZE (<V_HALF>mode)),
1571                   operands[1]);
1572   DONE;
1573 })
1574
;; Expander: store the half-width vector operand 1 into the half of the
;; 128-bit vector operand 0 that starts at byte offset 0, addressed as a
;; <V_HALF>mode subreg.  The move is emitted with emit_move_insn and
;; nothing else in operand 0 is assigned here.
1575 (define_expand "move_lo_quad_<mode>"
1576  [(match_operand:ANY128 0 "s_register_operand" "")
1577   (match_operand:<V_HALF> 1 "s_register_operand" "")]
1578  "TARGET_NEON"
1579 {
1580   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1581                                        <MODE>mode, 0),
1582                   operands[1]);
1583   DONE;
1584 })
1585
1586 ;; Reduction operations
1587
;; Sum reduction of a VD-mode vector (operand 1) to a scalar (operand 0).
;; neon_pairwise_reduce is driven with the vpadd generator to fill a fresh
;; vector register, and element 0 of that register is then extracted into
;; operand 0.  Floating-point modes are only enabled under
;; flag_unsafe_math_optimizations.
1588 (define_expand "reduc_plus_scal_<mode>"
1589   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1590    (match_operand:VD 1 "s_register_operand" "")]
1591   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1592 {
1593   rtx vec = gen_reg_rtx (<MODE>mode);
1594   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1595                         &gen_neon_vpadd_internal<mode>);
1596   /* The same result is actually computed into every element.  */
1597   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1598   DONE;
1599 })
1600
;; Sum reduction of a VQ-mode vector (operand 1) to a scalar (operand 0),
;; done in two steps: quad_halves_plus adds the two halves into a
;; <V_HALF>mode temporary, then the half-width reduc_plus_scal expander
;; finishes the job.  Floating-point modes need
;; flag_unsafe_math_optimizations, and the pattern is disabled when
;; BYTES_BIG_ENDIAN.
1601 (define_expand "reduc_plus_scal_<mode>"
1602   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1603    (match_operand:VQ 1 "s_register_operand" "")]
1604   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1605    && !BYTES_BIG_ENDIAN"
1606 {
1607   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1608
1609   emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1610   emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1611
1612   DONE;
1613 })
1614
;; Sum reduction of a V2DI vector (operand 1) to a DI scalar (operand 0).
;; arm_reduc_plus_internal_v2di computes into a fresh V2DI register and
;; element 0 of that register is extracted into operand 0.  Disabled when
;; BYTES_BIG_ENDIAN.
1615 (define_expand "reduc_plus_scal_v2di"
1616   [(match_operand:DI 0 "nonimmediate_operand" "=w")
1617    (match_operand:V2DI 1 "s_register_operand" "")]
1618   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1619 {
1620   rtx vec = gen_reg_rtx (V2DImode);
1621
1622   emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1623   emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1624
1625   DONE;
1626 })
1627
;; Helper insn for reduc_plus_scal_v2di, modelled as UNSPEC_VPADD of the
;; V2DI operand 1.  The template is a single vadd.i64 whose destination is
;; printed as %e0 and whose sources are %e1 and %f1 of the same input.
;; NOTE(review): %e / %f presumably select the low / high D half of a quad
;; register, in which case only one half of operand 0 is written -- confirm.
1628 (define_insn "arm_reduc_plus_internal_v2di"
1629   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1630         (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1631                      UNSPEC_VPADD))]
1632   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1633   "vadd.i64\t%e0, %e1, %f1"
1634   [(set_attr "type" "neon_add_q")]
1635 )
1636
;; Signed-minimum reduction of a VD-mode vector (operand 1) to a scalar
;; (operand 0).  neon_pairwise_reduce is driven with the vpsmin generator
;; and element 0 of the resulting vector is extracted.  Floating-point
;; modes are only enabled under flag_unsafe_math_optimizations.
1637 (define_expand "reduc_smin_scal_<mode>"
1638   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1639    (match_operand:VD 1 "s_register_operand" "")]
1640   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1641 {
1642   rtx vec = gen_reg_rtx (<MODE>mode);
1643
1644   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1645                         &gen_neon_vpsmin<mode>);
1646   /* The result is computed into every element of the vector.  */
1647   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1648   DONE;
1649 })
1650
;; Signed-minimum reduction of a VQ-mode vector (operand 1) to a scalar
;; (operand 0): quad_halves_smin folds the two halves into a <V_HALF>mode
;; temporary, then the half-width reduc_smin_scal expander finishes.
;; Floating-point modes need flag_unsafe_math_optimizations, and the
;; pattern is disabled when BYTES_BIG_ENDIAN.
1651 (define_expand "reduc_smin_scal_<mode>"
1652   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1653    (match_operand:VQ 1 "s_register_operand" "")]
1654   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1655    && !BYTES_BIG_ENDIAN"
1656 {
1657   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1658
1659   emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1660   emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1661
1662   DONE;
1663 })
1664
;; Signed-maximum reduction of a VD-mode vector (operand 1) to a scalar
;; (operand 0).  neon_pairwise_reduce is driven with the vpsmax generator
;; and element 0 of the resulting vector is extracted.  Floating-point
;; modes are only enabled under flag_unsafe_math_optimizations.
1665 (define_expand "reduc_smax_scal_<mode>"
1666   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1667    (match_operand:VD 1 "s_register_operand" "")]
1668   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1669 {
1670   rtx vec = gen_reg_rtx (<MODE>mode);
1671   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1672                         &gen_neon_vpsmax<mode>);
1673   /* The result is computed into every element of the vector.  */
1674   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1675   DONE;
1676 })
1677
;; Signed-maximum reduction of a VQ-mode vector (operand 1) to a scalar
;; (operand 0): quad_halves_smax folds the two halves into a <V_HALF>mode
;; temporary, then the half-width reduc_smax_scal expander finishes.
;; Floating-point modes need flag_unsafe_math_optimizations, and the
;; pattern is disabled when BYTES_BIG_ENDIAN.
1678 (define_expand "reduc_smax_scal_<mode>"
1679   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1680    (match_operand:VQ 1 "s_register_operand" "")]
1681   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1682    && !BYTES_BIG_ENDIAN"
1683 {
1684   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1685
1686   emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1687   emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1688
1689   DONE;
1690 })
1691
;; Unsigned-minimum reduction of a VDI-mode vector (operand 1) to a scalar
;; (operand 0).  neon_pairwise_reduce is driven with the vpumin generator
;; and element 0 of the resulting vector is extracted.
1692 (define_expand "reduc_umin_scal_<mode>"
1693   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1694    (match_operand:VDI 1 "s_register_operand" "")]
1695   "TARGET_NEON"
1696 {
1697   rtx vec = gen_reg_rtx (<MODE>mode);
1698   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1699                         &gen_neon_vpumin<mode>);
1700   /* The result is computed into every element of the vector.  */
1701   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1702   DONE;
1703 })
1704
;; Unsigned-minimum reduction of a VQI-mode vector (operand 1) to a scalar
;; (operand 0): quad_halves_umin folds the two halves into a <V_HALF>mode
;; temporary, then the half-width reduc_umin_scal expander finishes.
;; Disabled when BYTES_BIG_ENDIAN.
1705 (define_expand "reduc_umin_scal_<mode>"
1706   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1707    (match_operand:VQI 1 "s_register_operand" "")]
1708   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1709 {
1710   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1711
1712   emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1713   emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1714
1715   DONE;
1716 })
1717
;; Unsigned-maximum reduction of a VDI-mode vector (operand 1) to a scalar
;; (operand 0).  neon_pairwise_reduce is driven with the vpumax generator
;; and element 0 of the resulting vector is extracted.
1718 (define_expand "reduc_umax_scal_<mode>"
1719   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1720    (match_operand:VDI 1 "s_register_operand" "")]
1721   "TARGET_NEON"
1722 {
1723   rtx vec = gen_reg_rtx (<MODE>mode);
1724   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1725                         &gen_neon_vpumax<mode>);
1726   /* The result is computed into every element of the vector.  */
1727   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1728   DONE;
1729 })
1730
;; Unsigned-maximum reduction of a VQI-mode vector (operand 1) to a scalar
;; (operand 0): quad_halves_umax folds the two halves into a <V_HALF>mode
;; temporary, then the half-width reduc_umax_scal expander finishes.
;; Disabled when BYTES_BIG_ENDIAN.
1731 (define_expand "reduc_umax_scal_<mode>"
1732   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1733    (match_operand:VQI 1 "s_register_operand" "")]
1734   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1735 {
1736   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1737
1738   emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1739   emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1740
1741   DONE;
1742 })
1743
;; Pairwise add of two VD-mode registers, modelled as UNSPEC_VPADD and
;; emitted as vpadd.<V_if_elem>.  This is the generator handed to
;; neon_pairwise_reduce by the VD reduc_plus_scal expander.  The scheduling
;; type is chosen per mode: the fp reduction class for float modes, the
;; integer reduction class otherwise.
1744 (define_insn "neon_vpadd_internal<mode>"
1745   [(set (match_operand:VD 0 "s_register_operand" "=w")
1746         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1747                     (match_operand:VD 2 "s_register_operand" "w")]
1748                    UNSPEC_VPADD))]
1749   "TARGET_NEON"
1750   "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1751   ;; Assume this schedules like vadd.
1752   [(set (attr "type")
1753       (if_then_else (match_test "<Is_float_mode>")
1754                     (const_string "neon_fp_reduc_add_s<q>")
1755                     (const_string "neon_reduc_add<q>")))]
1756 )
1757
;; Half-precision pairwise add of two V4HF registers, modelled as
;; UNSPEC_VPADD and emitted as vpadd.f16.  Only available under
;; TARGET_NEON_FP16INST.
1758 (define_insn "neon_vpaddv4hf"
1759  [(set
1760    (match_operand:V4HF 0 "s_register_operand" "=w")
1761    (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1762                  (match_operand:V4HF 2 "s_register_operand" "w")]
1763     UNSPEC_VPADD))]
1764  "TARGET_NEON_FP16INST"
1765  "vpadd.f16\t%P0, %P1, %P2"
1766  [(set_attr "type" "neon_reduc_add")]
1767 )
1768
;; Signed (or floating-point) pairwise minimum of two VD-mode registers,
;; modelled as UNSPEC_VPSMIN and emitted as vpmin.<V_s_elem>.  This is the
;; generator handed to neon_pairwise_reduce by the VD reduc_smin_scal
;; expander.  The scheduling type is chosen per mode (fp vs. integer).
1769 (define_insn "neon_vpsmin<mode>"
1770   [(set (match_operand:VD 0 "s_register_operand" "=w")
1771         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1772                     (match_operand:VD 2 "s_register_operand" "w")]
1773                    UNSPEC_VPSMIN))]
1774   "TARGET_NEON"
1775   "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1776   [(set (attr "type")
1777       (if_then_else (match_test "<Is_float_mode>")
1778                     (const_string "neon_fp_reduc_minmax_s<q>")
1779                     (const_string "neon_reduc_minmax<q>")))]
1780 )
1781
;; Signed (or floating-point) pairwise maximum of two VD-mode registers,
;; modelled as UNSPEC_VPSMAX and emitted as vpmax.<V_s_elem>.  This is the
;; generator handed to neon_pairwise_reduce by the VD reduc_smax_scal
;; expander.  The scheduling type is chosen per mode (fp vs. integer).
1782 (define_insn "neon_vpsmax<mode>"
1783   [(set (match_operand:VD 0 "s_register_operand" "=w")
1784         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1785                     (match_operand:VD 2 "s_register_operand" "w")]
1786                    UNSPEC_VPSMAX))]
1787   "TARGET_NEON"
1788   "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1789   [(set (attr "type")
1790       (if_then_else (match_test "<Is_float_mode>")
1791                     (const_string "neon_fp_reduc_minmax_s<q>")
1792                     (const_string "neon_reduc_minmax<q>")))]
1793 )
1794
;; Unsigned pairwise minimum of two VDI-mode registers, modelled as
;; UNSPEC_VPUMIN and emitted as vpmin.<V_u_elem>.  This is the generator
;; handed to neon_pairwise_reduce by the VDI reduc_umin_scal expander.
1795 (define_insn "neon_vpumin<mode>"
1796   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1797         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1798                      (match_operand:VDI 2 "s_register_operand" "w")]
1799                    UNSPEC_VPUMIN))]
1800   "TARGET_NEON"
1801   "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1802   [(set_attr "type" "neon_reduc_minmax<q>")]
1803 )
1804
;; Unsigned pairwise maximum of two VDI-mode registers, modelled as
;; UNSPEC_VPUMAX and emitted as vpmax.<V_u_elem>.  This is the generator
;; handed to neon_pairwise_reduce by the VDI reduc_umax_scal expander.
1805 (define_insn "neon_vpumax<mode>"
1806   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1807         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1808                      (match_operand:VDI 2 "s_register_operand" "w")]
1809                    UNSPEC_VPUMAX))]
1810   "TARGET_NEON"
1811   "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1812   [(set_attr "type" "neon_reduc_minmax<q>")]
1813 )
1814
1815 ;; Saturating arithmetic
1816
1817 ; NOTE: Neon supports many more saturating variants of instructions than the
1818 ; following, but these are all GCC currently understands.
1819 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1820 ; yet either, although these patterns may be used by intrinsics when they're
1821 ; added.
1822
;; Signed saturating add of two VD-mode registers, expressed with the
;; generic ss_plus RTL code and emitted as vqadd.<V_s_elem>.  The leading
;; '*' in the name means no gen_ function is produced for this pattern.
1823 (define_insn "*ss_add<mode>_neon"
1824   [(set (match_operand:VD 0 "s_register_operand" "=w")
1825        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1826                    (match_operand:VD 2 "s_register_operand" "w")))]
1827   "TARGET_NEON"
1828   "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1829   [(set_attr "type" "neon_qadd<q>")]
1830 )
1831
;; Unsigned saturating add of two VD-mode registers, expressed with the
;; generic us_plus RTL code and emitted as vqadd.<V_u_elem>.  The leading
;; '*' in the name means no gen_ function is produced for this pattern.
1832 (define_insn "*us_add<mode>_neon"
1833   [(set (match_operand:VD 0 "s_register_operand" "=w")
1834        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1835                    (match_operand:VD 2 "s_register_operand" "w")))]
1836   "TARGET_NEON"
1837   "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1838   [(set_attr "type" "neon_qadd<q>")]
1839 )
1840
;; Signed saturating subtract (operand 1 minus operand 2) of two VD-mode
;; registers, expressed with the generic ss_minus RTL code and emitted as
;; vqsub.<V_s_elem>.  The leading '*' in the name means no gen_ function is
;; produced for this pattern.
1841 (define_insn "*ss_sub<mode>_neon"
1842   [(set (match_operand:VD 0 "s_register_operand" "=w")
1843        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1844                     (match_operand:VD 2 "s_register_operand" "w")))]
1845   "TARGET_NEON"
1846   "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1847   [(set_attr "type" "neon_qsub<q>")]
1848 )
1849
;; Unsigned saturating subtract (operand 1 minus operand 2) of two VD-mode
;; registers, expressed with the generic us_minus RTL code and emitted as
;; vqsub.<V_u_elem>.  The leading '*' in the name means no gen_ function is
;; produced for this pattern.
1850 (define_insn "*us_sub<mode>_neon"
1851   [(set (match_operand:VD 0 "s_register_operand" "=w")
1852        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1853                     (match_operand:VD 2 "s_register_operand" "w")))]
1854   "TARGET_NEON"
1855   "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1856   [(set_attr "type" "neon_qsub<q>")]
1857 )
1858
1859 ;; Conditional instructions.  These are comparisons with conditional moves for
1860 ;; vectors.  They perform the assignment:
1861 ;;
1862 ;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1863 ;;
1864 ;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
1865 ;; element-wise.
1866
1867 (define_expand "vcond<mode><mode>"
1868   [(set (match_operand:VDQW 0 "s_register_operand" "")
1869         (if_then_else:VDQW
1870           (match_operator 3 "comparison_operator"
1871             [(match_operand:VDQW 4 "s_register_operand" "")
1872              (match_operand:VDQW 5 "nonmemory_operand" "")])
1873           (match_operand:VDQW 1 "s_register_operand" "")
1874           (match_operand:VDQW 2 "s_register_operand" "")))]
1875   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1876 {
  /* Expand a signed / floating-point vector conditional select.  A lane
     mask is built from operands[4] <operands[3]> operands[5] with
     vcge / vcgt / vceq (or their compare-against-zero forms) and handed
     to vbsl together with operands[1] and operands[2].  Comparison codes
     that have no direct instruction are built from the inverse comparison
     with the two vbsl data operands swapped.  */
1877   int inverse = 0;
1878   int use_zero_form = 0;
1879   int swap_bsl_operands = 0;
1880   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1881   rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1882
1883   rtx (*base_comparison) (rtx, rtx, rtx);
1884   rtx (*complimentary_comparison) (rtx, rtx, rtx);
1885
  /* Step 1: decide whether operands[5] may stay a constant zero vector
     (only for the ordered codes listed below); otherwise make sure it is
     in a register.  */
1886   switch (GET_CODE (operands[3]))
1887     {
1888     case GE:
1889     case GT:
1890     case LE:
1891     case LT:
1892     case EQ:
1893       if (operands[5] == CONST0_RTX (<MODE>mode))
1894         {
1895           use_zero_form = 1;
1896           break;
1897         }
1898       /* Fall through.  */
1899     default:
1900       if (!REG_P (operands[5]))
1901         operands[5] = force_reg (<MODE>mode, operands[5]);
1902     }
1903
  /* Step 2: pick the comparison generator matching the code
     (base_comparison) and the one to use with swapped operands
     (complimentary_comparison).  INVERSE is set for the "less than"
     family.  */
1904   switch (GET_CODE (operands[3]))
1905     {
1906     case LT:
1907     case UNLT:
1908       inverse = 1;
1909       /* Fall through.  */
1910     case GE:
1911     case UNGE:
1912     case ORDERED:
1913     case UNORDERED:
1914       base_comparison = gen_neon_vcge<mode>;
1915       complimentary_comparison = gen_neon_vcgt<mode>;
1916       break;
1917     case LE:
1918     case UNLE:
1919       inverse = 1;
1920       /* Fall through.  */
1921     case GT:
1922     case UNGT:
1923       base_comparison = gen_neon_vcgt<mode>;
1924       complimentary_comparison = gen_neon_vcge<mode>;
1925       break;
1926     case EQ:
1927     case NE:
1928     case UNEQ:
1929       base_comparison = gen_neon_vceq<mode>;
1930       complimentary_comparison = gen_neon_vceq<mode>;
1931       break;
1932     default:
1933       gcc_unreachable ();
1934     }
1935
  /* Step 3: emit the comparison(s) that compute MASK.  */
1936   switch (GET_CODE (operands[3]))
1937     {
1938     case LT:
1939     case LE:
1940     case GT:
1941     case GE:
1942     case EQ:
1943       /* The easy case.  Here we emit one of vcge, vcgt or vceq.
1944          As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
1945          a GE b -> a GE b
1946          a GT b -> a GT b
1947          a LE b -> b GE a
1948          a LT b -> b GT a
1949          a EQ b -> a EQ b
1950          Note that there also exist direct comparison against 0 forms,
1951          so catch those as a special case.  */
1952       if (use_zero_form)
1953         {
1954           inverse = 0;
1955           switch (GET_CODE (operands[3]))
1956             {
1957             case LT:
1958               base_comparison = gen_neon_vclt<mode>;
1959               break;
1960             case LE:
1961               base_comparison = gen_neon_vcle<mode>;
1962               break;
1963             default:
1964               /* Do nothing, other zero form cases already have the correct
1965                  base_comparison.  */
1966               break;
1967             }
1968         }
1969
1970       if (!inverse)
1971         emit_insn (base_comparison (mask, operands[4], operands[5]));
1972       else
1973         emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1974       break;
1975     case UNLT:
1976     case UNLE:
1977     case UNGT:
1978     case UNGE:
1979     case NE:
1980       /* Vector compare returns false for lanes which are unordered, so if we use
1981          the inverse of the comparison we actually want to emit, then
1982          swap the operands to BSL, we will end up with the correct result.
1983          Note that a NE NaN and NaN NE b are true for all a, b.
1984
1985          Our transformations are:
1986          a UNGE b -> !(b GT a)
1987          a UNGT b -> !(b GE a)
1988          a UNLE b -> !(a GT b)
1989          a UNLT b -> !(a GE b)
1990          a NE b -> !(a EQ b)  */
1991
1992       if (inverse)
1993         emit_insn (base_comparison (mask, operands[4], operands[5]));
1994       else
1995         emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1996
1997       swap_bsl_operands = 1;
1998       break;
1999     case UNEQ:
2000       /* We check (a > b ||  b > a).  Combining these comparisons gives us
2001          true iff (a != b && a ORDERED b), swapping the operands to BSL
2002          will then give us (a == b ||  a UNORDERED b) as intended.  */
2003
2004       emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
2005       emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
2006       emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2007       swap_bsl_operands = 1;
2008       break;
2009     case UNORDERED:
2010        /* Operands are ORDERED iff (a > b || b >= a).
2011          Swapping the operands to BSL will give the UNORDERED case.  */
2012      swap_bsl_operands = 1;
2013      /* Fall through.  */
2014     case ORDERED:
2015       emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2016       emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2017       emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2018       break;
2019     default:
2020       gcc_unreachable ();
2021     }
2022
  /* Step 4: select between operands[1] and operands[2] under MASK; the
     two data operands trade places when MASK holds the inverse of the
     requested condition.  */
2023   if (swap_bsl_operands)
2024     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2025                                     operands[1]));
2026   else
2027     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2028                                     operands[2]));
2029   DONE;
2030 })
2031
;; Unsigned-comparison vector conditional select:
;;
;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
;;
;; where op3 is GEU, GTU, LEU, LTU, EQ or NE.  A lane mask is computed
;; with vcgeu / vcgtu / vceq and handed to vbsl.  LEU and LTU are obtained
;; by swapping the comparison operands; NE is obtained from vceq with the
;; two vbsl data operands swapped.  Any other comparison code is rejected
;; with gcc_unreachable.
2032 (define_expand "vcondu<mode><mode>"
2033   [(set (match_operand:VDQIW 0 "s_register_operand" "")
2034         (if_then_else:VDQIW
2035           (match_operator 3 "arm_comparison_operator"
2036             [(match_operand:VDQIW 4 "s_register_operand" "")
2037              (match_operand:VDQIW 5 "s_register_operand" "")])
2038           (match_operand:VDQIW 1 "s_register_operand" "")
2039           (match_operand:VDQIW 2 "s_register_operand" "")))]
2040   "TARGET_NEON"
2041 {
2042   rtx mask;
2043   int inverse = 0;
2044
2045   mask = gen_reg_rtx (<V_cmp_result>mode);
2046
  /* Always compare against a register.  There is deliberately no
     compare-against-zero shortcut here: gen_neon_vcle / gen_neon_vclt are
     the generators the signed vcond expander uses for LE / LT against
     zero, so using them for LEU / LTU would give the wrong mask for lanes
     with the top bit set, and the unsigned generators below are always
     given operands[5] as an ordinary vector operand.  */
2049   if (!REG_P (operands[5]))
2050     operands[5] = force_reg (<MODE>mode, operands[5]);
2051
2052   switch (GET_CODE (operands[3]))
2053     {
2054     case GEU:
2055       emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2056       break;
2057
2058     case GTU:
2059       emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2060       break;
2061
2062     case EQ:
2063       emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2064       break;
2065
2066     case LEU:
      /* a LEU b <=> b GEU a.  */
2070       emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2071       break;
2072
2073     case LTU:
      /* a LTU b <=> b GTU a.  */
2077       emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2078       break;
2079
2080     case NE:
      /* Compute the EQ mask and let the swapped vbsl operands below
         invert the selection.  */
2081       emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2082       inverse = 1;
2083       break;
2084
2085     default:
2086       gcc_unreachable ();
2087     }
2088
2089   if (inverse)
2090     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2091                                     operands[1]));
2092   else
2093     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2094                                     operands[2]));
2095
2096   DONE;
2097 })
2098
2099 ;; Patterns for builtins.
2100
2101 ; good for plain vadd, vaddq.
2102
;; Builtin expander for vector add on VCVTF modes.  When the mode is not
;; floating-point, or flag_unsafe_math_optimizations is set, the generic
;; add<mode>3 pattern is used; otherwise the add is emitted through
;; neon_vadd<mode>_unspec.
2103 (define_expand "neon_vadd<mode>"
2104   [(match_operand:VCVTF 0 "s_register_operand" "=w")
2105    (match_operand:VCVTF 1 "s_register_operand" "w")
2106    (match_operand:VCVTF 2 "s_register_operand" "w")]
2107   "TARGET_NEON"
2108 {
2109   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2110     emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2111   else
2112     emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2113                                            operands[2]));
2114   DONE;
2115 })
2116
;; Builtin expander for vector add on the half-precision VH modes.  It
;; always forwards to add<mode>3_fp16 and is only available under
;; TARGET_NEON_FP16INST.
2117 (define_expand "neon_vadd<mode>"
2118   [(match_operand:VH 0 "s_register_operand")
2119    (match_operand:VH 1 "s_register_operand")
2120    (match_operand:VH 2 "s_register_operand")]
2121   "TARGET_NEON_FP16INST"
2122 {
2123   emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2124   DONE;
2125 })
2126
;; Builtin expander for vector subtract on the half-precision VH modes.  It
;; always forwards to sub<mode>3_fp16 and is only available under
;; TARGET_NEON_FP16INST.
2127 (define_expand "neon_vsub<mode>"
2128   [(match_operand:VH 0 "s_register_operand")
2129    (match_operand:VH 1 "s_register_operand")
2130    (match_operand:VH 2 "s_register_operand")]
2131   "TARGET_NEON_FP16INST"
2132 {
2133   emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2134   DONE;
2135 })
2136
2137 ; Note that NEON operations don't support the full IEEE 754 standard: in
2138 ; particular, denormal values are flushed to zero.  This means that GCC cannot
2139 ; use those instructions for autovectorization, etc. unless
2140 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2141 ; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
2142 ; header) must work in either case: if -funsafe-math-optimizations is given,
2143 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2144 ; expand to unspecs (which may potentially limit the extent to which they might
2145 ; be optimized by generic code).
2146
2147 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2148
;; Vector add on VCVTF modes modelled as UNSPEC_VADD rather than as a plus
;; rtx.  The neon_vadd<mode> expander emits it for floating-point modes
;; when flag_unsafe_math_optimizations is off.  Emitted as
;; vadd.<V_if_elem>.
2149 (define_insn "neon_vadd<mode>_unspec"
2150   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2151         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2152                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2153                      UNSPEC_VADD))]
2154   "TARGET_NEON"
2155   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2156   [(set (attr "type")
2157       (if_then_else (match_test "<Is_float_mode>")
2158                     (const_string "neon_fp_addsub_s<q>")
2159                     (const_string "neon_add<q>")))]
2160 )
2161
;; Long add (vaddl): both inputs are VDI-mode vectors and the result has
;; the widened mode <V_widen>.  Modelled as an unspec drawn from the VADDL
;; iterator; <sup> supplies the signedness letter of the mnemonic suffix.
2162 (define_insn "neon_vaddl<sup><mode>"
2163   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2164         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2165                            (match_operand:VDI 2 "s_register_operand" "w")]
2166                           VADDL))]
2167   "TARGET_NEON"
2168   "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2169   [(set_attr "type" "neon_add_long")]
2170 )
2171
;; Wide add (vaddw): operand 1 already has the widened mode <V_widen>,
;; operand 2 is a VDI-mode vector, and the result has mode <V_widen>.
;; Modelled as an unspec drawn from the VADDW iterator; <sup> supplies the
;; signedness letter of the mnemonic suffix.
2172 (define_insn "neon_vaddw<sup><mode>"
2173   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2174         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2175                            (match_operand:VDI 2 "s_register_operand" "w")]
2176                           VADDW))]
2177   "TARGET_NEON"
2178   "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2179   [(set_attr "type" "neon_add_widen")]
2180 )
2181
2182 ; vhadd and vrhadd.
2183
;; Halving add on VDQIW modes, covering both vhadd and vrhadd: the <r>
;; substitution selects the variant and <sup> the signedness letter.
;; Modelled as an unspec drawn from the VHADD iterator.
2184 (define_insn "neon_v<r>hadd<sup><mode>"
2185   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2186         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2187                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2188                       VHADD))]
2189   "TARGET_NEON"
2190   "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2191   [(set_attr "type" "neon_add_halve_q")]
2192 )
2193
;; Saturating add (vqadd) builtin pattern on VDQIX modes.  Modelled as an
;; unspec drawn from the VQADD iterator; <sup> supplies the signedness
;; letter of the mnemonic suffix.
2194 (define_insn "neon_vqadd<sup><mode>"
2195   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2196         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2197                        (match_operand:VDQIX 2 "s_register_operand" "w")]
2198                      VQADD))]
2199   "TARGET_NEON"
2200   "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2201   [(set_attr "type" "neon_qadd<q>")]
2202 )
2203
;; Add-and-narrow (vaddhn / vraddhn, chosen by <r>): both inputs are
;; VN-mode vectors and the result has the narrowed mode <V_narrow>.
;; Modelled as an unspec drawn from the VADDHN iterator.
2204 (define_insn "neon_v<r>addhn<mode>"
2205   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2206         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2207                             (match_operand:VN 2 "s_register_operand" "w")]
2208                            VADDHN))]
2209   "TARGET_NEON"
2210   "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2211   [(set_attr "type" "neon_add_halve_narrow_q")]
2212 )
2213
2214 ;; Polynomial and Float multiplication.
;; Multiply on VPF modes, modelled as UNSPEC_VMUL and emitted as
;; vmul.<pf><size>.  The scheduling type is the fp multiply class for
;; floating-point modes and the integer multiply class otherwise.
2215 (define_insn "neon_vmul<pf><mode>"
2216   [(set (match_operand:VPF 0 "s_register_operand" "=w")
2217         (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2218                       (match_operand:VPF 2 "s_register_operand" "w")]
2219                      UNSPEC_VMUL))]
2220   "TARGET_NEON"
2221   "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2222   [(set (attr "type")
2223       (if_then_else (match_test "<Is_float_mode>")
2224                     (const_string "neon_fp_mul_s<q>")
2225                     (const_string "neon_mul_<V_elem_ch><q>")))]
2226 )
2227
;; Named multiply pattern for the half-precision VH modes, expressed with
;; the generic mult RTL code and emitted as vmul.f16.  It requires both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
2228 (define_insn "mul<mode>3"
2229  [(set
2230    (match_operand:VH 0 "s_register_operand" "=w")
2231    (mult:VH
2232     (match_operand:VH 1 "s_register_operand" "w")
2233     (match_operand:VH 2 "s_register_operand" "w")))]
2234   "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2235   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2236  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2237 )
2238
;; Half-precision multiply on VH modes with the same RTL and assembly as
;; mul<mode>3, but conditioned on TARGET_NEON_FP16INST alone (no
;; flag_unsafe_math_optimizations requirement).
2239 (define_insn "neon_vmulf<mode>"
2240  [(set
2241    (match_operand:VH 0 "s_register_operand" "=w")
2242    (mult:VH
2243     (match_operand:VH 1 "s_register_operand" "w")
2244     (match_operand:VH 2 "s_register_operand" "w")))]
2245   "TARGET_NEON_FP16INST"
2246   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2247  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2248 )
2249
;; Builtin expander for multiply-accumulate on VDQW modes.  When the mode
;; is not floating-point, or flag_unsafe_math_optimizations is set, the
;; mul<mode>3add<mode>_neon pattern is used; otherwise the operation is
;; emitted through neon_vmla<mode>_unspec.  All four operands are passed
;; through in order.
2250 (define_expand "neon_vmla<mode>"
2251   [(match_operand:VDQW 0 "s_register_operand" "=w")
2252    (match_operand:VDQW 1 "s_register_operand" "0")
2253    (match_operand:VDQW 2 "s_register_operand" "w")
2254    (match_operand:VDQW 3 "s_register_operand" "w")]
2255   "TARGET_NEON"
2256 {
2257   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2258     emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2259                                              operands[2], operands[3]));
2260   else
2261     emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2262                                            operands[2], operands[3]));
2263   DONE;
2264 })
2265
;; Builtin expander for fused multiply-add on VCVTF modes.  It forwards to
;; fma<mode>4_intrinsic with the operands reordered: operands 2 and 3 are
;; passed second and third, and operand 1 is passed last.  Requires
;; TARGET_NEON and TARGET_FMA.
2266 (define_expand "neon_vfma<VCVTF:mode>"
2267   [(match_operand:VCVTF 0 "s_register_operand")
2268    (match_operand:VCVTF 1 "s_register_operand")
2269    (match_operand:VCVTF 2 "s_register_operand")
2270    (match_operand:VCVTF 3 "s_register_operand")]
2271   "TARGET_NEON && TARGET_FMA"
2272 {
2273   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2274                                        operands[1]));
2275   DONE;
2276 })
2277
;; Builtin expander for fused multiply-add on the half-precision VH modes.
;; It forwards to fma<mode>4_intrinsic with the operands reordered:
;; operands 2 and 3 are passed second and third, and operand 1 is passed
;; last.  Requires TARGET_NEON_FP16INST.
2278 (define_expand "neon_vfma<VH:mode>"
2279   [(match_operand:VH 0 "s_register_operand")
2280    (match_operand:VH 1 "s_register_operand")
2281    (match_operand:VH 2 "s_register_operand")
2282    (match_operand:VH 3 "s_register_operand")]
2283   "TARGET_NEON_FP16INST"
2284 {
2285   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2286                                        operands[1]));
2287   DONE;
2288 })
2289
;; Builtin expander for fused multiply-subtract on VCVTF modes.  It
;; forwards to fmsub<mode>4_intrinsic with the operands reordered:
;; operands 2 and 3 are passed second and third, and operand 1 is passed
;; last.  Requires TARGET_NEON and TARGET_FMA.
2290 (define_expand "neon_vfms<VCVTF:mode>"
2291   [(match_operand:VCVTF 0 "s_register_operand")
2292    (match_operand:VCVTF 1 "s_register_operand")
2293    (match_operand:VCVTF 2 "s_register_operand")
2294    (match_operand:VCVTF 3 "s_register_operand")]
2295   "TARGET_NEON && TARGET_FMA"
2296 {
2297   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2298                                          operands[1]));
2299   DONE;
2300 })
2301
;; Builtin expander for fused multiply-subtract on the half-precision VH
;; modes.  It forwards to fmsub<mode>4_intrinsic with the operands
;; reordered: operands 2 and 3 are passed second and third, and operand 1
;; is passed last.  Requires TARGET_NEON_FP16INST.
2302 (define_expand "neon_vfms<VH:mode>"
2303   [(match_operand:VH 0 "s_register_operand")
2304    (match_operand:VH 1 "s_register_operand")
2305    (match_operand:VH 2 "s_register_operand")
2306    (match_operand:VH 3 "s_register_operand")]
2307   "TARGET_NEON_FP16INST"
2308 {
2309   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2310                                          operands[1]));
2311   DONE;
2312 })
2313
2314 ;; The expand RTL structure here is not important.
2315 ;; We use the gen_* functions anyway.
2316 ;; We just need something to wrap the iterators around.
2317
;; Builtin expander for the widening fused multiply-add/subtract family
;; (vfmal / vfmsl on the low or high half).  It builds one lane-selection
;; constant with arm_simd_vect_par_cnst_half for <vfml_half_selector> and
;; passes it twice, together with operands 0-3, to the matching
;; vfm<vfml_op>l_<vfml_half><mode>_intrinsic pattern.  Requires
;; TARGET_FP16FML.
2318 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2319   [(set (match_operand:VCVTF 0 "s_register_operand")
2320      (unspec:VCVTF
2321         [(match_operand:VCVTF 1 "s_register_operand")
2322            (PLUSMINUS:<VFML>
2323              (match_operand:<VFML> 2 "s_register_operand")
2324              (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2325   "TARGET_FP16FML"
2326 {
2327   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2328   emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2329                                                              operands[1],
2330                                                              operands[2],
2331                                                              operands[3],
2332                                                              half, half));
2333   DONE;
2334 })
2335
;; vfmal on the low half.  The RTL is an fma whose two multiplicands are
;; float_extend of lanes selected from operands 2 and 3 (operands 4 and 5
;; must satisfy vect_par_constant_low) and whose addend is operand 1, which
;; is tied to the output register by the "0" constraint.  Requires
;; TARGET_FP16FML.
2336 (define_insn "vfmal_low<mode>_intrinsic"
2337  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2338         (fma:VCVTF
2339          (float_extend:VCVTF
2340           (vec_select:<VFMLSEL>
2341            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2342            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2343          (float_extend:VCVTF
2344           (vec_select:<VFMLSEL>
2345            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2346            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2347          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2348  "TARGET_FP16FML"
2349  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2350  [(set_attr "type" "neon_fp_mla_s<q>")]
2351 )
2352
;; vfmsl on the high half.  Same shape as the vfmal patterns, except that
;; the lanes selected from operand 2 are negated (neg) before being
;; float-extended.  Operands 4 and 5 must satisfy vect_par_constant_high,
;; and the addend operand 1 is tied to the output by the "0" constraint.
;; Requires TARGET_FP16FML.
2353 (define_insn "vfmsl_high<mode>_intrinsic"
2354  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2355         (fma:VCVTF
2356          (float_extend:VCVTF
2357           (neg:<VFMLSEL>
2358             (vec_select:<VFMLSEL>
2359               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2360               (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2361          (float_extend:VCVTF
2362           (vec_select:<VFMLSEL>
2363            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2364            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2365          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2366  "TARGET_FP16FML"
2367  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2368  [(set_attr "type" "neon_fp_mla_s<q>")]
2369 )
2370
2371 (define_insn "vfmal_high<mode>_intrinsic"
       ;; Widening fused multiply-add on the parts of operands 2 and 3 chosen
       ;; by lane lists that satisfy vect_par_constant_high (operands 4 and
       ;; 5).  Each selection is float-extended to VCVTF, the two are
       ;; multiplied, and the product is added to the accumulator in
       ;; operand 1, which is tied to the destination.  Printed as vfmal.f16
       ;; using the %<V_hi> forms; enabled only under TARGET_FP16FML.
2372  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2373         (fma:VCVTF
2374          (float_extend:VCVTF
2375           (vec_select:<VFMLSEL>
2376            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2377            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2378          (float_extend:VCVTF
2379           (vec_select:<VFMLSEL>
2380            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2381            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2382          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2383  "TARGET_FP16FML"
2384  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2385  [(set_attr "type" "neon_fp_mla_s<q>")]
2386 )
2387
2388 (define_insn "vfmsl_low<mode>_intrinsic"
       ;; Widening fused multiply-subtract on the parts of operands 2 and 3
       ;; chosen by lane lists that satisfy vect_par_constant_low (operands
       ;; 4 and 5).  The selection from operand 2 is negated before the
       ;; float_extend; the product is added to the accumulator in operand 1,
       ;; which is tied to the destination.  Printed as vfmsl.f16 using the
       ;; %<V_lo> forms; enabled only under TARGET_FP16FML.
2389  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2390         (fma:VCVTF
2391          (float_extend:VCVTF
2392           (neg:<VFMLSEL>
2393             (vec_select:<VFMLSEL>
2394               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2395               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2396          (float_extend:VCVTF
2397           (vec_select:<VFMLSEL>
2398            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2399            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2400          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2401  "TARGET_FP16FML"
2402  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2403  [(set_attr "type" "neon_fp_mla_s<q>")]
2404 )
2405
2406 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
       ;; Expander for the by-lane vfm<vfml_op>l forms in which operands 2
       ;; and 3 share the mode <VFML>.  Operand 4 is the lane number as a
       ;; const_int.  The C body does all the work and ends in DONE: it maps
       ;; the lane through NEON_ENDIAN_LANE_N for <VFML>mode, builds the half
       ;; selector with arm_simd_vect_par_cnst_half, and passes them as
       ;; operands 4 (half) and 5 (lane) of the matching
       ;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic insn.
2407   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2408      (unspec:VCVTF
2409         [(match_operand:VCVTF 1 "s_register_operand")
2410          (PLUSMINUS:<VFML>
2411            (match_operand:<VFML> 2 "s_register_operand")
2412            (match_operand:<VFML> 3 "s_register_operand"))
2413          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2414   "TARGET_FP16FML"
2415 {
2416   rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2417   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2418   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2419                                                (operands[0], operands[1],
2420                                                 operands[2], operands[3],
2421                                                 half, lane));
2422   DONE;
2423 })
2424
2425 (define_insn "vfmal_lane_low<mode>_intrinsic"
       ;; By-lane widening fused multiply-add.  The part of operand 2 chosen
       ;; by operand 4 (vect_par_constant_low) is float-extended and
       ;; multiplied by one HF lane of operand 3 (lane number in operand 5)
       ;; that is duplicated across <VFMLSEL> and float-extended; the product
       ;; is added to the accumulator in operand 1, tied to the destination.
       ;; The output code maps the lane through NEON_ENDIAN_LANE_N.  A lane
       ;; at or beyond GET_MODE_NUNITS (<VFMLSEL>mode) is rebased by that
       ;; count and printed against %<V_hi>3; otherwise it is printed
       ;; unchanged against %<V_lo>3.  Operand 5 is overwritten with the
       ;; lane actually printed.
2426  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2427         (fma:VCVTF
2428          (float_extend:VCVTF
2429           (vec_select:<VFMLSEL>
2430            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2431            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2432          (float_extend:VCVTF
2433            (vec_duplicate:<VFMLSEL>
2434              (vec_select:HF
2435                (match_operand:<VFML> 3 "s_register_operand" "x")
2436                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2437          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2438  "TARGET_FP16FML"
2439  {
2440     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2441     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2442       {
2443         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2444         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2445       }
2446     else
2447       {
2448         operands[5] = GEN_INT (lane);
2449         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2450       }
2451   }
2452  [(set_attr "type" "neon_fp_mla_s<q>")]
2453 )
2454
2455 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
       ;; Expander for the by-lane vfm<vfml_op>l forms in which operand 3 has
       ;; mode <VFMLSEL2> rather than operand 2's <VFML>.  Operand 4 is the
       ;; lane number as a const_int.  The C body ends in DONE: it maps the
       ;; lane through NEON_ENDIAN_LANE_N for <VFMLSEL2>mode (the mode the
       ;; lane is taken from), builds the half selector for <VFML>mode with
       ;; arm_simd_vect_par_cnst_half, and passes them as operands 4 (half)
       ;; and 5 (lane) of the matching
       ;; vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic insn.
2456   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2457      (unspec:VCVTF
2458         [(match_operand:VCVTF 1 "s_register_operand")
2459          (PLUSMINUS:<VFML>
2460            (match_operand:<VFML> 2 "s_register_operand")
2461            (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2462          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2463   "TARGET_FP16FML"
2464 {
2465   rtx lane
2466     = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2467   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2468   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2469                 (operands[0], operands[1], operands[2], operands[3],
2470                  half, lane));
2471   DONE;
2472 })
2473
2474 ;; Used to implement the intrinsics:
2475 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2476 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2477 ;; Needs a bit of care to get the modes of the different sub-expressions right
2478 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2479 ;; S or D subregister to select the appropriate lane from.
2480
2481 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
       ;; By-lane widening fused multiply-add where the lane source (operand
       ;; 3) has mode <VFMLSEL2> while operand 2 has mode <VFML>.  Operand 4
       ;; must satisfy vect_par_constant_low; operand 5 is the lane number;
       ;; operand 1 is the accumulator, tied to the destination.
       ;; The output code splits the endian-adjusted lane into a register
       ;; offset (lane / elts_per_reg) and a lane within that register
       ;; (lane % elts_per_reg), where elts_per_reg is the unit count of
       ;; <VFMLSEL>mode.  Operands 2, 3 and 5 are overwritten before the
       ;; template is returned.
2482  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2483         (fma:VCVTF
2484          (float_extend:VCVTF
2485           (vec_select:<VFMLSEL>
2486            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2487            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2488          (float_extend:VCVTF
2489            (vec_duplicate:<VFMLSEL>
2490              (vec_select:HF
2491                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2492                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2493          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2494  "TARGET_FP16FML"
2495  {
2496    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2497    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2498    int new_lane = lane % elts_per_reg;
2499    int regdiff = lane / elts_per_reg;
2500    operands[5] = GEN_INT (new_lane);
2501    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2502       because we want the print_operand code to print the appropriate
2503       S or D register prefix.  */
2504    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2505    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2506    return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2507  }
2508  [(set_attr "type" "neon_fp_mla_s<q>")]
2509 )
2510
2511 ;; Used to implement the intrinsics:
2512 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2513 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2514 ;; Needs a bit of care to get the modes of the different sub-expressions right
2515 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2516 ;; S or D subregister to select the appropriate lane from.
2517
2518 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
       ;; By-lane widening fused multiply-add where the lane source (operand
       ;; 3) has mode <VFMLSEL2> while operand 2 has mode <VFML>.  Operand 4
       ;; must satisfy vect_par_constant_high; operand 5 is the lane number;
       ;; operand 1 is the accumulator, tied to the destination.
       ;; The output code splits the endian-adjusted lane into a register
       ;; offset (lane / elts_per_reg) and a lane within that register
       ;; (lane % elts_per_reg).  Only operands 3 and 5 are overwritten;
       ;; operand 2 is printed through %<V_hi>2 as matched.
2519  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2520         (fma:VCVTF
2521          (float_extend:VCVTF
2522           (vec_select:<VFMLSEL>
2523            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2524            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2525          (float_extend:VCVTF
2526            (vec_duplicate:<VFMLSEL>
2527              (vec_select:HF
2528                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2529                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2530          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2531  "TARGET_FP16FML"
2532  {
2533    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2534    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2535    int new_lane = lane % elts_per_reg;
2536    int regdiff = lane / elts_per_reg;
2537    operands[5] = GEN_INT (new_lane);
2538    /* We re-create operands[3] in the halved VFMLSEL mode
2539       because we've calculated the correct half-width subreg to extract
2540       the lane from and we want to print *that* subreg instead.  */
2541    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2542    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2543  }
2544  [(set_attr "type" "neon_fp_mla_s<q>")]
2545 )
2546
2547 (define_insn "vfmal_lane_high<mode>_intrinsic"
       ;; By-lane widening fused multiply-add.  The part of operand 2 chosen
       ;; by operand 4 (vect_par_constant_high) is float-extended and
       ;; multiplied by one HF lane of operand 3 (lane number in operand 5)
       ;; that is duplicated across <VFMLSEL> and float-extended; the product
       ;; is added to the accumulator in operand 1, tied to the destination.
       ;; The output code maps the lane through NEON_ENDIAN_LANE_N.  A lane
       ;; at or beyond GET_MODE_NUNITS (<VFMLSEL>mode) is rebased by that
       ;; count and printed against %<V_hi>3; otherwise it is printed
       ;; unchanged against %<V_lo>3.  Operand 2 is always printed as
       ;; %<V_hi>2.
2548  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2549         (fma:VCVTF
2550          (float_extend:VCVTF
2551           (vec_select:<VFMLSEL>
2552            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2553            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2554          (float_extend:VCVTF
2555            (vec_duplicate:<VFMLSEL>
2556              (vec_select:HF
2557                (match_operand:<VFML> 3 "s_register_operand" "x")
2558                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2559          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2560  "TARGET_FP16FML"
2561   {
2562     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2563     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2564       {
2565         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2566         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2567       }
2568     else
2569       {
2570         operands[5] = GEN_INT (lane);
2571         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2572       }
2573   }
2574  [(set_attr "type" "neon_fp_mla_s<q>")]
2575 )
2576
2577 (define_insn "vfmsl_lane_low<mode>_intrinsic"
       ;; By-lane widening fused multiply-subtract.  The part of operand 2
       ;; chosen by operand 4 (vect_par_constant_low) is negated, then
       ;; float-extended and multiplied by one HF lane of operand 3 (lane
       ;; number in operand 5) duplicated across <VFMLSEL>; the product is
       ;; added to the accumulator in operand 1, tied to the destination.
       ;; The output code maps the lane through NEON_ENDIAN_LANE_N.  A lane
       ;; at or beyond GET_MODE_NUNITS (<VFMLSEL>mode) is rebased by that
       ;; count and printed against %<V_hi>3; otherwise it is printed
       ;; unchanged against %<V_lo>3.
2578  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2579         (fma:VCVTF
2580          (float_extend:VCVTF
2581           (neg:<VFMLSEL>
2582             (vec_select:<VFMLSEL>
2583               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2584               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2585          (float_extend:VCVTF
2586            (vec_duplicate:<VFMLSEL>
2587              (vec_select:HF
2588                (match_operand:<VFML> 3 "s_register_operand" "x")
2589                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2590          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2591  "TARGET_FP16FML"
2592  {
2593     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2594     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2595       {
2596         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2597         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2598       }
2599     else
2600       {
2601         operands[5] = GEN_INT (lane);
2602         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2603       }
2604   }
2605  [(set_attr "type" "neon_fp_mla_s<q>")]
2606 )
2607
2608 ;; Used to implement the intrinsics:
2609 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2610 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2611 ;; Needs a bit of care to get the modes of the different sub-expressions right
2612 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2613 ;; S or D subregister to select the appropriate lane from.
2614
2615 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
       ;; By-lane widening fused multiply-subtract where the lane source
       ;; (operand 3) has mode <VFMLSEL2> while operand 2 has mode <VFML>.
       ;; The selection from operand 2 (operand 4, vect_par_constant_low) is
       ;; negated before the float_extend; operand 5 is the lane number;
       ;; operand 1 is the accumulator, tied to the destination.
       ;; The output code splits the endian-adjusted lane into a register
       ;; offset (lane / elts_per_reg) and a lane within that register
       ;; (lane % elts_per_reg), where elts_per_reg is the unit count of
       ;; <VFMLSEL>mode.  Operands 2, 3 and 5 are overwritten before the
       ;; template is returned.
2616  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2617         (fma:VCVTF
2618          (float_extend:VCVTF
2619           (neg:<VFMLSEL>
2620             (vec_select:<VFMLSEL>
2621               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2622               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2623          (float_extend:VCVTF
2624            (vec_duplicate:<VFMLSEL>
2625              (vec_select:HF
2626                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2627                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2628          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2629  "TARGET_FP16FML"
2630  {
2631    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2632    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2633    int new_lane = lane % elts_per_reg;
2634    int regdiff = lane / elts_per_reg;
2635    operands[5] = GEN_INT (new_lane);
2636    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2637       because we want the print_operand code to print the appropriate
2638       S or D register prefix.  */
2639    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2640    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2641    return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2642  }
2643  [(set_attr "type" "neon_fp_mla_s<q>")]
2644 )
2645
2646 ;; Used to implement the intrinsics:
2647 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2648 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2649 ;; Needs a bit of care to get the modes of the different sub-expressions right
2650 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2651 ;; S or D subregister to select the appropriate lane from.
2652
2653 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
       ;; By-lane widening fused multiply-subtract where the lane source
       ;; (operand 3) has mode <VFMLSEL2> while operand 2 has mode <VFML>.
       ;; The selection from operand 2 (operand 4, vect_par_constant_high)
       ;; is negated before the float_extend; operand 5 is the lane number;
       ;; operand 1 is the accumulator, tied to the destination.
       ;; The output code splits the endian-adjusted lane into a register
       ;; offset (lane / elts_per_reg) and a lane within that register
       ;; (lane % elts_per_reg).  Only operands 3 and 5 are overwritten;
       ;; operand 2 is printed through %<V_hi>2 as matched.
2654  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2655         (fma:VCVTF
2656          (float_extend:VCVTF
2657           (neg:<VFMLSEL>
2658             (vec_select:<VFMLSEL>
2659              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2660              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2661          (float_extend:VCVTF
2662            (vec_duplicate:<VFMLSEL>
2663              (vec_select:HF
2664                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2665                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2666          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2667  "TARGET_FP16FML"
2668  {
2669    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2670    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2671    int new_lane = lane % elts_per_reg;
2672    int regdiff = lane / elts_per_reg;
2673    operands[5] = GEN_INT (new_lane);
2674    /* We re-create operands[3] in the halved VFMLSEL mode
2675       because we've calculated the correct half-width subreg to extract
2676       the lane from and we want to print *that* subreg instead.  */
2677    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2678    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2679  }
2680  [(set_attr "type" "neon_fp_mla_s<q>")]
2681 )
2682
2683 (define_insn "vfmsl_lane_high<mode>_intrinsic"
       ;; By-lane widening fused multiply-subtract.  The part of operand 2
       ;; chosen by operand 4 (vect_par_constant_high) is negated, then
       ;; float-extended and multiplied by one HF lane of operand 3 (lane
       ;; number in operand 5) duplicated across <VFMLSEL>; the product is
       ;; added to the accumulator in operand 1, tied to the destination.
       ;; The output code maps the lane through NEON_ENDIAN_LANE_N.  A lane
       ;; at or beyond GET_MODE_NUNITS (<VFMLSEL>mode) is rebased by that
       ;; count and printed against %<V_hi>3; otherwise it is printed
       ;; unchanged against %<V_lo>3.  Operand 2 is always printed as
       ;; %<V_hi>2.
2684  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2685         (fma:VCVTF
2686          (float_extend:VCVTF
2687           (neg:<VFMLSEL>
2688             (vec_select:<VFMLSEL>
2689              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2690              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2691          (float_extend:VCVTF
2692            (vec_duplicate:<VFMLSEL>
2693              (vec_select:HF
2694                (match_operand:<VFML> 3 "s_register_operand" "x")
2695                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2696          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2697  "TARGET_FP16FML"
2698   {
2699     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2700     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2701       {
2702         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2703         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2704       }
2705     else
2706       {
2707         operands[5] = GEN_INT (lane);
2708         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2709       }
2710   }
2711  [(set_attr "type" "neon_fp_mla_s<q>")]
2712 )
2713
2714 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2715
2716 (define_insn "neon_vmla<mode>_unspec"
       ;; Multiply-accumulate expressed as an opaque UNSPEC_VMLA over the
       ;; VDQW modes.  Operand 1 is the accumulator and is tied to the
       ;; destination ("0"); operands 2 and 3 are the multiplicands printed
       ;; in the vmla template.  The "type" attribute is neon_fp_mla_s<q>
       ;; when <Is_float_mode> holds and neon_mla_<V_elem_ch><q> otherwise.
2717   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2718         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2719                       (match_operand:VDQW 2 "s_register_operand" "w")
2720                       (match_operand:VDQW 3 "s_register_operand" "w")]
2721                     UNSPEC_VMLA))]
2722   "TARGET_NEON"
2723   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2724   [(set (attr "type")
2725       (if_then_else (match_test "<Is_float_mode>")
2726                     (const_string "neon_fp_mla_s<q>")
2727                     (const_string "neon_mla_<V_elem_ch><q>")))]
2728 )
2729
2730 (define_insn "neon_vmlal<sup><mode>"
       ;; Long multiply-accumulate as an unspec over the VMLAL iterator.
       ;; The destination and the accumulator (operand 1, tied to it) have
       ;; mode <V_widen>; the multiplicands (operands 2 and 3) have the VW
       ;; mode.  Printed as vmlal with %q0 for the result and %P2/%P3 for
       ;; the sources.
2731   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2732         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2733                            (match_operand:VW 2 "s_register_operand" "w")
2734                            (match_operand:VW 3 "s_register_operand" "w")]
2735                           VMLAL))]
2736   "TARGET_NEON"
2737   "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2738   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2739 )
2740
2741 (define_expand "neon_vmls<mode>"
       ;; Expander for multiply-subtract over the VDQW modes; the C body
       ;; emits the insn itself and ends in DONE.  For non-float modes, or
       ;; when flag_unsafe_math_optimizations is set, it emits
       ;; mul<mode>3neg<mode>add<mode>_neon; otherwise it emits
       ;; neon_vmls<mode>_unspec.  All four operands are forwarded unchanged.
2742   [(match_operand:VDQW 0 "s_register_operand" "=w")
2743    (match_operand:VDQW 1 "s_register_operand" "0")
2744    (match_operand:VDQW 2 "s_register_operand" "w")
2745    (match_operand:VDQW 3 "s_register_operand" "w")]
2746   "TARGET_NEON"
2747 {
2748   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2749     emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2750                  operands[1], operands[2], operands[3]));
2751   else
2752     emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2753                                            operands[2], operands[3]));
2754   DONE;
2755 })
2756
2757 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2758
2759 (define_insn "neon_vmls<mode>_unspec"
       ;; Multiply-subtract expressed as an opaque UNSPEC_VMLS over the VDQW
       ;; modes.  Operand 1 is the accumulator and is tied to the
       ;; destination ("0"); operands 2 and 3 are the multiplicands printed
       ;; in the vmls template.  The "type" attribute is neon_fp_mla_s<q>
       ;; when <Is_float_mode> holds and neon_mla_<V_elem_ch><q> otherwise.
2760   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2761         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2762                       (match_operand:VDQW 2 "s_register_operand" "w")
2763                       (match_operand:VDQW 3 "s_register_operand" "w")]
2764                     UNSPEC_VMLS))]
2765   "TARGET_NEON"
2766   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2767   [(set (attr "type")
2768       (if_then_else (match_test "<Is_float_mode>")
2769                     (const_string "neon_fp_mla_s<q>")
2770                     (const_string "neon_mla_<V_elem_ch><q>")))]
2771 )
2772
2773 (define_insn "neon_vmlsl<sup><mode>"
       ;; Long multiply-subtract as an unspec over the VMLSL iterator.  The
       ;; destination and the accumulator (operand 1, tied to it) have mode
       ;; <V_widen>; the multiplicands (operands 2 and 3) have the VW mode.
       ;; Printed as vmlsl with %q0 for the result and %P2/%P3 for the
       ;; sources.
2774   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2775         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2776                            (match_operand:VW 2 "s_register_operand" "w")
2777                            (match_operand:VW 3 "s_register_operand" "w")]
2778                           VMLSL))]
2779   "TARGET_NEON"
2780   "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2781   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2782 )
2783
2784 ;; vqdmulh, vqrdmulh
2785 (define_insn "neon_vq<r>dmulh<mode>"
       ;; Two-input unspec over the VQDMULH iterator for the VMDQI modes;
       ;; <r> in the name and mnemonic distinguishes vqdmulh from vqrdmulh.
       ;; Neither input is tied to the destination.
2786   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2787         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2788                        (match_operand:VMDQI 2 "s_register_operand" "w")]
2789                       VQDMULH))]
2790   "TARGET_NEON"
2791   "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2792   [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2793 )
2794
2795 ;; vqrdmlah, vqrdmlsh
2796 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
       ;; Three-input unspec over the VQRDMLH_AS iterator for the VMDQI
       ;; modes; <neon_rdma_as> fills in the letter that separates the
       ;; vqrdmlah and vqrdmlsh mnemonics.  Operand 1 is the accumulator,
       ;; tied to the destination; operands 2 and 3 are printed as sources.
       ;; Unlike the neighbouring patterns this one is gated on
       ;; TARGET_NEON_RDMA rather than TARGET_NEON.
       ;; NOTE(review): the "type" value ends in _long although all operands
       ;; share one mode -- confirm this scheduling class is intended.
2797   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2798         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2799                        (match_operand:VMDQI 2 "s_register_operand" "w")
2800                        (match_operand:VMDQI 3 "s_register_operand" "w")]
2801                       VQRDMLH_AS))]
2802   "TARGET_NEON_RDMA"
2803   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2804   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2805 )
2806
2807 (define_insn "neon_vqdmlal<mode>"
       ;; UNSPEC_VQDMLAL with a <V_widen> destination and accumulator
       ;; (operand 1, tied to the destination) and VMDI-mode multiplicands
       ;; (operands 2 and 3).  Printed as vqdmlal with %q0 for the result
       ;; and %P2/%P3 for the sources.
2808   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2809         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2810                            (match_operand:VMDI 2 "s_register_operand" "w")
2811                            (match_operand:VMDI 3 "s_register_operand" "w")]
2812                           UNSPEC_VQDMLAL))]
2813   "TARGET_NEON"
2814   "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2815   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2816 )
2817
2818 (define_insn "neon_vqdmlsl<mode>"
       ;; UNSPEC_VQDMLSL with a <V_widen> destination and accumulator
       ;; (operand 1, tied to the destination) and VMDI-mode multiplicands
       ;; (operands 2 and 3).  Printed as vqdmlsl with %q0 for the result
       ;; and %P2/%P3 for the sources.
2819   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2820         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2821                            (match_operand:VMDI 2 "s_register_operand" "w")
2822                            (match_operand:VMDI 3 "s_register_operand" "w")]
2823                           UNSPEC_VQDMLSL))]
2824   "TARGET_NEON"
2825   "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2826   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2827 )
2828
2829 (define_insn "neon_vmull<sup><mode>"
       ;; Long multiply as an unspec over the VMULL iterator: two VW-mode
       ;; sources (operands 1 and 2) produce a <V_widen> result.  There is
       ;; no accumulator, so no operand is tied to the destination.
2830   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2831         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2832                            (match_operand:VW 2 "s_register_operand" "w")]
2833                           VMULL))]
2834   "TARGET_NEON"
2835   "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2836   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2837 )
2838
2839 (define_insn "neon_vqdmull<mode>"
       ;; UNSPEC_VQDMULL: two VMDI-mode sources (operands 1 and 2) produce a
       ;; <V_widen> result.  No operand is tied to the destination.
2840   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2841         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2842                            (match_operand:VMDI 2 "s_register_operand" "w")]
2843                           UNSPEC_VQDMULL))]
2844   "TARGET_NEON"
2845   "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2846   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2847 )
2848
2849 (define_expand "neon_vsub<mode>"
       ;; Expander for subtraction over the VCVTF modes; the C body emits
       ;; the insn itself and ends in DONE.  When the mode is not a float
       ;; mode or flag_unsafe_math_optimizations is set it emits the generic
       ;; sub<mode>3 pattern; otherwise it emits neon_vsub<mode>_unspec.
2850   [(match_operand:VCVTF 0 "s_register_operand" "=w")
2851    (match_operand:VCVTF 1 "s_register_operand" "w")
2852    (match_operand:VCVTF 2 "s_register_operand" "w")]
2853   "TARGET_NEON"
2854 {
2855   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2856     emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2857   else
2858     emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2859                                            operands[2]));
2860   DONE;
2861 })
2862
2863 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2864
2865 (define_insn "neon_vsub<mode>_unspec"
       ;; Subtraction expressed as an opaque UNSPEC_VSUB over the VCVTF
       ;; modes; operand 1 and operand 2 are printed in that order after the
       ;; destination.  The "type" attribute is neon_fp_addsub_s<q> when
       ;; <Is_float_mode> holds and neon_sub<q> otherwise.
2866   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2867         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2868                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2869                      UNSPEC_VSUB))]
2870   "TARGET_NEON"
2871   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2872   [(set (attr "type")
2873       (if_then_else (match_test "<Is_float_mode>")
2874                     (const_string "neon_fp_addsub_s<q>")
2875                     (const_string "neon_sub<q>")))]
2876 )
2877
2878 (define_insn "neon_vsubl<sup><mode>"
       ;; Long subtract as an unspec over the VSUBL iterator: two VDI-mode
       ;; sources (operands 1 and 2) produce a <V_widen> result, printed as
       ;; vsubl with %q0 for the result and %P1/%P2 for the sources.
2879   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2880         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2881                            (match_operand:VDI 2 "s_register_operand" "w")]
2882                           VSUBL))]
2883   "TARGET_NEON"
2884   "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2885   [(set_attr "type" "neon_sub_long")]
2886 )
2887
2888 (define_insn "neon_vsubw<sup><mode>"
       ;; Wide subtract as an unspec over the VSUBW iterator: operand 1
       ;; already has the <V_widen> mode of the result, while operand 2 has
       ;; the VDI mode.  Printed as vsubw with %q0, %q1 and %P2.
2889   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2890         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2891                            (match_operand:VDI 2 "s_register_operand" "w")]
2892                           VSUBW))]
2893   "TARGET_NEON"
2894   "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2895   [(set_attr "type" "neon_sub_widen")]
2896 )
2897
2898 (define_insn "neon_vqsub<sup><mode>"
       ;; Two-input unspec over the VQSUB iterator for the VDQIX modes,
       ;; printed as vqsub; all three operands share one mode and are
       ;; printed with %<V_reg>.
2899   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2900         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2901                        (match_operand:VDQIX 2 "s_register_operand" "w")]
2902                       VQSUB))]
2903   "TARGET_NEON"
2904   "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2905   [(set_attr "type" "neon_qsub<q>")]
2906 )
2907
2908 (define_insn "neon_vhsub<sup><mode>"
       ;; Two-input unspec over the VHSUB iterator for the VDQIW modes,
       ;; printed as vhsub; all three operands share one mode and are
       ;; printed with %<V_reg>.
2909   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2910         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2911                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2912                       VHSUB))]
2913   "TARGET_NEON"
2914   "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2915   [(set_attr "type" "neon_sub_halve<q>")]
2916 )
2917
2918 (define_insn "neon_v<r>subhn<mode>"
       ;; Narrowing subtract as an unspec over the VSUBHN iterator: two
       ;; VN-mode sources (printed with %q) produce a <V_narrow> result
       ;; (printed with %P).  <r> in the name and mnemonic distinguishes
       ;; vsubhn from vrsubhn.
2919   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2920         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2921                             (match_operand:VN 2 "s_register_operand" "w")]
2922                            VSUBHN))]
2923   "TARGET_NEON"
2924   "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2925   [(set_attr "type" "neon_sub_halve_narrow_q")]
2926 )
2927
2928 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2929 ;; without unsafe math optimizations.
2930 (define_expand "neon_vc<cmp_op><mode>"
       ;; Expander for the vector comparisons over the COMPARISONS code
       ;; iterator and the VDQW modes; the C body emits the insn itself and
       ;; ends in DONE.  Operand 2 may be a register or zero
       ;; (reg_or_zero_operand).  For float vector modes without
       ;; flag_unsafe_math_optimizations it dispatches on the mode to the
       ;; v2sf or v4sf *_insn_unspec pattern (any other mode reaching that
       ;; switch is gcc_unreachable); in every other case it emits
       ;; neon_vc<cmp_op><mode>_insn.
2931   [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2932      (neg:<V_cmp_result>
2933        (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2934                          (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2935   "TARGET_NEON"
2936   {
2937     /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2938        are enabled.  */
2939     if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2940         && !flag_unsafe_math_optimizations)
2941       {
2942         /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2943            we define gen_neon_vceq<mode>_insn_unspec only for float modes
2944            whereas this expander iterates over the integer modes as well,
2945            but we will never expand to UNSPECs for the integer comparisons.  */
2946         switch (<MODE>mode)
2947           {
2948             case E_V2SFmode:
2949               emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2950                                                               operands[1],
2951                                                               operands[2]));
2952               break;
2953             case E_V4SFmode:
2954               emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2955                                                               operands[1],
2956                                                               operands[2]));
2957               break;
2958             default:
2959               gcc_unreachable ();
2960           }
2961       }
2962     else
2963       emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2964                                                  operands[1],
2965                                                  operands[2]));
2966     DONE;
2967   }
2968 )
2969
2970 (define_insn "neon_vc<cmp_op><mode>_insn"
       ;; Vector comparison written as the negation of a COMPARISONS rtx over
       ;; the VDQW modes.  The insn condition excludes float vector modes
       ;; unless flag_unsafe_math_optimizations is set.
       ;; Two alternatives: operand 2 is a register ("w") or zero ("Dz").
       ;; The output code builds the template at run time: the type letter
       ;; is "f" for float vector modes and <cmp_type> otherwise, and the
       ;; last operand is %<V_reg>2 for alternative 0 or the literal #0 for
       ;; alternative 1.  The text is emitted with output_asm_insn and an
       ;; empty template is returned.  The "type" attribute follows whether
       ;; operand 2 matches zero_operand.
2971   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2972         (neg:<V_cmp_result>
2973           (COMPARISONS:<V_cmp_result>
2974             (match_operand:VDQW 1 "s_register_operand" "w,w")
2975             (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2976   "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2977                     && !flag_unsafe_math_optimizations)"
2978   {
2979     char pattern[100];
2980     sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2981                       " %%<V_reg>1, %s",
2982                        GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2983                          ? "f" : "<cmp_type>",
2984                        which_alternative == 0
2985                          ? "%<V_reg>2" : "#0");
2986     output_asm_insn (pattern, operands);
2987     return "";
2988   }
2989   [(set (attr "type")
2990         (if_then_else (match_operand 2 "zero_operand")
2991                       (const_string "neon_compare_zero<q>")
2992                       (const_string "neon_compare<q>")))]
2993 )
2994
2995 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
       ;; Float vector comparison kept as an opaque unspec over the
       ;; NEON_VCMP iterator for the VCVTF modes.  Two alternatives: operand
       ;; 2 is a register ("w") or zero ("Dz").  The output code builds the
       ;; template at run time with a fixed "f" type letter, printing
       ;; %<V_reg>2 for alternative 0 or the literal #0 for alternative 1,
       ;; emits it with output_asm_insn and returns an empty template.
2996   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2997         (unspec:<V_cmp_result>
2998           [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2999            (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
3000           NEON_VCMP))]
3001   "TARGET_NEON"
3002   {
3003     char pattern[100];
3004     sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3005                        " %%<V_reg>1, %s",
3006                        which_alternative == 0
3007                          ? "%<V_reg>2" : "#0");
3008     output_asm_insn (pattern, operands);
3009     return "";
3010 }
3011   [(set_attr "type" "neon_fp_compare_s<q>")]
3012 )
3013
3014 (define_expand "neon_vc<cmp_op><mode>"
       ;; Expander for the vector comparisons over the COMPARISONS code
       ;; iterator and the VH modes, gated on TARGET_NEON_FP16INST; the C
       ;; body emits the insn itself and ends in DONE.  For float vector
       ;; modes without flag_unsafe_math_optimizations it emits the
       ;; *_fp16insn_unspec pattern, otherwise the *_fp16insn pattern.
3015  [(match_operand:<V_cmp_result> 0 "s_register_operand")
3016   (neg:<V_cmp_result>
3017    (COMPARISONS:VH
3018     (match_operand:VH 1 "s_register_operand")
3019     (match_operand:VH 2 "reg_or_zero_operand")))]
3020  "TARGET_NEON_FP16INST"
3021 {
3022   /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
3023      are enabled.  */
3024   if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3025       && !flag_unsafe_math_optimizations)
3026     emit_insn
3027       (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
3028        (operands[0], operands[1], operands[2]));
3029   else
3030     emit_insn
3031       (gen_neon_vc<cmp_op><mode>_fp16insn
3032        (operands[0], operands[1], operands[2]));
3033   DONE;
3034 })
3035
3036 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
       ;; Vector comparison written as the negation of a COMPARISONS rtx over
       ;; the VH modes, gated on TARGET_NEON_FP16INST.  The insn condition
       ;; excludes float vector modes unless flag_unsafe_math_optimizations
       ;; is set.  Two alternatives: operand 2 is a register ("w") or zero
       ;; ("Dz").  The output code builds the template at run time: the type
       ;; letter is "f" for float vector modes and <cmp_type> otherwise, and
       ;; the last operand is %<V_reg>2 for alternative 0 or the literal #0
       ;; for alternative 1.  The text is emitted with output_asm_insn and
       ;; an empty template is returned.
3037  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3038    (neg:<V_cmp_result>
3039     (COMPARISONS:<V_cmp_result>
3040      (match_operand:VH 1 "s_register_operand" "w,w")
3041      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3042  "TARGET_NEON_FP16INST
3043   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3044   && !flag_unsafe_math_optimizations)"
3045 {
3046   char pattern[100];
3047   sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3048            " %%<V_reg>1, %s",
3049            GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3050            ? "f" : "<cmp_type>",
3051            which_alternative == 0
3052            ? "%<V_reg>2" : "#0");
3053   output_asm_insn (pattern, operands);
3054   return "";
3055 }
3056  [(set (attr "type")
3057    (if_then_else (match_operand 2 "zero_operand")
3058     (const_string "neon_compare_zero<q>")
3059     (const_string "neon_compare<q>")))])
3060
3061 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
       ;; Vector comparison kept as an opaque unspec over the NEON_VCMP
       ;; iterator for the VH modes, gated on TARGET_NEON_FP16INST.  Two
       ;; alternatives: operand 2 is a register ("w") or zero ("Dz").  The
       ;; output code builds the template at run time with a fixed "f" type
       ;; letter, printing %<V_reg>2 for alternative 0 or the literal #0 for
       ;; alternative 1, emits it with output_asm_insn and returns an empty
       ;; template.
3062  [(set
3063    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3064    (unspec:<V_cmp_result>
3065     [(match_operand:VH 1 "s_register_operand" "w,w")
3066      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3067     NEON_VCMP))]
3068  "TARGET_NEON_FP16INST"
3069 {
3070   char pattern[100];
3071   sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3072            " %%<V_reg>1, %s",
3073            which_alternative == 0
3074            ? "%<V_reg>2" : "#0");
3075   output_asm_insn (pattern, operands);
3076   return "";
3077 }
3078  [(set_attr "type" "neon_fp_compare_s<q>")])
3079
;; Unsigned comparison (GTUGEU iterator) over the VDQIW modes; both sources
;; must be registers and the mnemonic uses the .u type prefix.
3080 (define_insn "neon_vc<cmp_op>u<mode>"
3081   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3082         (neg:<V_cmp_result>
3083           (GTUGEU:<V_cmp_result>
3084             (match_operand:VDQIW 1 "s_register_operand" "w")
3085             (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3086   "TARGET_NEON"
3087   "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3088   [(set_attr "type" "neon_compare<q>")]
3089 )
3090
;; Expander for the absolute-value comparison over the VCVTF modes.  Emits
;; the RTL-comparison insn when flag_unsafe_math_optimizations is set and
;; the unspec insn otherwise.
3091 (define_expand "neon_vca<cmp_op><mode>"
3092   [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3093         (neg:<V_cmp_result>
3094           (GTGE:<V_cmp_result>
3095             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3096             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3097   "TARGET_NEON"
3098   {
3099     if (flag_unsafe_math_optimizations)
3100       emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3101                                                   operands[2]));
3102     else
3103       emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
3104                                                          operands[1],
3105                                                          operands[2]));
3106     DONE;
3107   }
3108 )
3109
;; RTL-comparison form of vac<cmp_op> over the VCVTF modes; only enabled
;; when flag_unsafe_math_optimizations is set.
3110 (define_insn "neon_vca<cmp_op><mode>_insn"
3111   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3112         (neg:<V_cmp_result>
3113           (GTGE:<V_cmp_result>
3114             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3115             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3116   "TARGET_NEON && flag_unsafe_math_optimizations"
3117   "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3118   [(set_attr "type" "neon_fp_compare_s<q>")]
3119 )
3120
;; Unspec (NEON_VACMP) form of the absolute-value comparison over the VCVTF
;; modes; it has no dependence on flag_unsafe_math_optimizations.
3121 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3122   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3123         (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3124                                 (match_operand:VCVTF 2 "s_register_operand" "w")]
3125                                NEON_VACMP))]
3126   "TARGET_NEON"
3127   "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3128   [(set_attr "type" "neon_fp_compare_s<q>")]
3129 )
3130
;; Expander for the absolute-value comparison (GLTE iterator) over the VH
;; modes.  Emits the RTL-comparison insn when
;; flag_unsafe_math_optimizations is set and the unspec insn otherwise.
3131 (define_expand "neon_vca<cmp_op><mode>"
3132   [(set
3133     (match_operand:<V_cmp_result> 0 "s_register_operand")
3134     (neg:<V_cmp_result>
3135      (GLTE:<V_cmp_result>
3136       (abs:VH (match_operand:VH 1 "s_register_operand"))
3137       (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3138  "TARGET_NEON_FP16INST"
3139 {
3140   if (flag_unsafe_math_optimizations)
3141     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3142                (operands[0], operands[1], operands[2]));
3143   else
3144     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3145                (operands[0], operands[1], operands[2]));
3146   DONE;
3147 })
3148
;; RTL-comparison form of vac<cmp_op> over the VH modes; only enabled when
;; flag_unsafe_math_optimizations is set.
3149 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3150   [(set
3151     (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3152     (neg:<V_cmp_result>
3153      (GLTE:<V_cmp_result>
3154       (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3155       (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3156  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3157  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3158  [(set_attr "type" "neon_fp_compare_s<q>")]
3159 )
3160
;; Unspec (NEON_VAGLTE) form of the absolute-value comparison over the VH
;; modes.  Gated on TARGET_NEON_FP16INST, like the expander that emits it
;; and the other VH comparison patterns, so the pattern is not enabled on
;; targets where only TARGET_NEON holds.
3161 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3162  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3163    (unspec:<V_cmp_result>
3164     [(match_operand:VH 1 "s_register_operand" "w")
3165      (match_operand:VH 2 "s_register_operand" "w")]
3166     NEON_VAGLTE))]
3167  "TARGET_NEON_FP16INST"
3168  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3169  [(set_attr "type" "neon_fp_compare_s<q>")]
3170 )
3171
;; Compare-against-zero expander for the VH modes.  It forwards to
;; neon_vc<cmp_op><mode> with CONST0_RTX (<MODE>mode) as the second source.
3172 (define_expand "neon_vc<cmp_op>z<mode>"
3173  [(set
3174    (match_operand:<V_cmp_result> 0 "s_register_operand")
3175    (COMPARISONS:<V_cmp_result>
3176     (match_operand:VH 1 "s_register_operand")
3177     (const_int 0)))]
3178  "TARGET_NEON_FP16INST"
3179  {
3180   emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3181                                         CONST0_RTX (<MODE>mode)));
3182   DONE;
3183 })
3184
;; vtst over the VDQIW modes, modelled as UNSPEC_VTST of two registers.
3185 (define_insn "neon_vtst<mode>"
3186   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3187         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3188                        (match_operand:VDQIW 2 "s_register_operand" "w")]
3189                       UNSPEC_VTST))]
3190   "TARGET_NEON"
3191   "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3192   [(set_attr "type" "neon_tst<q>")]
3193 )
3194
;; Integer vabd over the VDQIW modes (VABD unspec iterator); <sup> supplies
;; the type prefix printed in the mnemonic.
3195 (define_insn "neon_vabd<sup><mode>"
3196   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3197         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3198                       (match_operand:VDQIW 2 "s_register_operand" "w")]
3199                      VABD))]
3200   "TARGET_NEON"
3201   "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3202   [(set_attr "type" "neon_abd<q>")]
3203 )
3204
;; vabd over the VH modes, modelled as UNSPEC_VABD_F.
;; NOTE(review): the type attribute is "neon_abd<q>", whereas the VCVTF
;; variant neon_vabdf<mode> uses "neon_fp_abd_s<q>" -- confirm this
;; difference is intended.
3205 (define_insn "neon_vabd<mode>"
3206   [(set (match_operand:VH 0 "s_register_operand" "=w")
3207     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3208                 (match_operand:VH 2 "s_register_operand" "w")]
3209      UNSPEC_VABD_F))]
3210  "TARGET_NEON_FP16INST"
3211  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3212   [(set_attr "type" "neon_abd<q>")]
3213 )
3214
;; vabd over the VCVTF modes, modelled as UNSPEC_VABD_F.
3215 (define_insn "neon_vabdf<mode>"
3216   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3217         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3218                       (match_operand:VCVTF 2 "s_register_operand" "w")]
3219                      UNSPEC_VABD_F))]
3220   "TARGET_NEON"
3221   "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3222   [(set_attr "type" "neon_fp_abd_s<q>")]
3223 )
3224
;; vabdl: VW-mode sources, <V_widen>-mode result (VABDL unspec iterator).
;; The destination is printed with %q and the sources with %P.
3225 (define_insn "neon_vabdl<sup><mode>"
3226   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3227         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3228                            (match_operand:VW 2 "s_register_operand" "w")]
3229                           VABDL))]
3230   "TARGET_NEON"
3231   "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3232   [(set_attr "type" "neon_abd_long")]
3233 )
3234
;; vaba: adds the VABD unspec of operands 2 and 3 to operand 1.  Operand 1
;; is tied to the destination by the "0" constraint, so only operands 0, 2
;; and 3 appear in the assembler template.
3235 (define_insn "neon_vaba<sup><mode>"
3236   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3237         (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3238                                    (match_operand:VDQIW 3 "s_register_operand" "w")]
3239                                   VABD)
3240                     (match_operand:VDQIW 1 "s_register_operand" "0")))]
3241   "TARGET_NEON"
3242   "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3243   [(set_attr "type" "neon_arith_acc<q>")]
3244 )
3245
;; vabal: adds the VABDL unspec of the VW-mode operands 2 and 3 to the
;; <V_widen>-mode operand 1, which is tied to the destination ("0").
3246 (define_insn "neon_vabal<sup><mode>"
3247   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3248         (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3249                                            (match_operand:VW 3 "s_register_operand" "w")]
3250                                            VABDL)
3251                          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3252   "TARGET_NEON"
3253   "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3254   [(set_attr "type" "neon_arith_acc<q>")]
3255 )
3256
;; Integer v<maxmin> over the VDQIW modes (VMAXMIN unspec iterator).
3257 (define_insn "neon_v<maxmin><sup><mode>"
3258   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3259         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3260                       (match_operand:VDQIW 2 "s_register_operand" "w")]
3261                      VMAXMIN))]
3262   "TARGET_NEON"
3263   "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3264   [(set_attr "type" "neon_minmax<q>")]
3265 )
3266
;; v<maxmin> over the VCVTF modes (VMAXMINF unspec iterator).
3267 (define_insn "neon_v<maxmin>f<mode>"
3268   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3269         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3270                       (match_operand:VCVTF 2 "s_register_operand" "w")]
3271                      VMAXMINF))]
3272   "TARGET_NEON"
3273   "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3274   [(set_attr "type" "neon_fp_minmax_s<q>")]
3275 )
3276
;; v<maxmin> over the VH modes (VMAXMINF unspec iterator); requires
;; TARGET_NEON_FP16INST.
3277 (define_insn "neon_v<maxmin>f<mode>"
3278  [(set (match_operand:VH 0 "s_register_operand" "=w")
3279    (unspec:VH
3280     [(match_operand:VH 1 "s_register_operand" "w")
3281      (match_operand:VH 2 "s_register_operand" "w")]
3282     VMAXMINF))]
3283  "TARGET_NEON_FP16INST"
3284  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3285  [(set_attr "type" "neon_fp_minmax_s<q>")]
3286 )
3287
;; vp<maxmin>.f16 on V4HF only (VPMAXMINF unspec iterator); all operands
;; are printed with %P.
3288 (define_insn "neon_vp<maxmin>fv4hf"
3289  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3290    (unspec:V4HF
3291     [(match_operand:V4HF 1 "s_register_operand" "w")
3292      (match_operand:V4HF 2 "s_register_operand" "w")]
3293     VPMAXMINF))]
3294  "TARGET_NEON_FP16INST"
3295  "vp<maxmin>.f16\t%P0, %P1, %P2"
3296   [(set_attr "type" "neon_reduc_minmax")]
3297 )
3298
;; <fmaxmin_op> over the VH modes (VMAXMINFNM unspec iterator); requires
;; TARGET_NEON_FP16INST.
3299 (define_insn "neon_<fmaxmin_op><mode>"
3300  [(set
3301    (match_operand:VH 0 "s_register_operand" "=w")
3302    (unspec:VH
3303     [(match_operand:VH 1 "s_register_operand" "w")
3304      (match_operand:VH 2 "s_register_operand" "w")]
3305     VMAXMINFNM))]
3306  "TARGET_NEON_FP16INST"
3307  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3308  [(set_attr "type" "neon_fp_minmax_s<q>")]
3309 )
3310
3311 ;; v<maxmin>nm intrinsics.
;; VCVTF variant (VMAXMINFNM unspec iterator); requires both TARGET_NEON
;; and TARGET_VFP5.
3312 (define_insn "neon_<fmaxmin_op><mode>"
3313   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3314         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3315                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3316                        VMAXMINFNM))]
3317   "TARGET_NEON && TARGET_VFP5"
3318   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3319   [(set_attr "type" "neon_fp_minmax_s<q>")]
3320 )
3321
3322 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and assembler template as the VCVTF
;; neon_<fmaxmin_op><mode> pattern, under the name <fmaxmin><mode>3.
3323 (define_insn "<fmaxmin><mode>3"
3324   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3325         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3326                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3327                        VMAXMINFNM))]
3328   "TARGET_NEON && TARGET_VFP5"
3329   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3330   [(set_attr "type" "neon_fp_minmax_s<q>")]
3331 )
3332
;; Expander for vpadd over the VD modes; forwards all three operands to
;; neon_vpadd_internal<mode>.
3333 (define_expand "neon_vpadd<mode>"
3334   [(match_operand:VD 0 "s_register_operand" "=w")
3335    (match_operand:VD 1 "s_register_operand" "w")
3336    (match_operand:VD 2 "s_register_operand" "w")]
3337   "TARGET_NEON"
3338 {
3339   emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3340                                             operands[2]));
3341   DONE;
3342 })
3343
;; vpaddl: one VDQIW-mode source, <V_double_width>-mode result (VPADDL
;; unspec iterator).
3344 (define_insn "neon_vpaddl<sup><mode>"
3345   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3346         (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3347                                  VPADDL))]
3348   "TARGET_NEON"
3349   "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3350   [(set_attr "type" "neon_reduc_add_long")]
3351 )
3352
;; vpadal (VPADAL unspec iterator): operand 1 is the <V_double_width>-mode
;; input tied to the destination ("0"); operand 2 is the VDQIW-mode source.
3353 (define_insn "neon_vpadal<sup><mode>"
3354   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3355         (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3356                                   (match_operand:VDQIW 2 "s_register_operand" "w")]
3357                                  VPADAL))]
3358   "TARGET_NEON"
3359   "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3360   [(set_attr "type" "neon_reduc_add_acc")]
3361 )
3362
;; Integer vp<maxmin> over the VDI modes (VPMAXMIN unspec iterator).
3363 (define_insn "neon_vp<maxmin><sup><mode>"
3364   [(set (match_operand:VDI 0 "s_register_operand" "=w")
3365         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3366                     (match_operand:VDI 2 "s_register_operand" "w")]
3367                    VPMAXMIN))]
3368   "TARGET_NEON"
3369   "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3370   [(set_attr "type" "neon_reduc_minmax<q>")]
3371 )
3372
;; vp<maxmin> over the VCVTF modes (VPMAXMINF unspec iterator).
3373 (define_insn "neon_vp<maxmin>f<mode>"
3374   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3375         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3376                     (match_operand:VCVTF 2 "s_register_operand" "w")]
3377                    VPMAXMINF))]
3378   "TARGET_NEON"
3379   "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3380   [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3381 )
3382
;; vrecps over the VCVTF modes, modelled as UNSPEC_VRECPS of two registers.
3383 (define_insn "neon_vrecps<mode>"
3384   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3385         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3386                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3387                       UNSPEC_VRECPS))]
3388   "TARGET_NEON"
3389   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3390   [(set_attr "type" "neon_fp_recps_s<q>")]
3391 )
3392
;; vrecps over the VH modes (UNSPEC_VRECPS); requires TARGET_NEON_FP16INST.
3393 (define_insn "neon_vrecps<mode>"
3394   [(set
3395     (match_operand:VH 0 "s_register_operand" "=w")
3396     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3397                 (match_operand:VH 2 "s_register_operand" "w")]
3398      UNSPEC_VRECPS))]
3399   "TARGET_NEON_FP16INST"
3400   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3401   [(set_attr "type" "neon_fp_recps_s<q>")]
3402 )
3403
;; vrsqrts over the VCVTF modes, modelled as UNSPEC_VRSQRTS of two
;; registers.
3404 (define_insn "neon_vrsqrts<mode>"
3405   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3406         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3407                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3408                       UNSPEC_VRSQRTS))]
3409   "TARGET_NEON"
3410   "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3411   [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3412 )
3413
;; vrsqrts over the VH modes (UNSPEC_VRSQRTS); requires
;; TARGET_NEON_FP16INST.
3414 (define_insn "neon_vrsqrts<mode>"
3415   [(set
3416     (match_operand:VH 0 "s_register_operand" "=w")
3417     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3418                  (match_operand:VH 2 "s_register_operand" "w")]
3419      UNSPEC_VRSQRTS))]
3420  "TARGET_NEON_FP16INST"
3421  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3422  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3423 )
3424
;; Expander for vabs over the VDQW modes; forwards to abs<mode>2.
3425 (define_expand "neon_vabs<mode>"
3426   [(match_operand:VDQW 0 "s_register_operand" "")
3427    (match_operand:VDQW 1 "s_register_operand" "")]
3428   "TARGET_NEON"
3429 {
3430   emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3431   DONE;
3432 })
3433
;; vqabs over the VDQIW modes, modelled as UNSPEC_VQABS of one register.
3434 (define_insn "neon_vqabs<mode>"
3435   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3436         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3437                       UNSPEC_VQABS))]
3438   "TARGET_NEON"
3439   "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3440   [(set_attr "type" "neon_qabs<q>")]
3441 )
3442
;; bswap over the VDQHSD modes, emitted as vrev<V_sz_elem>.8.
3443 (define_insn "neon_bswap<mode>"
3444   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3445         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3446   "TARGET_NEON"
3447   "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3448   [(set_attr "type" "neon_rev<q>")]
3449 )
3450
;; Expander for vneg over the VDQW modes; forwards to neg<mode>2.
3451 (define_expand "neon_vneg<mode>"
3452   [(match_operand:VDQW 0 "s_register_operand" "")
3453    (match_operand:VDQW 1 "s_register_operand" "")]
3454   "TARGET_NEON"
3455 {
3456   emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3457   DONE;
3458 })
3459
3460 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the accumulator, tied to the destination ("0"); operands 2
;; and 3 are the <VSI2QI>-mode sources of the DOTPROD unspec.
3461 (define_insn "neon_<sup>dot<vsi2qi>"
3462   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3463         (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3464                     (unspec:VCVTI [(match_operand:<VSI2QI> 2
3465                                                         "register_operand" "w")
3466                                    (match_operand:<VSI2QI> 3
3467                                                         "register_operand" "w")]
3468                 DOTPROD)))]
3469   "TARGET_DOTPROD"
3470   "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3471   [(set_attr "type" "neon_dot")]
3472 )
3473
3474 ;; These instructions map to the __builtins for the Dot Product
3475 ;; indexed operations.
;; Operand 3 is always V8QI (constraint "t") and operand 4 is the lane
;; index.  The output code rewrites operand 4 through NEON_ENDIAN_LANE_N
;; before printing it as %P3[%c4].
3476 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3477   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3478         (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3479                     (unspec:VCVTI [(match_operand:<VSI2QI> 2
3480                                                         "register_operand" "w")
3481                                    (match_operand:V8QI 3 "register_operand" "t")
3482                                    (match_operand:SI 4 "immediate_operand" "i")]
3483                 DOTPROD)))]
3484   "TARGET_DOTPROD"
3485   {
3486     operands[4]
3487       = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3488     return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3489   }
3490   [(set_attr "type" "neon_dot")]
3491 )
3492
3493 ;; These expands map to the Dot Product optab the vectorizer checks for.
3494 ;; The auto-vectorizer expects a dot product builtin that also does an
3495 ;; accumulation into the provided register.
3496 ;; Given the following pattern
3497 ;;
3498 ;; for (i=0; i<len; i++) {
3499 ;;     c = a[i] * b[i];
3500 ;;     r += c;
3501 ;; }
3502 ;; return r;
3503 ;;
3504 ;; This can be auto-vectorized to
3505 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3506 ;;
3507 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
3508 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3509 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3510 ;; ...
3511 ;;
3512 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Operands 1 and 2 are the <VSI2QI>-mode sources and operand 3 is the
;; accumulator.  The body emits neon_<sup>dot<vsi2qi> with operand 3 as
;; both destination and accumulator, then copies operand 3 to operand 0.
3513 (define_expand "<sup>dot_prod<vsi2qi>"
3514   [(set (match_operand:VCVTI 0 "register_operand")
3515         (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3516                                                         "register_operand")
3517                                    (match_operand:<VSI2QI> 2
3518                                                         "register_operand")]
3519                      DOTPROD)
3520                     (match_operand:VCVTI 3 "register_operand")))]
3521   "TARGET_DOTPROD"
3522 {
3523   emit_insn (
3524     gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3525                                  operands[2]));
3526   emit_insn (gen_rtx_SET (operands[0], operands[3]));
3527   DONE;
3528 })
3529
;; Expander for copysign over the VCVTF modes.  It loads a vector with
;; 0x80000000 in every element of the <V_cmp_result> mode, copies operand 2
;; into operand 0, and then emits neon_vbsl<mode> with that mask, operand 0
;; and operand 1 as inputs.
3530 (define_expand "neon_copysignf<mode>"
3531   [(match_operand:VCVTF 0 "register_operand")
3532    (match_operand:VCVTF 1 "register_operand")
3533    (match_operand:VCVTF 2 "register_operand")]
3534   "TARGET_NEON"
3535   "{
3536      rtx v_bitmask_cast;
3537      rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
     /* Build the mask constant with gen_int_mode so that the CONST_INT is
        in canonical (sign-extended) form for a 32-bit mode; a bare
        GEN_INT (0x80000000) is not canonical when HOST_WIDE_INT is wider
        than 32 bits.  Assumes the mask elements are SImode -- TODO confirm
        against the V_cmp_result mapping for VCVTF.  */
3538      rtx c = gen_int_mode (0x80000000, SImode);
3539
3540      emit_move_insn (v_bitmask,
3541                      gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3542      emit_move_insn (operands[0], operands[2]);
     /* View the integer mask in the floating-point vector mode for vbsl.  */
3543      v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3544                                            <VCVTF:V_cmp_result>mode, 0);
3545      emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3546                                      operands[1]));
3547
3548      DONE;
3549   }"
3550 )
3551
;; vqneg over the VDQIW modes, modelled as UNSPEC_VQNEG of one register.
3552 (define_insn "neon_vqneg<mode>"
3553   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3554         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3555                       UNSPEC_VQNEG))]
3556   "TARGET_NEON"
3557   "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3558   [(set_attr "type" "neon_qneg<q>")]
3559 )
3560
;; vcls over the VDQIW modes, modelled as UNSPEC_VCLS of one register.
3561 (define_insn "neon_vcls<mode>"
3562   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3563         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3564                       UNSPEC_VCLS))]
3565   "TARGET_NEON"
3566   "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3567   [(set_attr "type" "neon_cls<q>")]
3568 )
3569
;; clz over the VDQIW modes, emitted as vclz.
3570 (define_insn "clz<mode>2"
3571   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3572         (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3573   "TARGET_NEON"
3574   "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3575   [(set_attr "type" "neon_cnt<q>")]
3576 )
3577
;; Expander for vclz over the VDQIW modes; forwards to clz<mode>2.
3578 (define_expand "neon_vclz<mode>"
3579   [(match_operand:VDQIW 0 "s_register_operand" "")
3580    (match_operand:VDQIW 1 "s_register_operand" "")]
3581   "TARGET_NEON"
3582 {
3583   emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3584   DONE;
3585 })
3586
;; popcount over the VE modes, emitted as vcnt.
3587 (define_insn "popcount<mode>2"
3588   [(set (match_operand:VE 0 "s_register_operand" "=w")
3589         (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3590   "TARGET_NEON"
3591   "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3592   [(set_attr "type" "neon_cnt<q>")]
3593 )
3594
;; Expander for vcnt over the VE modes; forwards to popcount<mode>2.
3595 (define_expand "neon_vcnt<mode>"
3596   [(match_operand:VE 0 "s_register_operand" "=w")
3597    (match_operand:VE 1 "s_register_operand" "w")]
3598   "TARGET_NEON"
3599 {
3600   emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3601   DONE;
3602 })
3603
;; vrecpe.f16 over the VH modes (UNSPEC_VRECPE); requires
;; TARGET_NEON_FP16INST.
3604 (define_insn "neon_vrecpe<mode>"
3605   [(set (match_operand:VH 0 "s_register_operand" "=w")
3606         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3607                    UNSPEC_VRECPE))]
3608   "TARGET_NEON_FP16INST"
3609   "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3610   [(set_attr "type" "neon_fp_recpe_s<q>")]
3611 )
3612
;; vrecpe over the V32 modes (UNSPEC_VRECPE); the type suffix comes from
;; <V_u_elem>.
3613 (define_insn "neon_vrecpe<mode>"
3614   [(set (match_operand:V32 0 "s_register_operand" "=w")
3615         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3616                     UNSPEC_VRECPE))]
3617   "TARGET_NEON"
3618   "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3619   [(set_attr "type" "neon_fp_recpe_s<q>")]
3620 )
3621
;; vrsqrte over the V32 modes (UNSPEC_VRSQRTE); the type suffix comes from
;; <V_u_elem>.
3622 (define_insn "neon_vrsqrte<mode>"
3623   [(set (match_operand:V32 0 "s_register_operand" "=w")
3624         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3625                     UNSPEC_VRSQRTE))]
3626   "TARGET_NEON"
3627   "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3628   [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3629 )
3630
;; Expander for vmvn over the VDQIW modes; forwards to one_cmpl<mode>2.
3631 (define_expand "neon_vmvn<mode>"
3632   [(match_operand:VDQIW 0 "s_register_operand" "")
3633    (match_operand:VDQIW 1 "s_register_operand" "")]
3634   "TARGET_NEON"
3635 {
3636   emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3637   DONE;
3638 })
3639
;; Sign-extending lane extract from a VD-mode register into an SI core
;; register (vmov.s).  On BYTES_BIG_ENDIAN the lane index is mirrored
;; (nunits - 1 - elt) before it is printed.
3640 (define_insn "neon_vget_lane<mode>_sext_internal"
3641   [(set (match_operand:SI 0 "s_register_operand" "=r")
3642         (sign_extend:SI
3643           (vec_select:<V_elem>
3644             (match_operand:VD 1 "s_register_operand" "w")
3645             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3646   "TARGET_NEON"
3647 {
3648   if (BYTES_BIG_ENDIAN)
3649     {
3650       int elt = INTVAL (operands[2]);
3651       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3652       operands[2] = GEN_INT (elt);
3653     }
3654   return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3655 }
3656   [(set_attr "type" "neon_to_gp")]
3657 )
3658
;; Zero-extending lane extract from a VD-mode register into an SI core
;; register (vmov.u).  On BYTES_BIG_ENDIAN the lane index is mirrored
;; (nunits - 1 - elt) before it is printed.
3659 (define_insn "neon_vget_lane<mode>_zext_internal"
3660   [(set (match_operand:SI 0 "s_register_operand" "=r")
3661         (zero_extend:SI
3662           (vec_select:<V_elem>
3663             (match_operand:VD 1 "s_register_operand" "w")
3664             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3665   "TARGET_NEON"
3666 {
3667   if (BYTES_BIG_ENDIAN)
3668     {
3669       int elt = INTVAL (operands[2]);
3670       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3671       operands[2] = GEN_INT (elt);
3672     }
3673   return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3674 }
3675   [(set_attr "type" "neon_to_gp")]
3676 )
3677
;; Sign-extending lane extract from a VQ2-mode register (vmov.s).  The
;; output code addresses one <V_HALF>-mode half of the source: register
;; regno + 2 * (elt / halfelts), lane elt % halfelts, with that lane
;; mirrored within the half on BYTES_BIG_ENDIAN.
3678 (define_insn "neon_vget_lane<mode>_sext_internal"
3679   [(set (match_operand:SI 0 "s_register_operand" "=r")
3680         (sign_extend:SI
3681           (vec_select:<V_elem>
3682             (match_operand:VQ2 1 "s_register_operand" "w")
3683             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3684   "TARGET_NEON"
3685 {
3686   rtx ops[3];
3687   int regno = REGNO (operands[1]);
3688   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3689   unsigned int elt = INTVAL (operands[2]);
3690   unsigned int elt_adj = elt % halfelts;
3691
3692   if (BYTES_BIG_ENDIAN)
3693     elt_adj = halfelts - 1 - elt_adj;
3694
3695   ops[0] = operands[0];
3696   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3697   ops[2] = GEN_INT (elt_adj);
3698   output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3699
3700   return "";
3701 }
3702   [(set_attr "type" "neon_to_gp_q")]
3703 )
3704
;; Zero-extending lane extract from a VQ2-mode register (vmov.u).  The
;; output code addresses one <V_HALF>-mode half of the source: register
;; regno + 2 * (elt / halfelts), lane elt % halfelts, with that lane
;; mirrored within the half on BYTES_BIG_ENDIAN.
3705 (define_insn "neon_vget_lane<mode>_zext_internal"
3706   [(set (match_operand:SI 0 "s_register_operand" "=r")
3707         (zero_extend:SI
3708           (vec_select:<V_elem>
3709             (match_operand:VQ2 1 "s_register_operand" "w")
3710             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3711   "TARGET_NEON"
3712 {
3713   rtx ops[3];
3714   int regno = REGNO (operands[1]);
3715   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3716   unsigned int elt = INTVAL (operands[2]);
3717   unsigned int elt_adj = elt % halfelts;
3718
3719   if (BYTES_BIG_ENDIAN)
3720     elt_adj = halfelts - 1 - elt_adj;
3721
3722   ops[0] = operands[0];
3723   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3724   ops[2] = GEN_INT (elt_adj);
3725   output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3726
3727   return "";
3728 }
3729   [(set_attr "type" "neon_to_gp_q")]
3730 )
3731
;; Expander for the lane-extract intrinsic over the VDQW modes.  After the
;; big-endian lane adjustment, modes with 32-bit elements go through
;; vec_extract<mode><V_elem_l>; all other modes go through the
;; sign-extending internal pattern.
3732 (define_expand "neon_vget_lane<mode>"
3733   [(match_operand:<V_ext> 0 "s_register_operand" "")
3734    (match_operand:VDQW 1 "s_register_operand" "")
3735    (match_operand:SI 2 "immediate_operand" "")]
3736   "TARGET_NEON"
3737 {
3738   if (BYTES_BIG_ENDIAN)
3739     {
3740       /* The intrinsics are defined in terms of a model where the
3741          element ordering in memory is vldm order, whereas the generic
3742          RTL is defined in terms of a model where the element ordering
3743          in memory is array order.  Convert the lane number to conform
3744          to this model.  */
3745       unsigned int elt = INTVAL (operands[2]);
3746       unsigned int reg_nelts
3747         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3748       elt ^= reg_nelts - 1;
3749       operands[2] = GEN_INT (elt);
3750     }
3751
3752   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3753     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3754                                                 operands[2]));
3755   else
3756     emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3757                                                        operands[1],
3758                                                        operands[2]));
3759   DONE;
3760 })
3761
;; Unsigned counterpart of neon_vget_lane<mode>, over the VDQIW modes.
;; After the big-endian lane adjustment, modes with 32-bit elements go
;; through vec_extract<mode><V_elem_l>; all other modes go through the
;; zero-extending internal pattern.
3762 (define_expand "neon_vget_laneu<mode>"
3763   [(match_operand:<V_ext> 0 "s_register_operand" "")
3764    (match_operand:VDQIW 1 "s_register_operand" "")
3765    (match_operand:SI 2 "immediate_operand" "")]
3766   "TARGET_NEON"
3767 {
3768   if (BYTES_BIG_ENDIAN)
3769     {
3770       /* The intrinsics are defined in terms of a model where the
3771          element ordering in memory is vldm order, whereas the generic
3772          RTL is defined in terms of a model where the element ordering
3773          in memory is array order.  Convert the lane number to conform
3774          to this model.  */
3775       unsigned int elt = INTVAL (operands[2]);
3776       unsigned int reg_nelts
3777         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3778       elt ^= reg_nelts - 1;
3779       operands[2] = GEN_INT (elt);
3780     }
3781
3782   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3783     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3784                                                 operands[2]));
3785   else
3786     emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3787                                                        operands[1],
3788                                                        operands[2]));
3789   DONE;
3790 })
3791
;; DImode lane extract.  The lane operand (operand 2) is not used: the body
;; simply moves operand 1 to operand 0.
3792 (define_expand "neon_vget_lanedi"
3793   [(match_operand:DI 0 "s_register_operand" "=r")
3794    (match_operand:DI 1 "s_register_operand" "w")
3795    (match_operand:SI 2 "immediate_operand" "")]
3796   "TARGET_NEON"
3797 {
3798   emit_move_insn (operands[0], operands[1]);
3799   DONE;
3800 })
3801
;; V2DI lane extract.  The lane is flipped on BYTES_BIG_ENDIAN and must
;; then be 0 or 1 (asserted); the body moves the low part (lane 0) or the
;; high part (lane 1) of operand 1 to operand 0.
3802 (define_expand "neon_vget_lanev2di"
3803   [(match_operand:DI 0 "s_register_operand" "")
3804    (match_operand:V2DI 1 "s_register_operand" "")
3805    (match_operand:SI 2 "immediate_operand" "")]
3806   "TARGET_NEON"
3807 {
3808   int lane;
3809
3810 if (BYTES_BIG_ENDIAN)
3811     {
3812       /* The intrinsics are defined in terms of a model where the
3813          element ordering in memory is vldm order, whereas the generic
3814          RTL is defined in terms of a model where the element ordering
3815          in memory is array order.  Convert the lane number to conform
3816          to this model.  */
3817       unsigned int elt = INTVAL (operands[2]);
3818       unsigned int reg_nelts = 2;
3819       elt ^= reg_nelts - 1;
3820       operands[2] = GEN_INT (elt);
3821     }
3822
3823   lane = INTVAL (operands[2]);
3824   gcc_assert ((lane ==0) || (lane == 1));
3825   emit_move_insn (operands[0], lane == 0
3826                                 ? gen_lowpart (DImode, operands[1])
3827                                 : gen_highpart (DImode, operands[1]));
3828   DONE;
3829 })
3830
;; Expander for the lane-insert intrinsic over the VDQ modes.  Operand 1 is
;; the new element, operand 2 the input vector and operand 3 the lane.  The
;; lane is adjusted on BYTES_BIG_ENDIAN and passed to
;; vec_set<mode>_internal as the one-hot mask 1 << elt.
3831 (define_expand "neon_vset_lane<mode>"
3832   [(match_operand:VDQ 0 "s_register_operand" "=w")
3833    (match_operand:<V_elem> 1 "s_register_operand" "r")
3834    (match_operand:VDQ 2 "s_register_operand" "0")
3835    (match_operand:SI 3 "immediate_operand" "i")]
3836   "TARGET_NEON"
3837 {
3838   unsigned int elt = INTVAL (operands[3]);
3839
3840   if (BYTES_BIG_ENDIAN)
3841     {
3842       unsigned int reg_nelts
3843         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3844       elt ^= reg_nelts - 1;
3845     }
3846
3847   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3848                                          GEN_INT (1 << elt), operands[2]));
3849   DONE;
3850 })
3851
3852 ; As in neon_vget_lanedi, the lane operand is ignored; here operands 2 & 3 are both unused because the expander just moves operand 1 into operand 0.
3853
;; DImode lane insert.  Operands 2 and 3 are not used: the body simply
;; moves operand 1 to operand 0.
3854 (define_expand "neon_vset_lanedi"
3855   [(match_operand:DI 0 "s_register_operand" "=w")
3856    (match_operand:DI 1 "s_register_operand" "r")
3857    (match_operand:DI 2 "s_register_operand" "0")
3858    (match_operand:SI 3 "immediate_operand" "i")]
3859   "TARGET_NEON"
3860 {
3861   emit_move_insn (operands[0], operands[1]);
3862   DONE;
3863 })
3864
;; Expander for vcreate over the VD_RE modes: takes the DImode operand 1 as
;; a <MODE>mode value via gen_lowpart and moves it to operand 0.
3865 (define_expand "neon_vcreate<mode>"
3866   [(match_operand:VD_RE 0 "s_register_operand" "")
3867    (match_operand:DI 1 "general_operand" "")]
3868   "TARGET_NEON"
3869 {
3870   rtx src = gen_lowpart (<MODE>mode, operands[1]);
3871   emit_move_insn (operands[0], src);
3872   DONE;
3873 })
3874
;; vdup from a scalar register (constraint "r") for modes in the VX iterator:
;; operand 0 becomes a vec_duplicate of operand 1, emitted as one
;; vdup.<V_sz_elem> instruction.
3875 (define_insn "neon_vdup_n<mode>"
3876   [(set (match_operand:VX 0 "s_register_operand" "=w")
3877         (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3878   "TARGET_NEON"
3879   "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3880   [(set_attr "type" "neon_from_gp<q>")]
3881 )
3882
;; V4HF variant of vdup_n: duplicates HFmode operand 1 (constraint "r") into
;; every element of operand 0 using vdup.16.
3883 (define_insn "neon_vdup_nv4hf"
3884   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3885         (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3886   "TARGET_NEON"
3887   "vdup.16\t%P0, %1"
3888   [(set_attr "type" "neon_from_gp")]
3889 )
3890
;; V8HF variant of vdup_n: same as the V4HF pattern but the destination is
;; printed with %q and the type attribute is the "_q" form.
3891 (define_insn "neon_vdup_nv8hf"
3892   [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3893         (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3894   "TARGET_NEON"
3895   "vdup.16\t%q0, %1"
3896   [(set_attr "type" "neon_from_gp_q")]
3897 )
3898
;; vdup_n for modes in the V32 iterator.  Two alternatives: the scalar in an
;; "r" register (type neon_from_gp<q>), or in a "t" register, in which case
;; the source is printed with the %y modifier (type neon_dup<q>).
3899 (define_insn "neon_vdup_n<mode>"
3900   [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3901         (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3902   "TARGET_NEON"
3903   "@
3904   vdup.<V_sz_elem>\t%<V_reg>0, %1
3905   vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3906   [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3907 )
3908
;; DImode vdup_n: with a single element, duplication is just a move of
;; operand 1 into operand 0.
3909 (define_expand "neon_vdup_ndi"
3910   [(match_operand:DI 0 "s_register_operand" "=w")
3911    (match_operand:DI 1 "s_register_operand" "r")]
3912   "TARGET_NEON"
3913 {
3914   emit_move_insn (operands[0], operands[1]);
3915   DONE;
3916 }
3917 )
3918
;; V2DI vdup_n: duplicates DImode operand 1 into both halves of operand 0.
;; Each alternative emits two vmov instructions (hence length 8, type
;; "multiple"), one targeting %e0 and one targeting %f0.  Alternative 0
;; takes the source from an "r" operand (printed as %Q1, %R1), alternative 1
;; from a "w" operand (printed as %P1).
3919 (define_insn "neon_vdup_nv2di"
3920   [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3921         (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3922   "TARGET_NEON"
3923   "@
3924   vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3925   vmov\t%e0, %P1\;vmov\t%f0, %P1"
3926   [(set_attr "length" "8")
3927    (set_attr "type" "multiple")]
3928 )
3929
;; Insn behind vdup_lane for modes in the VDQW iterator: operand 0 is a
;; vec_duplicate of the element of operand 1 selected by immediate operand 2.
;; At output time on big-endian targets the lane number is mirrored
;; (NUNITS - 1 - elt) before printing.  <Is_d_reg> selects between the
;; template that prints operand 0 with %P and the one that prints it with %q.
3930 (define_insn "neon_vdup_lane<mode>_internal"
3931   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3932         (vec_duplicate:VDQW
3933           (vec_select:<V_elem>
3934             (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3935             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3936   "TARGET_NEON"
3937 {
3938   if (BYTES_BIG_ENDIAN)
3939     {
3940       int elt = INTVAL (operands[2]);
3941       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3942       operands[2] = GEN_INT (elt);
3943     }
3944   if (<Is_d_reg>)
3945     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3946   else
3947     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3948 }
3949   [(set_attr "type" "neon_dup<q>")]
3950 )
3951
;; Half-precision (VH iterator) counterpart of the vdup_lane internal insn.
;; The output logic is the same: mirror the lane on big-endian, then print a
;; D- or Q-form vdup depending on <Is_d_reg>.  It is additionally gated on
;; TARGET_FP16.
3952 (define_insn "neon_vdup_lane<mode>_internal"
3953  [(set (match_operand:VH 0 "s_register_operand" "=w")
3954    (vec_duplicate:VH
3955     (vec_select:<V_elem>
3956      (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3957      (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3958  "TARGET_NEON && TARGET_FP16"
3959 {
3960   if (BYTES_BIG_ENDIAN)
3961     {
3962       int elt = INTVAL (operands[2]);
3963       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3964       operands[2] = GEN_INT (elt);
3965     }
3966   if (<Is_d_reg>)
3967     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3968   else
3969     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3970 }
3971   [(set_attr "type" "neon_dup<q>")]
3972 )
3973
;; Expander for the vdup_lane builtin on modes in the VDQW iterator.  On
;; big-endian targets the lane index in operand 2 is first flipped within a
;; 64-bit chunk (XOR with elements-per-64-bits minus one).  The work is then
;; delegated to neon_vdup_lane<mode>_internal.
3974 (define_expand "neon_vdup_lane<mode>"
3975   [(match_operand:VDQW 0 "s_register_operand" "=w")
3976    (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3977    (match_operand:SI 2 "immediate_operand" "i")]
3978   "TARGET_NEON"
3979 {
3980   if (BYTES_BIG_ENDIAN)
3981     {
3982       unsigned int elt = INTVAL (operands[2]);
3983       unsigned int reg_nelts
3984         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3985       elt ^= reg_nelts - 1;
3986       operands[2] = GEN_INT (elt);
3987     }
3988     emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3989                                                   operands[2]));
3990     DONE;
3991 })
3992
;; Half-precision (VH iterator) expander for vdup_lane, gated on TARGET_FP16.
;; It applies the same big-endian lane flip as the VDQW expander, then emits
;; neon_vdup_lane<mode>_internal.
3993 (define_expand "neon_vdup_lane<mode>"
3994   [(match_operand:VH 0 "s_register_operand")
3995    (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3996    (match_operand:SI 2 "immediate_operand")]
3997   "TARGET_NEON && TARGET_FP16"
3998 {
3999   if (BYTES_BIG_ENDIAN)
4000     {
4001       unsigned int elt = INTVAL (operands[2]);
4002       unsigned int reg_nelts
4003         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4004       elt ^= reg_nelts - 1;
4005       operands[2] = GEN_INT (elt);
4006     }
4007   emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4008                                                 operands[2]));
4009   DONE;
4010 })
4011
4012 ; Scalar index is ignored, since only zero is valid here.
;; DImode vdup_lane: expands to a plain move of operand 1 into operand 0.
;; Operand 2 (the lane index) is not referenced by the expansion.
4013 (define_expand "neon_vdup_lanedi"
4014   [(match_operand:DI 0 "s_register_operand" "=w")
4015    (match_operand:DI 1 "s_register_operand" "w")
4016    (match_operand:SI 2 "immediate_operand" "i")]
4017   "TARGET_NEON"
4018 {
4019   emit_move_insn (operands[0], operands[1]);
4020   DONE;
4021 })
4022
4023 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI vdup_lane: the source is a single DImode value, so the expansion
;; reuses neon_vdup_nv2di.  Operand 2 (the lane index) is not referenced.
4024 (define_expand "neon_vdup_lanev2di"
4025   [(match_operand:V2DI 0 "s_register_operand" "=w")
4026    (match_operand:DI 1 "s_register_operand" "w")
4027    (match_operand:SI 2 "immediate_operand" "i")]
4028   "TARGET_NEON"
4029 {
4030   emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4031   DONE;
4032 })
4033
4034 ; Disabled before reload because we don't want combine doing something silly,
4035 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap: a pair of sets exchanging operands 0 and 1 (both "+w"),
;; emitted as a single vswp.  The condition requires reload_completed, so the
;; pattern is only available after reload.
4036 (define_insn "*neon_vswp<mode>"
4037   [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4038         (match_operand:VDQX 1 "s_register_operand" "+w"))
4039    (set (match_dup 1) (match_dup 0))]
4040   "TARGET_NEON && reload_completed"
4041   "vswp\t%<V_reg>0, %<V_reg>1"
4042   [(set_attr "type" "neon_permute<q>")]
4043 )
4044
4045 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4046 ;; dest vector.
4047 ;; FIXME: A different implementation of this builtin could make it much
4048 ;; more likely that we wouldn't actually need to output anything (we could make
4049 ;; it so that the reg allocator puts things in the right places magically
4050 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
4051
;; vcombine: operand 0 (<V_DOUBLE> mode) is the vec_concat of operands 1 and
;; 2.  The insn has no direct assembly output ("#").  After reload it is
;; split by calling neon_split_vcombine (operands), which emits the
;; replacement sequence.
4052 (define_insn_and_split "neon_vcombine<mode>"
4053   [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4054         (vec_concat:<V_DOUBLE>
4055           (match_operand:VDX 1 "s_register_operand" "w")
4056           (match_operand:VDX 2 "s_register_operand" "w")))]
4057   "TARGET_NEON"
4058   "#"
4059   "&& reload_completed"
4060   [(const_int 0)]
4061 {
4062   neon_split_vcombine (operands);
4063   DONE;
4064 }
4065 [(set_attr "type" "multiple")]
4066 )
4067
;; vget_high for modes in the VQX iterator: moves into operand 0 the
;; <V_HALF>mode subreg of operand 1 located at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. the second half-sized chunk.
4068 (define_expand "neon_vget_high<mode>"
4069   [(match_operand:<V_HALF> 0 "s_register_operand")
4070    (match_operand:VQX 1 "s_register_operand")]
4071   "TARGET_NEON"
4072 {
4073   emit_move_insn (operands[0],
4074                   simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4075                                        GET_MODE_SIZE (<V_HALF>mode)));
4076   DONE;
4077 })
4078
;; vget_low for modes in the VQX iterator: moves into operand 0 the
;; <V_HALF>mode subreg of operand 1 at byte offset 0.
4079 (define_expand "neon_vget_low<mode>"
4080   [(match_operand:<V_HALF> 0 "s_register_operand")
4081    (match_operand:VQX 1 "s_register_operand")]
4082   "TARGET_NEON"
4083 {
4084   emit_move_insn (operands[0],
4085                   simplify_gen_subreg (<V_HALF>mode, operands[1],
4086                                        <MODE>mode, 0));
4087   DONE;
4088 })
4089
;; Named pattern for the RTL "float" conversion on modes in the VCVTI
;; iterator, emitted as vcvt.f32.s32.  Disabled when flag_rounding_math is
;; set.
4090 (define_insn "float<mode><V_cvtto>2"
4091   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4092         (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4093   "TARGET_NEON && !flag_rounding_math"
4094   "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4095   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4096 )
4097
;; Named pattern for the RTL "unsigned_float" conversion on modes in the
;; VCVTI iterator, emitted as vcvt.f32.u32.  Disabled when
;; flag_rounding_math is set.
4098 (define_insn "floatuns<mode><V_cvtto>2"
4099   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4100         (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4101   "TARGET_NEON && !flag_rounding_math"
4102   "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4103   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4104 )
4105
;; Named pattern for the RTL "fix" conversion on modes in the VCVTF
;; iterator, emitted as vcvt.s32.f32.
4106 (define_insn "fix_trunc<mode><V_cvtto>2"
4107   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4108         (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4109   "TARGET_NEON"
4110   "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4111   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4112 )
4113
;; Named pattern for the RTL "unsigned_fix" conversion on modes in the VCVTF
;; iterator, emitted as vcvt.u32.f32.
4114 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4115   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4116         (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4117   "TARGET_NEON"
4118   "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4119   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4120 )
4121
;; Builtin-level conversion from modes in the VCVTF iterator, expressed as an
;; unspec over the VCVT_US iterator.  <sup> supplies the signedness letter of
;; the 32-bit integer destination type in the mnemonic.
4122 (define_insn "neon_vcvt<sup><mode>"
4123   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4124         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4125                           VCVT_US))]
4126   "TARGET_NEON"
4127   "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4128   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4129 )
4130
;; Builtin-level conversion from modes in the VCVTI iterator to f32,
;; expressed as an unspec over VCVT_US.  <sup> supplies the signedness letter
;; of the 32-bit integer source type in the mnemonic.
4131 (define_insn "neon_vcvt<sup><mode>"
4132   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4133         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4134                           VCVT_US))]
4135   "TARGET_NEON"
4136   "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4137   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4138 )
4139
;; Widening conversion V4HF -> V4SF (UNSPEC_VCVT), emitted as vcvt.f32.f16.
;; Requires TARGET_FP16 in addition to TARGET_NEON.
4140 (define_insn "neon_vcvtv4sfv4hf"
4141   [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4142         (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4143                           UNSPEC_VCVT))]
4144   "TARGET_NEON && TARGET_FP16"
4145   "vcvt.f32.f16\t%q0, %P1"
4146   [(set_attr "type" "neon_fp_cvt_widen_h")]
4147 )
4148
;; Narrowing conversion V4SF -> V4HF (UNSPEC_VCVT), emitted as vcvt.f16.f32.
;; Requires TARGET_FP16 in addition to TARGET_NEON.
4149 (define_insn "neon_vcvtv4hfv4sf"
4150   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4151         (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4152                           UNSPEC_VCVT))]
4153   "TARGET_NEON && TARGET_FP16"
4154   "vcvt.f16.f32\t%P0, %q1"
4155   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
4156 )
4157
;; Conversion from modes in the VCVTHI iterator to f16 (unspec over VCVT_US),
;; emitted as vcvt.f16.<sup>16.  Gated on TARGET_NEON_FP16INST.
4158 (define_insn "neon_vcvt<sup><mode>"
4159  [(set
4160    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4161    (unspec:<VH_CVTTO>
4162     [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4163     VCVT_US))]
4164  "TARGET_NEON_FP16INST"
4165  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4166   [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4167 )
4168
;; Conversion from modes in the VH iterator to 16-bit integers (unspec over
;; VCVT_US), emitted as vcvt.<sup>16.f16.  Gated on TARGET_NEON_FP16INST.
4169 (define_insn "neon_vcvt<sup><mode>"
4170  [(set
4171    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4172    (unspec:<VH_CVTTO>
4173     [(match_operand:VH 1 "s_register_operand" "w")]
4174     VCVT_US))]
4175  "TARGET_NEON_FP16INST"
4176  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4177   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4178 )
4179
;; Conversion with an immediate third instruction operand (operand 2), from
;; modes in the VCVTF iterator to 32-bit integers.  Operand 2 is validated
;; at output time with arm_const_bounds (operands[2], 1, 33).
;; NOTE(review): presumably low-inclusive/high-exclusive, i.e. 1..32 --
;; confirm against arm_const_bounds.
4180 (define_insn "neon_vcvt<sup>_n<mode>"
4181   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4182         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4183                            (match_operand:SI 2 "immediate_operand" "i")]
4184                           VCVT_US_N))]
4185   "TARGET_NEON"
4186 {
4187   arm_const_bounds (operands[2], 1, 33);
4188   return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4189 }
4190   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4191 )
4192
;; Conversion with an immediate fraction-bits operand (operand 2), from
;; modes in the VH iterator to 16-bit integers.  Gated on
;; TARGET_NEON_FP16INST.  The immediate is validated at output time.  The
;; lower bound is 1, matching the 32-bit patterns' (1, 33) check: a
;; fraction-bit count of 0 has no encoding for this instruction, so it must
;; be diagnosed here rather than passed through to the assembler.
4193 (define_insn "neon_vcvt<sup>_n<mode>"
4194  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4195    (unspec:<VH_CVTTO>
4196     [(match_operand:VH 1 "s_register_operand" "w")
4197      (match_operand:SI 2 "immediate_operand" "i")]
4198     VCVT_US_N))]
4199   "TARGET_NEON_FP16INST"
4200 {
4201   arm_const_bounds (operands[2], 1, 17);
4202   return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4203 }
4204  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4205 )
4206
;; Conversion with an immediate third instruction operand (operand 2), from
;; modes in the VCVTI iterator to f32.  Operand 2 is validated at output
;; time with arm_const_bounds (operands[2], 1, 33).
;; NOTE(review): presumably low-inclusive/high-exclusive, i.e. 1..32 --
;; confirm against arm_const_bounds.
4207 (define_insn "neon_vcvt<sup>_n<mode>"
4208   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4209         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4210                            (match_operand:SI 2 "immediate_operand" "i")]
4211                           VCVT_US_N))]
4212   "TARGET_NEON"
4213 {
4214   arm_const_bounds (operands[2], 1, 33);
4215   return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4216 }
4217   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4218 )
4219
;; Conversion with an immediate fraction-bits operand (operand 2), from
;; modes in the VCVTHI iterator to f16.  Gated on TARGET_NEON_FP16INST.  The
;; immediate is validated at output time.  The lower bound is 1, matching
;; the 32-bit patterns' (1, 33) check: a fraction-bit count of 0 has no
;; encoding for this instruction, so it must be diagnosed here rather than
;; passed through to the assembler.
4220 (define_insn "neon_vcvt<sup>_n<mode>"
4221  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4222    (unspec:<VH_CVTTO>
4223     [(match_operand:VCVTHI 1 "s_register_operand" "w")
4224      (match_operand:SI 2 "immediate_operand" "i")]
4225     VCVT_US_N))]
4226  "TARGET_NEON_FP16INST"
4227 {
4228   arm_const_bounds (operands[2], 1, 17);
4229   return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4230 }
4231  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4232 )
4233
;; f16 -> 16-bit integer conversions parameterised over the VCVT_HF_US
;; iterator.  <vcvth_op> is appended directly to the "vcvt" mnemonic and
;; <sup> gives the signedness of the result type.  Gated on
;; TARGET_NEON_FP16INST.
4234 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4235  [(set
4236    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4237    (unspec:<VH_CVTTO>
4238     [(match_operand:VH 1 "s_register_operand" "w")]
4239     VCVT_HF_US))]
4240  "TARGET_NEON_FP16INST"
4241  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4242   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4243 )
4244
;; vmovn: produces a <V_narrow>-mode result from operand 1 (VN iterator) via
;; UNSPEC_VMOVN.  The source is printed with %q and the destination with %P.
4245 (define_insn "neon_vmovn<mode>"
4246   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4247         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4248                            UNSPEC_VMOVN))]
4249   "TARGET_NEON"
4250   "vmovn.<V_if_elem>\t%P0, %q1"
4251   [(set_attr "type" "neon_shift_imm_narrow_q")]
4252 )
4253
;; vqmovn: same operand shape as vmovn, with the unspec taken from the
;; VQMOVN iterator.  <sup> supplies the signedness letter in the mnemonic's
;; type suffix.
4254 (define_insn "neon_vqmovn<sup><mode>"
4255   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4256         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4257                            VQMOVN))]
4258   "TARGET_NEON"
4259   "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4260   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4261 )
4262
;; vqmovun: same operand shape as vmovn, using UNSPEC_VQMOVUN and the
;; <V_s_elem> type suffix.
4263 (define_insn "neon_vqmovun<mode>"
4264   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4265         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4266                            UNSPEC_VQMOVUN))]
4267   "TARGET_NEON"
4268   "vqmovun.<V_s_elem>\t%P0, %q1"
4269   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4270 )
4271
;; vmovl: produces a <V_widen>-mode result from operand 1 (VW iterator) via
;; the VMOVL unspec iterator.  The destination is printed with %q and the
;; source with %P.
4272 (define_insn "neon_vmovl<sup><mode>"
4273   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4274         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4275                           VMOVL))]
4276   "TARGET_NEON"
4277   "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4278   [(set_attr "type" "neon_shift_imm_long")]
4279 )
4280
;; Multiply by scalar lane for modes in the VMD iterator: operand 1 is
;; multiplied by lane operand 3 of vector operand 2.  The register class for
;; operand 2 comes from <scalar_mul_constraint>.  The type attribute is
;; chosen at attribute-evaluation time depending on <Is_float_mode>.
4281 (define_insn "neon_vmul_lane<mode>"
4282   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4283         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4284                      (match_operand:VMD 2 "s_register_operand"
4285                                         "<scalar_mul_constraint>")
4286                      (match_operand:SI 3 "immediate_operand" "i")]
4287                     UNSPEC_VMUL_LANE))]
4288   "TARGET_NEON"
4289 {
4290   return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4291 }
4292   [(set (attr "type")
4293      (if_then_else (match_test "<Is_float_mode>")
4294                    (const_string "neon_fp_mul_s_scalar<q>")
4295                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4296 )
4297
;; Multiply by scalar lane for modes in the VMQ iterator.  The scalar source
;; (operand 2) is a <V_HALF>-mode register.  Operands 0 and 1 are printed
;; with %q and the scalar register with %P.
4298 (define_insn "neon_vmul_lane<mode>"
4299   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4300         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4301                      (match_operand:<V_HALF> 2 "s_register_operand"
4302                                              "<scalar_mul_constraint>")
4303                      (match_operand:SI 3 "immediate_operand" "i")]
4304                     UNSPEC_VMUL_LANE))]
4305   "TARGET_NEON"
4306 {
4307   return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4308 }
4309   [(set (attr "type")
4310      (if_then_else (match_test "<Is_float_mode>")
4311                    (const_string "neon_fp_mul_s_scalar<q>")
4312                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4313 )
4314
;; Half-precision multiply by scalar lane (VH iterator), gated on
;; TARGET_NEON_FP16INST.  The scalar source (operand 2) is always a V4HF
;; register, whichever VH mode operands 0 and 1 have.
4315 (define_insn "neon_vmul_lane<mode>"
4316   [(set (match_operand:VH 0 "s_register_operand" "=w")
4317         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4318                     (match_operand:V4HF 2 "s_register_operand"
4319                      "<scalar_mul_constraint>")
4320                      (match_operand:SI 3 "immediate_operand" "i")]
4321                      UNSPEC_VMUL_LANE))]
4322   "TARGET_NEON_FP16INST"
4323   "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4324   [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
4325 )
4326
;; Widening multiply by scalar lane: operands 1 and 2 are in a VMDI mode and
;; the result is in <V_widen> mode.  The unspec comes from the VMULL_LANE
;; iterator and <sup> gives the signedness letter in the mnemonic.
4327 (define_insn "neon_vmull<sup>_lane<mode>"
4328   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4329         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4330                            (match_operand:VMDI 2 "s_register_operand"
4331                                                "<scalar_mul_constraint>")
4332                            (match_operand:SI 3 "immediate_operand" "i")]
4333                           VMULL_LANE))]
4334   "TARGET_NEON"
4335 {
4336   return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4337 }
4338   [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
4339 )
4340
;; vqdmull by scalar lane: same operand shape as the vmull lane pattern
;; (VMDI inputs, <V_widen> result), using UNSPEC_VQDMULL_LANE.
4341 (define_insn "neon_vqdmull_lane<mode>"
4342   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4343         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4344                            (match_operand:VMDI 2 "s_register_operand"
4345                                                "<scalar_mul_constraint>")
4346                            (match_operand:SI 3 "immediate_operand" "i")]
4347                           UNSPEC_VQDMULL_LANE))]
4348   "TARGET_NEON"
4349 {
4350   return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4351 }
4352   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
4353 )
4354
;; vqdmulh / vqrdmulh by scalar lane for modes in the VMQI iterator.  <r>
;; selects the variant (VQDMULH_LANE iterator).  The scalar source
;; (operand 2) is a <V_HALF>-mode register.
4355 (define_insn "neon_vq<r>dmulh_lane<mode>"
4356   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4357         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4358                       (match_operand:<V_HALF> 2 "s_register_operand"
4359                                               "<scalar_mul_constraint>")
4360                       (match_operand:SI 3 "immediate_operand" "i")]
4361                       VQDMULH_LANE))]
4362   "TARGET_NEON"
4363 {
4364   return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4365 }
4366   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4367 )
4368
;; vqdmulh / vqrdmulh by scalar lane for modes in the VMDI iterator.  <r>
;; selects the variant (VQDMULH_LANE iterator).  All register operands are
;; printed with %P, so the scheduling type uses the non-"_q" form.  This
;; matches the VMDI vqrdml[as]h_lane pattern, whereas the VMQI sibling uses
;; the "_q" form.
4369 (define_insn "neon_vq<r>dmulh_lane<mode>"
4370   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4371         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4372                       (match_operand:VMDI 2 "s_register_operand"
4373                                           "<scalar_mul_constraint>")
4374                       (match_operand:SI 3 "immediate_operand" "i")]
4375                       VQDMULH_LANE))]
4376   "TARGET_NEON"
4377 {
4378   return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4379 }
4380   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
4381 )
4382
4383 ;; vqrdmlah_lane, vqrdmlsh_lane
;; vqrdmlah / vqrdmlsh by scalar lane for modes in the VMQI iterator, gated
;; on TARGET_NEON_RDMA.  Operand 1 is the accumulator and is tied to the
;; destination (constraint "0"), so only operands 0, 2, 3 and the lane
;; number (operand 4) appear in the assembly template.
4384 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4385   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4386         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4387                       (match_operand:VMQI 2 "s_register_operand" "w")
4388                       (match_operand:<V_HALF> 3 "s_register_operand"
4389                                           "<scalar_mul_constraint>")
4390                       (match_operand:SI 4 "immediate_operand" "i")]
4391                      VQRDMLH_AS))]
4392   "TARGET_NEON_RDMA"
4393 {
4394   return
4395    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4396 }
4397   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4398 )
4399
;; vqrdmlah / vqrdmlsh by scalar lane for modes in the VMDI iterator, gated
;; on TARGET_NEON_RDMA.  Operand 1 is the accumulator, tied to the
;; destination (constraint "0").  All register operands are printed with %P.
4400 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4401   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4402         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4403                       (match_operand:VMDI 2 "s_register_operand" "w")
4404                       (match_operand:VMDI 3 "s_register_operand"
4405                                           "<scalar_mul_constraint>")
4406                       (match_operand:SI 4 "immediate_operand" "i")]
4407                      VQRDMLH_AS))]
4408   "TARGET_NEON_RDMA"
4409 {
4410   return
4411    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4412 }
4413   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4414 )
4415
;; Multiply-accumulate by scalar lane for modes in the VMD iterator.
;; Operand 1 is the accumulator, tied to the destination (constraint "0").
;; Operand 2 is the vector multiplicand, operand 3 the scalar source vector
;; and operand 4 the lane.  The type attribute depends on <Is_float_mode>.
4416 (define_insn "neon_vmla_lane<mode>"
4417   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4418         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4419                      (match_operand:VMD 2 "s_register_operand" "w")
4420                      (match_operand:VMD 3 "s_register_operand"
4421                                         "<scalar_mul_constraint>")
4422                      (match_operand:SI 4 "immediate_operand" "i")]
4423                      UNSPEC_VMLA_LANE))]
4424   "TARGET_NEON"
4425 {
4426   return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4427 }
4428   [(set (attr "type")
4429      (if_then_else (match_test "<Is_float_mode>")
4430                    (const_string "neon_fp_mla_s_scalar<q>")
4431                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4432 )
4433
;; Multiply-accumulate by scalar lane for modes in the VMQ iterator.  The
;; accumulator (operand 1) is tied to the destination.  The scalar source
;; (operand 3) is a <V_HALF>-mode register printed with %P.
4434 (define_insn "neon_vmla_lane<mode>"
4435   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4436         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4437                      (match_operand:VMQ 2 "s_register_operand" "w")
4438                      (match_operand:<V_HALF> 3 "s_register_operand"
4439                                              "<scalar_mul_constraint>")
4440                      (match_operand:SI 4 "immediate_operand" "i")]
4441                      UNSPEC_VMLA_LANE))]
4442   "TARGET_NEON"
4443 {
4444   return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4445 }
4446   [(set (attr "type")
4447      (if_then_else (match_test "<Is_float_mode>")
4448                    (const_string "neon_fp_mla_s_scalar<q>")
4449                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4450 )
4451
;; Widening multiply-accumulate by scalar lane.  The accumulator (operand 1,
;; <V_widen> mode) is tied to the destination.  Operands 2 and 3 are in a
;; VMDI mode.  The unspec comes from the VMLAL_LANE iterator.
4452 (define_insn "neon_vmlal<sup>_lane<mode>"
4453   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4454         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4455                            (match_operand:VMDI 2 "s_register_operand" "w")
4456                            (match_operand:VMDI 3 "s_register_operand"
4457                                                "<scalar_mul_constraint>")
4458                            (match_operand:SI 4 "immediate_operand" "i")]
4459                           VMLAL_LANE))]
4460   "TARGET_NEON"
4461 {
4462   return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4463 }
4464   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4465 )
4466
;; vqdmlal by scalar lane: same operand shape as the vmlal lane pattern
;; (tied <V_widen> accumulator, VMDI inputs), using UNSPEC_VQDMLAL_LANE.
4467 (define_insn "neon_vqdmlal_lane<mode>"
4468   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4469         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4470                            (match_operand:VMDI 2 "s_register_operand" "w")
4471                            (match_operand:VMDI 3 "s_register_operand"
4472                                                "<scalar_mul_constraint>")
4473                            (match_operand:SI 4 "immediate_operand" "i")]
4474                           UNSPEC_VQDMLAL_LANE))]
4475   "TARGET_NEON"
4476 {
4477   return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4478 }
4479   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4480 )
4481
;; Multiply-subtract by scalar lane for modes in the VMD iterator.  The
;; operand layout is the same as for vmla_lane (accumulator operand 1 tied
;; to the destination).  The unspec is UNSPEC_VMLS_LANE and the scheduling
;; types are the same "mla" classes as the vmla lane patterns.
4482 (define_insn "neon_vmls_lane<mode>"
4483   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4484         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4485                      (match_operand:VMD 2 "s_register_operand" "w")
4486                      (match_operand:VMD 3 "s_register_operand"
4487                                         "<scalar_mul_constraint>")
4488                      (match_operand:SI 4 "immediate_operand" "i")]
4489                     UNSPEC_VMLS_LANE))]
4490   "TARGET_NEON"
4491 {
4492   return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4493 }
4494   [(set (attr "type")
4495      (if_then_else (match_test "<Is_float_mode>")
4496                    (const_string "neon_fp_mla_s_scalar<q>")
4497                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4498 )
4499
;; Multiply-subtract by scalar lane for modes in the VMQ iterator.  The
;; accumulator (operand 1) is tied to the destination.  The scalar source
;; (operand 3) is a <V_HALF>-mode register printed with %P.
4500 (define_insn "neon_vmls_lane<mode>"
4501   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4502         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4503                      (match_operand:VMQ 2 "s_register_operand" "w")
4504                      (match_operand:<V_HALF> 3 "s_register_operand"
4505                                              "<scalar_mul_constraint>")
4506                      (match_operand:SI 4 "immediate_operand" "i")]
4507                     UNSPEC_VMLS_LANE))]
4508   "TARGET_NEON"
4509 {
4510   return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4511 }
4512   [(set (attr "type")
4513      (if_then_else (match_test "<Is_float_mode>")
4514                    (const_string "neon_fp_mla_s_scalar<q>")
4515                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4516 )
4517
;; Widening multiply-subtract by scalar lane.  The accumulator (operand 1,
;; <V_widen> mode) is tied to the destination.  Operands 2 and 3 are in a
;; VMDI mode.  The unspec comes from the VMLSL_LANE iterator.
4518 (define_insn "neon_vmlsl<sup>_lane<mode>"
4519   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4520         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4521                            (match_operand:VMDI 2 "s_register_operand" "w")
4522                            (match_operand:VMDI 3 "s_register_operand"
4523                                                "<scalar_mul_constraint>")
4524                            (match_operand:SI 4 "immediate_operand" "i")]
4525                           VMLSL_LANE))]
4526   "TARGET_NEON"
4527 {
4528   return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4529 }
4530   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4531 )
4532
;; vqdmlsl by scalar lane: same operand shape as the vmlsl lane pattern
;; (tied <V_widen> accumulator, VMDI inputs), using UNSPEC_VQDMLSL_LANE.
4533 (define_insn "neon_vqdmlsl_lane<mode>"
4534   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4535         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4536                            (match_operand:VMDI 2 "s_register_operand" "w")
4537                            (match_operand:VMDI 3 "s_register_operand"
4538                                                "<scalar_mul_constraint>")
4539                            (match_operand:SI 4 "immediate_operand" "i")]
4540                           UNSPEC_VQDMLSL_LANE))]
4541   "TARGET_NEON"
4542 {
4543   return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4544 }
4545   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4546 )
4547
4548 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4549 ; core register into a temp register, then use a scalar taken from that. This
4550 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4551 ; or extracted from another vector. In the latter case it's currently better to
4552 ; use the "_lane" variant, and the former case can probably be implemented
4553 ; using vld1_lane, but that hasn't been done yet.
4554
;; vmul_n for modes in the VMD iterator: multiply vector operand 1 by scalar
;; operand 2.  The scalar is inserted into lane 0 of a fresh <MODE>mode
;; pseudo via neon_vset_lane, with tmp as both destination and input vector,
;; and the lane-0 form of neon_vmul_lane is then emitted.
4555 (define_expand "neon_vmul_n<mode>"
4556   [(match_operand:VMD 0 "s_register_operand" "")
4557    (match_operand:VMD 1 "s_register_operand" "")
4558    (match_operand:<V_elem> 2 "s_register_operand" "")]
4559   "TARGET_NEON"
4560 {
4561   rtx tmp = gen_reg_rtx (<MODE>mode);
4562   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4563   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4564                                        const0_rtx));
4565   DONE;
4566 })
4567
;; vmul_n for modes in the VMQ iterator.  It follows the same scheme as the
;; VMD expander, but the temporary holding the scalar is a <V_HALF>mode
;; pseudo, the mode the VMQ lane pattern takes for its scalar operand.
4568 (define_expand "neon_vmul_n<mode>"
4569   [(match_operand:VMQ 0 "s_register_operand" "")
4570    (match_operand:VMQ 1 "s_register_operand" "")
4571    (match_operand:<V_elem> 2 "s_register_operand" "")]
4572   "TARGET_NEON"
4573 {
4574   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4575   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4576   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4577                                        const0_rtx));
4578   DONE;
4579 })
4580
;; Half-precision vmul_n (VH iterator), gated on TARGET_NEON_FP16INST.  The
;; scalar is placed in lane 0 of a V4HF temporary via neon_vset_lanev4hf,
;; then the lane-0 form of neon_vmul_lane is emitted.
4581 (define_expand "neon_vmul_n<mode>"
4582   [(match_operand:VH 0 "s_register_operand")
4583    (match_operand:VH 1 "s_register_operand")
4584    (match_operand:<V_elem> 2 "s_register_operand")]
4585   "TARGET_NEON_FP16INST"
4586 {
4587   rtx tmp = gen_reg_rtx (V4HFmode);
4588   emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4589   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4590                                        const0_rtx));
4591   DONE;
4592 })
4593
;; vmull_n, "s" variant: the scalar operand 2 is inserted into lane 0 of a
;; <MODE>mode temporary, then neon_vmulls_lane is emitted with lane 0 to
;; produce the <V_widen> result.
4594 (define_expand "neon_vmulls_n<mode>"
4595   [(match_operand:<V_widen> 0 "s_register_operand" "")
4596    (match_operand:VMDI 1 "s_register_operand" "")
4597    (match_operand:<V_elem> 2 "s_register_operand" "")]
4598   "TARGET_NEON"
4599 {
4600   rtx tmp = gen_reg_rtx (<MODE>mode);
4601   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4602   emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4603                                          const0_rtx));
4604   DONE;
4605 })
4606
;; vmull_n, "u" variant: identical in structure to neon_vmulls_n but emits
;; neon_vmullu_lane.
4607 (define_expand "neon_vmullu_n<mode>"
4608   [(match_operand:<V_widen> 0 "s_register_operand" "")
4609    (match_operand:VMDI 1 "s_register_operand" "")
4610    (match_operand:<V_elem> 2 "s_register_operand" "")]
4611   "TARGET_NEON"
4612 {
4613   rtx tmp = gen_reg_rtx (<MODE>mode);
4614   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4615   emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4616                                          const0_rtx));
4617   DONE;
4618 })
4619
;; vqdmull_n: the scalar operand 2 is inserted into lane 0 of a <MODE>mode
;; temporary, then neon_vqdmull_lane is emitted with lane 0.
4620 (define_expand "neon_vqdmull_n<mode>"
4621   [(match_operand:<V_widen> 0 "s_register_operand" "")
4622    (match_operand:VMDI 1 "s_register_operand" "")
4623    (match_operand:<V_elem> 2 "s_register_operand" "")]
4624   "TARGET_NEON"
4625 {
4626   rtx tmp = gen_reg_rtx (<MODE>mode);
4627   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4628   emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4629                                           const0_rtx));
4630   DONE;
4631 })
4632
;; vqdmulh_n for modes in the VMDI iterator: the scalar operand 2 is
;; inserted into lane 0 of a <MODE>mode temporary, then neon_vqdmulh_lane is
;; emitted with lane 0.
4633 (define_expand "neon_vqdmulh_n<mode>"
4634   [(match_operand:VMDI 0 "s_register_operand" "")
4635    (match_operand:VMDI 1 "s_register_operand" "")
4636    (match_operand:<V_elem> 2 "s_register_operand" "")]
4637   "TARGET_NEON"
4638 {
4639   rtx tmp = gen_reg_rtx (<MODE>mode);
4640   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4641   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4642                                           const0_rtx));
4643   DONE;
4644 })
4645
;; Scalar (_n) form of neon_vqrdmulh for the VMDI modes.  Operand 2 is
;; written to lane 0 of a new pseudo with neon_vset_lane, and
;; neon_vqrdmulh_lane is then emitted with lane index 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
4658
;; Scalar (_n) form of neon_vqdmulh for the VMQI modes.  The scalar is
;; placed in lane 0 of a half-width (<V_HALF>) pseudo, which is the
;; vector handed to neon_vqdmulh_lane together with lane index 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2],
                                         scalar_vec, lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
4671
;; Scalar (_n) form of neon_vqrdmulh for the VMQI modes.  The scalar is
;; placed in lane 0 of a half-width (<V_HALF>) pseudo, which is the
;; vector handed to neon_vqrdmulh_lane together with lane index 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2],
                                         scalar_vec, lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
4684
;; Scalar (_n) form of neon_vmla for the VMD modes.  Operand 3 is written
;; to lane 0 of a new pseudo with neon_vset_lane, and neon_vmla_lane is
;; then emitted with lane index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec, lane0));
  DONE;
})
4698
;; Scalar (_n) form of neon_vmla for the VMQ modes.  The scalar is placed
;; in lane 0 of a half-width (<V_HALF>) pseudo, which is the vector
;; handed to neon_vmla_lane together with lane index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3],
                                         scalar_vec, lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec, lane0));
  DONE;
})
4712
;; Scalar (_n) form of neon_vmlals.  Operand 3 is written to lane 0 of a
;; new pseudo with neon_vset_lane, and neon_vmlals_lane is then emitted
;; with lane index 0.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4726
;; Scalar (_n) form of neon_vmlalu.  Operand 3 is written to lane 0 of a
;; new pseudo with neon_vset_lane, and neon_vmlalu_lane is then emitted
;; with lane index 0.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4740
;; Scalar (_n) form of neon_vqdmlal.  Operand 3 is written to lane 0 of a
;; new pseudo with neon_vset_lane, and neon_vqdmlal_lane is then emitted
;; with lane index 0.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
4754
;; Scalar (_n) form of neon_vmls for the VMD modes.  Operand 3 is written
;; to lane 0 of a new pseudo with neon_vset_lane, and neon_vmls_lane is
;; then emitted with lane index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec, lane0));
  DONE;
})
4768
;; Scalar (_n) form of neon_vmls for the VMQ modes.  The scalar is placed
;; in lane 0 of a half-width (<V_HALF>) pseudo, which is the vector
;; handed to neon_vmls_lane together with lane index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3],
                                         scalar_vec, lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec, lane0));
  DONE;
})
4782
;; Scalar (_n) form of neon_vmlsls.  Operand 3 is written to lane 0 of a
;; new pseudo with neon_vset_lane, and neon_vmlsls_lane is then emitted
;; with lane index 0.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4796
;; Scalar (_n) form of neon_vmlslu.  Operand 3 is written to lane 0 of a
;; new pseudo with neon_vset_lane, and neon_vmlslu_lane is then emitted
;; with lane index 0.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4810
;; Scalar (_n) form of neon_vqdmlsl.  Operand 3 is written to lane 0 of a
;; new pseudo with neon_vset_lane, and neon_vqdmlsl_lane is then emitted
;; with lane index 0.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* SCALAR_VEC is both the input and the output of the lane insert, so
     only lane 0 is given a value here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
4824
;; vext on two vector registers of the same mode.  Operand 3 is an
;; immediate that is printed as the last assembler operand.
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")
           (match_operand:VDQX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VEXT))]
  "TARGET_NEON"
{
  /* The immediate is range-checked against 0 and the number of elements
     in the vector mode before the template is returned.  */
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  arm_const_bounds (operands[3], 0, nunits);
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
4838
;; vrev64 for the VDQ modes, represented as UNSPEC_VREV64 of operand 1.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ
          [(match_operand:VDQ 1 "s_register_operand" "w")]
          UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4847
;; vrev32 for the VX modes, represented as UNSPEC_VREV32 of operand 1.
(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX
          [(match_operand:VX 1 "s_register_operand" "w")]
          UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4856
;; vrev16 for the VE modes, represented as UNSPEC_VREV16 of operand 1.
(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE
          [(match_operand:VE 1 "s_register_operand" "w")]
          UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4865
; The vbsl_* intrinsics can be emitted as any of vbsl, vbit or vbif; the
; choice depends on register allocation.  For an intrinsic of the form:
;   rD = vbsl_* (rS, rN, rM)
; the candidates are:
;   vbsl rS, rN, rM  (if D = S)
;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
; The three constraint alternatives tie operand 0 to operand 1, operand 3
; and operand 2 respectively, matching the order of the three templates.

(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
           (match_operand:VDQX 2 "s_register_operand" " w,w,0")
           (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
4887
;; Expander for the bit-select unspec.  Operand 1 arrives in the
;; <V_cmp_result> mode and is re-expressed in the data mode before the
;; UNSPEC_VBSL pattern is generated.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
           (match_operand:VDQX 2 "s_register_operand" "")
           (match_operand:VDQX 3 "s_register_operand" "")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands with different modes can't be aliased together, so view
     operand 1 in the same mode as the others.  */
  rtx select_mask = gen_lowpart (<MODE>mode, operands[1]);

  operands[1] = select_mask;
})
4899
;; vshl, vrshl
;; Register forms: operand 2 is a vector register of the same mode as
;; operand 1.  The variants come from the VSHL unspec iterator.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4910
;; vqshl, vqrshl
;; Register forms: operand 2 is a vector register of the same mode as
;; operand 1.  The variants come from the VQSHL unspec iterator.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4921
;; vshr_n, vrshr_n
;; Immediate forms; operand 2 is the shift count.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHR_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 1 and element-bits + 1.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elt_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4935
;; vshrn_n, vrshrn_n
;; Immediate forms producing a <V_narrow> result from a VN input;
;; operand 2 is the shift count.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHRN_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 1 and half the input element
     width plus one.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elt_bits / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
4949
;; vqshrn_n, vqrshrn_n
;; Immediate forms producing a <V_narrow> result from a VN input;
;; operand 2 is the shift count.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRN_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 1 and half the input element
     width plus one.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elt_bits / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4963
;; vqshrun_n, vqrshrun_n
;; Immediate forms producing a <V_narrow> result from a VN input;
;; operand 2 is the shift count.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRUN_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 1 and half the input element
     width plus one.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elt_bits / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4977
;; vshl with an immediate shift count in operand 2.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 0 and the element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elt_bits);
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4990
;; vqshl with an immediate shift count in operand 2; the variants come
;; from the VQSHL_N unspec iterator.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHL_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 0 and the element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elt_bits);
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
5003
;; vqshlu with an immediate shift count in operand 2.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 0 and the element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elt_bits);
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
5016
;; vshll with an immediate shift count in operand 2, producing a
;; <V_widen> result from a VW input.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHLL_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 0 and element-bits + 1.
     NOTE(review): this was previously described as 0 < imm <= size,
     which does not match the lower bound of 0 passed here; confirm
     which is intended.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elt_bits + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
5030
;; vsra_n, vrsra_n
;; Operand 1 is tied to the destination; operand 2 is the vector that
;; is printed as the source and operand 3 is the immediate shift count.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          VSRA_N))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 1 and element-bits + 1.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 1, elt_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
5045
;; vsri with an immediate count.  Operand 1 is tied to the destination;
;; operand 2 is the vector printed as the source and operand 3 is the
;; count.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSRI))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 1 and element-bits + 1.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 1, elt_bits + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
5059
;; vsli with an immediate count.  Operand 1 is tied to the destination;
;; operand 2 is the vector printed as the source and operand 3 is the
;; count.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSLI))]
  "TARGET_NEON"
{
  /* The count is range-checked with bounds 0 and the element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 0, elt_bits);
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
5073
;; vtbl.8 with a one-register table: operand 1 is the table and operand 2
;; is printed as the last (index) operand.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
5083
;; vtbl.8 with a TImode table in operand 1.  The table is printed as two
;; separate V8QImode registers.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Build the list of table registers from the first register number of
     operand 1, stepping the register number by two per entry.  */
  rtx asm_ops[4];
  int first_table_reg = REGNO (operands[1]);
  int i;

  asm_ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[3] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
5104
;; vtbl.8 with an EImode table in operand 1.  The table is printed as
;; three separate V8QImode registers.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Build the list of table registers from the first register number of
     operand 1, stepping the register number by two per entry.  */
  rtx asm_ops[5];
  int first_table_reg = REGNO (operands[1]);
  int i;

  asm_ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[4] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5126
;; vtbl.8 with an OImode table in operand 1.  The table is printed as
;; four separate V8QImode registers.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Build the list of table registers from the first register number of
     operand 1, stepping the register number by two per entry.  */
  rtx asm_ops[6];
  int first_table_reg = REGNO (operands[1]);
  int i;

  asm_ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[5] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5149
5150 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a single V16QI table.  It is kept as one insn
;; until reload has completed and is then split into two neon_vtbl2v8qi
;; lookups, one for each V8QI half of the destination and of operand 2.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  /* neon_vtbl2v8qi takes its table in TImode.  */
  rtx table = gen_lowpart (TImode, operands[1]);
  rtx selector = operands[2];
  rtx dest_half, selector_half;
  unsigned half_ofs;

  /* Low halves first.  */
  half_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));

  /* Then the high halves, using the same table.  */
  half_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));
  DONE;
}
  [(set_attr "type" "multiple")]
)
5181
;; V16QI table lookup with an OImode table.  It is kept as one insn until
;; reload has completed and is then split into two neon_vtbl4v8qi
;; lookups, one for each V8QI half of the destination and of operand 2.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  /* The table is OImode.  neon_vtbl4v8qi is the V8QI lookup whose table
     operand is OImode; neon_vtbl2v8qi takes a TImode table and therefore
     cannot be used with this operand.  */
  op1 = operands[1];
  op2 = operands[2];

  /* Low halves of the destination and of the index vector.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  /* High halves, looked up in the same table.  */
  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
5212
;; ??? The logical approach would be to extend the regular neon_vcombine
;; pattern so that it accepts quad-word input modes and produces
;; octa-word output modes.  Doing so would require octa-word vector modes
;; to be supported in moves, which seems overkill for this single use in
;; vec_perm.
;; The split is performed after reload by neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* All of the work is delegated to the helper.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
5232
;; vtbx.8 with a one-register table.  Operand 1 is tied to the
;; destination, operand 2 is the table and operand 3 is printed last.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
5243
;; vtbx.8 with a TImode table in operand 2.  Operand 1 is tied to the
;; destination; the table is printed as two separate V8QImode registers.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Build the list of table registers from the first register number of
     operand 2, stepping the register number by two per entry.  */
  rtx asm_ops[4];
  int first_table_reg = REGNO (operands[2]);
  int i;

  asm_ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[3] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
5265
;; vtbx.8 with an EImode table in operand 2.  Operand 1 is tied to the
;; destination; the table is printed as three separate V8QImode
;; registers.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Build the list of table registers from the first register number of
     operand 2, stepping the register number by two per entry.  */
  rtx asm_ops[5];
  int first_table_reg = REGNO (operands[2]);
  int i;

  asm_ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[4] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5288
;; vtbx.8 with an OImode table in operand 2.  Operand 1 is tied to the
;; destination; the table is printed as four separate V8QImode registers.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Build the list of table registers from the first register number of
     operand 2, stepping the register number by two per entry.  */
  rtx asm_ops[6];
  int first_table_reg = REGNO (operands[2]);
  int i;

  asm_ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_table_reg + 2 * i);
  asm_ops[5] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5312
;; Expander producing both vtrn results in one parallel: operand 0 gets
;; UNSPEC_VTRN1 and operand 3 gets UNSPEC_VTRN2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH
            [(match_operand:VDQWH 1 "s_register_operand")
             (match_operand:VDQWH 2 "s_register_operand")]
            UNSPEC_VTRN1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH
            [(match_dup 1) (match_dup 2)]
            UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
5324
;; Note: The operands are numbered differently from the expander so that
;; tied registers are handled correctly: input 1 is tied to output 0 and
;; input 3 is tied to output 2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
5338
;; Expander producing both vzip results in one parallel: operand 0 gets
;; UNSPEC_VZIP1 and operand 3 gets UNSPEC_VZIP2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH
            [(match_operand:VDQWH 1 "s_register_operand")
             (match_operand:VDQWH 2 "s_register_operand")]
            UNSPEC_VZIP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH
            [(match_dup 1) (match_dup 2)]
            UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
5350
;; Note: The operands are numbered differently from the expander so that
;; tied registers are handled correctly: input 1 is tied to output 0 and
;; input 3 is tied to output 2.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
5364
;; Expander producing both vuzp results in one parallel: operand 0 gets
;; UNSPEC_VUZP1 and operand 3 gets UNSPEC_VUZP2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH
            [(match_operand:VDQWH 1 "s_register_operand")
             (match_operand:VDQWH 2 "s_register_operand")]
            UNSPEC_VUZP1))
     (set (match_operand:VDQWH 3 "s_register_operand" "")
          (unspec:VDQWH
            [(match_dup 1) (match_dup 2)]
            UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
5376
;; Note: The operands are numbered differently from the expander so that
;; tied registers are handled correctly: input 1 is tied to output 0 and
;; input 3 is tied to output 2.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
5390
;; vec_load_lanes with identical source and destination modes, expressed
;; as an UNSPEC_VLD1 of the memory operand.  There is no preparation
;; code.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
)
5396
;; vld1 of a whole vector register from the memory operand 1.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
5405
;; The lane numbers held in the RTL follow GCC lane order, because
;; arm_expand_neon_args has already flipped them.  They are converted
;; back to architectural lane order here.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VDX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));

  operands[3] = GEN_INT (arch_lane);

  /* A single-element vector is loaded without a lane specifier.  */
  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5427
;; See the comment on the neon_vld1_lane pattern for the reason why the
;; lane numbers are reversed here on big endian targets.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VQX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half_nunits = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int half_regno = REGNO (operands[0]);

  /* A lane in the upper half of the vector is addressed relative to the
     <V_HALF> register two register numbers further on.  */
  if (arch_lane >= half_nunits)
    {
      arch_lane -= half_nunits;
      half_regno += 2;
    }

  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements in total each half holds a single element, which
     is loaded without a lane specifier.  */
  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5456
;; Load-and-duplicate for VD_LANE modes: operand 0 is set to a vec_duplicate
;; of the <V_elem>-mode memory operand 1.  One vld1 instruction with an
;; empty lane subscript ("[]") is emitted.
5457 (define_insn "neon_vld1_dup<mode>"
5458   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5459         (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5460   "TARGET_NEON"
5461   "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5462   [(set_attr "type" "neon_load1_all_lanes<q>")]
5463 )
5464
5465 ;; Special case for DImode.  Treat it exactly like a simple load.
;; The expander has empty preparation code, so it simply emits a DImode
;; UNSPEC_VLD1 set of register operand 0 from memory operand 1.
5466 (define_expand "neon_vld1_dupdi"
5467   [(set (match_operand:DI 0 "s_register_operand" "")
5468         (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
5469                    UNSPEC_VLD1))]
5470   "TARGET_NEON"
5471   ""
5472 )
5473
;; Load-and-duplicate for VQ2 modes: operand 0 is set to a vec_duplicate of
;; the <V_elem>-mode memory operand 1.  A single vld1 is emitted whose
;; register list names operand 0 twice, through the %e and %f modifiers,
;; each with an empty lane subscript.
5474 (define_insn "neon_vld1_dup<mode>"
5475   [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5476         (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5477   "TARGET_NEON"
5478 {
5479   return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5480 }
5481   [(set_attr "type" "neon_load1_all_lanes<q>")]
5482 )
5483
;; Load-and-duplicate for V2DImode.  The insn itself outputs "#", i.e. it
;; must be split; the split is enabled once reload_completed is set.  The
;; split loads the DImode low part of operand 0 with neon_vld1_dupdi and
;; then copies that low part into the DImode high part with a move.  The
;; "length" attribute is 8.
5484 (define_insn_and_split "neon_vld1_dupv2di"
5485    [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5486     (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5487    "TARGET_NEON"
5488    "#"
5489    "&& reload_completed"
5490    [(const_int 0)]
5491    {
5492     rtx tmprtx = gen_lowpart (DImode, operands[0]);
5493     emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5494     emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5495     DONE;
5496     }
5497   [(set_attr "length" "8")
5498    (set_attr "type" "neon_load1_all_lanes_q")]
5499 )
5500
;; Expander named vec_store_lanes<mode><mode>, instantiated for every VDQX
;; mode.  It sets the neon_struct_operand memory operand 0 to an UNSPEC_VST1
;; of register operand 1.  There is no preparation code; the template has
;; the same shape as the neon_vst1<mode> insn pattern.
5501 (define_expand "vec_store_lanes<mode><mode>"
5502   [(set (match_operand:VDQX 0 "neon_struct_operand")
5503         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5504                      UNSPEC_VST1))]
5505   "TARGET_NEON")
5506
;; Whole-vector store for every VDQX mode: memory operand 0 (constraint
;; "Um") receives an UNSPEC_VST1 of register operand 1 (constraint "w").
;; A single vst1.<V_sz_elem> instruction is emitted.
5507 (define_insn "neon_vst1<mode>"
5508   [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5509         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5510                      UNSPEC_VST1))]
5511   "TARGET_NEON"
5512   "vst1.<V_sz_elem>\t%h1, %A0"
5513   [(set_attr "type" "neon_store1_1reg<q>")])
5514
5515 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
5516 ;; reversed here on big-endian targets.
;; Single-lane store for VDX modes.  Operand 0 is the <V_elem>-mode memory
;; destination, operand 1 the source vector and operand 2 the lane index,
;; which is passed through NEON_ENDIAN_LANE_N before being printed.  When
;; the mode has only one element (max == 1) no lane subscript is printed.
5517 (define_insn "neon_vst1_lane<mode>"
5518   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5519         (unspec:<V_elem>
5520           [(match_operand:VDX 1 "s_register_operand" "w")
5521            (match_operand:SI 2 "immediate_operand" "i")]
5522           UNSPEC_VST1_LANE))]
5523   "TARGET_NEON"
5524 {
5525   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5526   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
       /* Replace the lane operand so that %c2 prints the converted index.  */
5527   operands[2] = GEN_INT (lane);
5528   if (max == 1)
5529     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5530   else
5531     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5532 }
5533   [(set_attr "type" "neon_store1_one_lane<q>")]
5534 )
5535
5536 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
5537 ;; reversed here on big-endian targets.
;; Single-lane store for VQX modes.  After the NEON_ENDIAN_LANE_N
;; conversion, a lane in the upper half of the vector (lane >= max / 2) is
;; rebased by subtracting max / 2 and the register number is advanced by 2.
;; Operand 1 is then replaced by a <V_HALF>mode register at that number.
;; When the mode has two elements (max == 2) the half register is stored
;; whole, without a lane subscript.
5538 (define_insn "neon_vst1_lane<mode>"
5539   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5540         (unspec:<V_elem>
5541           [(match_operand:VQX 1 "s_register_operand" "w")
5542            (match_operand:SI 2 "immediate_operand" "i")]
5543           UNSPEC_VST1_LANE))]
5544   "TARGET_NEON"
5545 {
5546   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5547   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5548   int regno = REGNO (operands[1]);
5549   if (lane >= max / 2)
5550     {
5551       lane -= max / 2;
5552       regno += 2;
5553     }
       /* Both operands are rewritten only after the half has been chosen.  */
5554   operands[2] = GEN_INT (lane);
5555   operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5556   if (max == 2)
5557     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5558   else
5559     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5560 }
5561   [(set_attr "type" "neon_store1_one_lane<q>")]
5562 )
5563
;; Expander named vec_load_lanesti<mode> for VDX modes: sets TImode register
;; operand 0 to an UNSPEC_VLD2 of TImode memory operand 1.  The
;; UNSPEC_VSTRUCTDUMMY element holds only (const_int 0); its mode is the
;; only place the VDX iterator appears in the template.  There is no
;; preparation code; the template matches the TImode neon_vld2<mode> insn.
5564 (define_expand "vec_load_lanesti<mode>"
5565   [(set (match_operand:TI 0 "s_register_operand")
5566         (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5567                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5568                    UNSPEC_VLD2))]
5569   "TARGET_NEON")
5570
;; Two-vector structure load into a TImode register, iterated over VDX
;; modes through the UNSPEC_VSTRUCTDUMMY element.  When the element size is
;; 64 bits the instruction printed is vld1.64 rather than vld2, and the
;; "type" attribute is switched to neon_load1_2reg<q> to match.
5571 (define_insn "neon_vld2<mode>"
5572   [(set (match_operand:TI 0 "s_register_operand" "=w")
5573         (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5574                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5575                    UNSPEC_VLD2))]
5576   "TARGET_NEON"
5577 {
5578   if (<V_sz_elem> == 64)
5579     return "vld1.64\t%h0, %A1";
5580   else
5581     return "vld2.<V_sz_elem>\t%h0, %A1";
5582 }
5583   [(set (attr "type")
5584       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5585                     (const_string "neon_load1_2reg<q>")
5586                     (const_string "neon_load2_2reg<q>")))]
5587 )
5588
;; Expander named vec_load_lanesoi<mode> for VQ2 modes: sets OImode register
;; operand 0 to an UNSPEC_VLD2 of OImode memory operand 1.  There is no
;; preparation code; the template matches the OImode neon_vld2<mode> insn.
5589 (define_expand "vec_load_lanesoi<mode>"
5590   [(set (match_operand:OI 0 "s_register_operand")
5591         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5592                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5593                    UNSPEC_VLD2))]
5594   "TARGET_NEON")
5595
;; Two-vector structure load into an OImode register, iterated over VQ2
;; modes through the UNSPEC_VSTRUCTDUMMY element.  A single
;; vld2.<V_sz_elem> instruction is emitted.
5596 (define_insn "neon_vld2<mode>"
5597   [(set (match_operand:OI 0 "s_register_operand" "=w")
5598         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5599                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5600                    UNSPEC_VLD2))]
5601   "TARGET_NEON"
5602   "vld2.<V_sz_elem>\t%h0, %A1"
5603   [(set_attr "type" "neon_load2_2reg_q")])
5604
5605 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
5606 ;; reversed here on big-endian targets.
;; Two-element single-lane load into a TImode register, iterated over
;; VD_LANE modes.  Operand 1 is the <V_two_elem>-mode memory source, operand
;; 2 the previous register value (tied to the output) and operand 3 the lane
;; index.  The instruction is printed from a local operand array holding the
;; DImode registers at regno and regno + 2, the memory operand and the
;; converted lane; the insn template returned afterwards is empty because
;; output_asm_insn has already emitted the text.
5607 (define_insn "neon_vld2_lane<mode>"
5608   [(set (match_operand:TI 0 "s_register_operand" "=w")
5609         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5610                     (match_operand:TI 2 "s_register_operand" "0")
5611                     (match_operand:SI 3 "immediate_operand" "i")
5612                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5613                    UNSPEC_VLD2_LANE))]
5614   "TARGET_NEON"
5615 {
5616   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5617   int regno = REGNO (operands[0]);
5618   rtx ops[4];
5619   ops[0] = gen_rtx_REG (DImode, regno);
5620   ops[1] = gen_rtx_REG (DImode, regno + 2);
5621   ops[2] = operands[1];
5622   ops[3] = GEN_INT (lane);
5623   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5624   return "";
5625 }
5626   [(set_attr "type" "neon_load2_one_lane<q>")]
5627 )
5628
5629 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
5630 ;; reversed here on big-endian targets.
;; Two-element single-lane load into an OImode register, iterated over
;; VQ_HS modes.  A lane in the upper half of the vector (lane >= max / 2)
;; is rebased by max / 2 and the base register number is advanced by 2.
;; The two DImode registers printed are at regno and regno + 4, i.e. twice
;; the spacing used by the TImode variant of this pattern.
5631 (define_insn "neon_vld2_lane<mode>"
5632   [(set (match_operand:OI 0 "s_register_operand" "=w")
5633         (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5634                     (match_operand:OI 2 "s_register_operand" "0")
5635                     (match_operand:SI 3 "immediate_operand" "i")
5636                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5637                    UNSPEC_VLD2_LANE))]
5638   "TARGET_NEON"
5639 {
5640   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5641   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5642   int regno = REGNO (operands[0]);
5643   rtx ops[4];
5644   if (lane >= max / 2)
5645     {
5646       lane -= max / 2;
5647       regno += 2;
5648     }
5649   ops[0] = gen_rtx_REG (DImode, regno);
5650   ops[1] = gen_rtx_REG (DImode, regno + 4);
5651   ops[2] = operands[1];
5652   ops[3] = GEN_INT (lane);
5653   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5654   return "";
5655 }
5656   [(set_attr "type" "neon_load2_one_lane<q>")]
5657 )
5658
;; Two-element load-and-duplicate into a TImode register, iterated over VDX
;; modes.  For modes with more than one element a vld2 with empty lane
;; subscripts is printed; for single-element modes a plain vld1 of the whole
;; register is printed instead.  The "type" attribute makes the same
;; distinction using <V_mode_nunits>.
5659 (define_insn "neon_vld2_dup<mode>"
5660   [(set (match_operand:TI 0 "s_register_operand" "=w")
5661         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5662                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5663                    UNSPEC_VLD2_DUP))]
5664   "TARGET_NEON"
5665 {
5666   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5667     return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5668   else
5669     return "vld1.<V_sz_elem>\t%h0, %A1";
5670 }
5671   [(set (attr "type")
5672       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5673                     (const_string "neon_load2_all_lanes<q>")
5674                     (const_string "neon_load1_1reg<q>")))]
5675 )
5676
;; Expander named vec_store_lanesti<mode> for VDX modes: sets TImode memory
;; operand 0 to an UNSPEC_VST2 of TImode register operand 1.  There is no
;; preparation code; the template matches the TImode neon_vst2<mode> insn.
5677 (define_expand "vec_store_lanesti<mode>"
5678   [(set (match_operand:TI 0 "neon_struct_operand")
5679         (unspec:TI [(match_operand:TI 1 "s_register_operand")
5680                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5681                    UNSPEC_VST2))]
5682   "TARGET_NEON")
5683
;; Two-vector structure store from a TImode register, iterated over VDX
;; modes through the UNSPEC_VSTRUCTDUMMY element.  When the element size is
;; 64 bits the instruction printed is vst1.64 rather than vst2, and the
;; "type" attribute is switched to neon_store1_2reg<q> to match.
;; Otherwise the insn stores two whole registers, so it is classified as
;; neon_store2_2reg<q> (mirroring neon_load2_2reg<q> on the TImode
;; neon_vld2<mode> pattern) rather than as a single-lane store.
5684 (define_insn "neon_vst2<mode>"
5685   [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5686         (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5687                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5688                    UNSPEC_VST2))]
5689   "TARGET_NEON"
5690 {
5691   if (<V_sz_elem> == 64)
5692     return "vst1.64\t%h1, %A0";
5693   else
5694     return "vst2.<V_sz_elem>\t%h1, %A0";
5695 }
5696   [(set (attr "type")
5697       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5698                     (const_string "neon_store1_2reg<q>")
5699                     (const_string "neon_store2_2reg<q>")))]
5700 )
5701
;; Expander named vec_store_lanesoi<mode> for VQ2 modes: sets OImode memory
;; operand 0 to an UNSPEC_VST2 of OImode register operand 1.  There is no
;; preparation code; the template matches the OImode neon_vst2<mode> insn.
5702 (define_expand "vec_store_lanesoi<mode>"
5703   [(set (match_operand:OI 0 "neon_struct_operand")
5704         (unspec:OI [(match_operand:OI 1 "s_register_operand")
5705                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5706                    UNSPEC_VST2))]
5707   "TARGET_NEON")
5708
;; Two-vector structure store from an OImode register, iterated over VQ2
;; modes through the UNSPEC_VSTRUCTDUMMY element.  A single
;; vst2.<V_sz_elem> instruction is emitted.
5709 (define_insn "neon_vst2<mode>"
5710   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5711         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5712                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5713                    UNSPEC_VST2))]
5714   "TARGET_NEON"
5715   "vst2.<V_sz_elem>\t%h1, %A0"
5716   [(set_attr "type" "neon_store2_4reg<q>")]
5717 )
5718
5719 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
5720 ;; reversed here on big-endian targets.
;; Two-element single-lane store from a TImode register, iterated over
;; VD_LANE modes.  Operand 0 is the <V_two_elem>-mode memory destination,
;; operand 1 the source register and operand 2 the lane index.  The
;; instruction is printed from a local operand array holding the memory
;; operand, the DImode registers at regno and regno + 2, and the converted
;; lane; the template returned afterwards is empty.
5721 (define_insn "neon_vst2_lane<mode>"
5722   [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5723         (unspec:<V_two_elem>
5724           [(match_operand:TI 1 "s_register_operand" "w")
5725            (match_operand:SI 2 "immediate_operand" "i")
5726            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5727           UNSPEC_VST2_LANE))]
5728   "TARGET_NEON"
5729 {
5730   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5731   int regno = REGNO (operands[1]);
5732   rtx ops[4];
5733   ops[0] = operands[0];
5734   ops[1] = gen_rtx_REG (DImode, regno);
5735   ops[2] = gen_rtx_REG (DImode, regno + 2);
5736   ops[3] = GEN_INT (lane);
5737   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5738   return "";
5739 }
5740   [(set_attr "type" "neon_store2_one_lane<q>")]
5741 )
5742
5743 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
5744 ;; reversed here on big-endian targets.
;; Two-element single-lane store from an OImode register, iterated over
;; VQ_HS modes.  A lane in the upper half of the vector (lane >= max / 2)
;; is rebased by max / 2 and the base register number is advanced by 2.
;; The two DImode registers printed are at regno and regno + 4.
5745 (define_insn "neon_vst2_lane<mode>"
5746   [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5747         (unspec:<V_two_elem>
5748            [(match_operand:OI 1 "s_register_operand" "w")
5749             (match_operand:SI 2 "immediate_operand" "i")
5750             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5751            UNSPEC_VST2_LANE))]
5752   "TARGET_NEON"
5753 {
5754   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5755   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5756   int regno = REGNO (operands[1]);
5757   rtx ops[4];
5758   if (lane >= max / 2)
5759     {
5760       lane -= max / 2;
5761       regno += 2;
5762     }
5763   ops[0] = operands[0];
5764   ops[1] = gen_rtx_REG (DImode, regno);
5765   ops[2] = gen_rtx_REG (DImode, regno + 4);
5766   ops[3] = GEN_INT (lane);
5767   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5768   return "";
5769 }
5770   [(set_attr "type" "neon_store2_one_lane<q>")]
5771 )
5772
;; Expander named vec_load_lanesei<mode> for VDX modes: sets EImode register
;; operand 0 to an UNSPEC_VLD3 of EImode memory operand 1.  There is no
;; preparation code; the template matches the EImode neon_vld3<mode> insn.
5773 (define_expand "vec_load_lanesei<mode>"
5774   [(set (match_operand:EI 0 "s_register_operand")
5775         (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5776                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5777                    UNSPEC_VLD3))]
5778   "TARGET_NEON")
5779
;; Three-vector structure load into an EImode register, iterated over VDX
;; modes through the UNSPEC_VSTRUCTDUMMY element.  When the element size is
;; 64 bits the instruction printed is vld1.64 rather than vld3, and the
;; "type" attribute is switched to neon_load1_3reg<q> to match.
5780 (define_insn "neon_vld3<mode>"
5781   [(set (match_operand:EI 0 "s_register_operand" "=w")
5782         (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5783                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5784                    UNSPEC_VLD3))]
5785   "TARGET_NEON"
5786 {
5787   if (<V_sz_elem> == 64)
5788     return "vld1.64\t%h0, %A1";
5789   else
5790     return "vld3.<V_sz_elem>\t%h0, %A1";
5791 }
5792   [(set (attr "type")
5793       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5794                     (const_string "neon_load1_3reg<q>")
5795                     (const_string "neon_load3_3reg<q>")))]
5796 )
5797
;; Expander named vec_load_lanesci<mode> for VQ2 modes.  It takes a CImode
;; register operand 0 and a CImode memory operand 1 and simply forwards both
;; to the neon_vld3<mode> expander, then finishes with DONE so the operand
;; template itself is not emitted.
5798 (define_expand "vec_load_lanesci<mode>"
5799   [(match_operand:CI 0 "s_register_operand")
5800    (match_operand:CI 1 "neon_struct_operand")
5801    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5802   "TARGET_NEON"
5803 {
5804   emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5805   DONE;
5806 })
5807
;; Three-vector structure load into a CImode register for VQ2 modes,
;; expanded as two insns.  The CImode memory operand is re-addressed as
;; EImode at offset 0 for neon_vld3qa<mode>, and then as EImode at offset
;; GET_MODE_SIZE (EImode) for neon_vld3qb<mode>.  The second insn also takes
;; operand 0 as an input, so it is treated as updating the register rather
;; than overwriting it.
5808 (define_expand "neon_vld3<mode>"
5809   [(match_operand:CI 0 "s_register_operand")
5810    (match_operand:CI 1 "neon_struct_operand")
5811    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5812   "TARGET_NEON"
5813 {
5814   rtx mem;
5815
5816   mem = adjust_address (operands[1], EImode, 0);
5817   emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5818   mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5819   emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
5820   DONE;
5821 })
5822
;; First half of the CImode three-vector load emitted by the neon_vld3<mode>
;; expander.  It loads from EImode memory operand 1 and prints a vld3 whose
;; register list is the DImode registers at regno, regno + 4 and regno + 8
;; of operand 0.  The instruction is emitted by output_asm_insn, so the
;; template returned is empty.
5823 (define_insn "neon_vld3qa<mode>"
5824   [(set (match_operand:CI 0 "s_register_operand" "=w")
5825         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5826                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5827                    UNSPEC_VLD3A))]
5828   "TARGET_NEON"
5829 {
5830   int regno = REGNO (operands[0]);
5831   rtx ops[4];
5832   ops[0] = gen_rtx_REG (DImode, regno);
5833   ops[1] = gen_rtx_REG (DImode, regno + 4);
5834   ops[2] = gen_rtx_REG (DImode, regno + 8);
5835   ops[3] = operands[1];
5836   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5837   return "";
5838 }
5839   [(set_attr "type" "neon_load3_3reg<q>")]
5840 )
5841
;; Second half of the CImode three-vector load emitted by the
;; neon_vld3<mode> expander.  Operand 2 is the previous register value, tied
;; to the output by the "0" constraint.  The vld3 printed uses the DImode
;; registers at regno + 2, regno + 6 and regno + 10 of operand 0, i.e. the
;; registers interleaved with those written by neon_vld3qa<mode>.
5842 (define_insn "neon_vld3qb<mode>"
5843   [(set (match_operand:CI 0 "s_register_operand" "=w")
5844         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5845                     (match_operand:CI 2 "s_register_operand" "0")
5846                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5847                    UNSPEC_VLD3B))]
5848   "TARGET_NEON"
5849 {
5850   int regno = REGNO (operands[0]);
5851   rtx ops[4];
5852   ops[0] = gen_rtx_REG (DImode, regno + 2);
5853   ops[1] = gen_rtx_REG (DImode, regno + 6);
5854   ops[2] = gen_rtx_REG (DImode, regno + 10);
5855   ops[3] = operands[1];
5856   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5857   return "";
5858 }
5859   [(set_attr "type" "neon_load3_3reg<q>")]
5860 )
5861
5862 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
5863 ;; reversed here on big-endian targets.
;; Three-element single-lane load into an EImode register, iterated over
;; VD_LANE modes.  Operand 1 is the <V_three_elem>-mode memory source,
;; operand 2 the previous register value (tied to the output) and operand 3
;; the lane index.  The DImode registers printed are at regno, regno + 2 and
;; regno + 4.  The memory operand is printed with plain %3 here, whereas the
;; vld2 and vld4 lane patterns use the %A modifier.
5864 (define_insn "neon_vld3_lane<mode>"
5865   [(set (match_operand:EI 0 "s_register_operand" "=w")
5866         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5867                     (match_operand:EI 2 "s_register_operand" "0")
5868                     (match_operand:SI 3 "immediate_operand" "i")
5869                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5870                    UNSPEC_VLD3_LANE))]
5871   "TARGET_NEON"
5872 {
5873   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5874   int regno = REGNO (operands[0]);
5875   rtx ops[5];
5876   ops[0] = gen_rtx_REG (DImode, regno);
5877   ops[1] = gen_rtx_REG (DImode, regno + 2);
5878   ops[2] = gen_rtx_REG (DImode, regno + 4);
5879   ops[3] = operands[1];
5880   ops[4] = GEN_INT (lane);
5881   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5882                    ops);
5883   return "";
5884 }
5885   [(set_attr "type" "neon_load3_one_lane<q>")]
5886 )
5887
5888 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
5889 ;; reversed here on big-endian targets.
;; Three-element single-lane load into a CImode register, iterated over
;; VQ_HS modes.  A lane in the upper half of the vector (lane >= max / 2)
;; is rebased by max / 2 and the base register number is advanced by 2.
;; The DImode registers printed are at regno, regno + 4 and regno + 8.  As
;; in the EImode variant, the memory operand is printed with plain %3.
5890 (define_insn "neon_vld3_lane<mode>"
5891   [(set (match_operand:CI 0 "s_register_operand" "=w")
5892         (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5893                     (match_operand:CI 2 "s_register_operand" "0")
5894                     (match_operand:SI 3 "immediate_operand" "i")
5895                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5896                    UNSPEC_VLD3_LANE))]
5897   "TARGET_NEON"
5898 {
5899   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5900   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5901   int regno = REGNO (operands[0]);
5902   rtx ops[5];
5903   if (lane >= max / 2)
5904     {
5905       lane -= max / 2;
5906       regno += 2;
5907     }
5908   ops[0] = gen_rtx_REG (DImode, regno);
5909   ops[1] = gen_rtx_REG (DImode, regno + 4);
5910   ops[2] = gen_rtx_REG (DImode, regno + 8);
5911   ops[3] = operands[1];
5912   ops[4] = GEN_INT (lane);
5913   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5914                    ops);
5915   return "";
5916 }
5917   [(set_attr "type" "neon_load3_one_lane<q>")]
5918 )
5919
;; Three-element load-and-duplicate into an EImode register, iterated over
;; VDX modes.  For modes with more than one element a vld3 with empty lane
;; subscripts is printed for the DImode registers at regno, regno + 2 and
;; regno + 4, with the memory operand printed as plain %3.  For
;; single-element modes a plain vld1 of the whole register is returned
;; instead.  The "type" attribute makes the same distinction using
;; <V_mode_nunits>.
5920 (define_insn "neon_vld3_dup<mode>"
5921   [(set (match_operand:EI 0 "s_register_operand" "=w")
5922         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5923                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5924                    UNSPEC_VLD3_DUP))]
5925   "TARGET_NEON"
5926 {
5927   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5928     {
5929       int regno = REGNO (operands[0]);
5930       rtx ops[4];
5931       ops[0] = gen_rtx_REG (DImode, regno);
5932       ops[1] = gen_rtx_REG (DImode, regno + 2);
5933       ops[2] = gen_rtx_REG (DImode, regno + 4);
5934       ops[3] = operands[1];
5935       output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5936       return "";
5937     }
5938   else
5939     return "vld1.<V_sz_elem>\t%h0, %A1";
5940 }
5941   [(set (attr "type")
5942       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5943                     (const_string "neon_load3_all_lanes<q>")
5944                     (const_string "neon_load1_1reg<q>")))])
5945
;; Expander named vec_store_lanesei<mode> for VDX modes: sets EImode memory
;; operand 0 to an UNSPEC_VST3 of EImode register operand 1.  There is no
;; preparation code; the template matches the EImode neon_vst3<mode> insn.
5946 (define_expand "vec_store_lanesei<mode>"
5947   [(set (match_operand:EI 0 "neon_struct_operand")
5948         (unspec:EI [(match_operand:EI 1 "s_register_operand")
5949                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5950                    UNSPEC_VST3))]
5951   "TARGET_NEON")
5952
;; Three-vector structure store from an EImode register, iterated over VDX
;; modes through the UNSPEC_VSTRUCTDUMMY element.  When the element size is
;; 64 bits the instruction printed is vst1.64 rather than vst3, and the
;; "type" attribute is switched to neon_store1_3reg<q> to match.
;; Otherwise the insn stores three whole registers, so it is classified as
;; neon_store3_3reg<q> (the type also used by neon_vst3qa/neon_vst3qb and
;; mirroring neon_load3_3reg<q> on the EImode neon_vld3<mode> pattern)
;; rather than as a single-lane store.
5953 (define_insn "neon_vst3<mode>"
5954   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5955         (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5956                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5957                    UNSPEC_VST3))]
5958   "TARGET_NEON"
5959 {
5960   if (<V_sz_elem> == 64)
5961     return "vst1.64\t%h1, %A0";
5962   else
5963     return "vst3.<V_sz_elem>\t%h1, %A0";
5964 }
5965   [(set (attr "type")
5966       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5967                     (const_string "neon_store1_3reg<q>")
5968                     (const_string "neon_store3_3reg<q>")))])
5969
;; Expander named vec_store_lanesci<mode> for VQ2 modes.  It takes a CImode
;; memory operand 0 and a CImode register operand 1 and simply forwards both
;; to the neon_vst3<mode> expander, then finishes with DONE so the operand
;; template itself is not emitted.
5970 (define_expand "vec_store_lanesci<mode>"
5971   [(match_operand:CI 0 "neon_struct_operand")
5972    (match_operand:CI 1 "s_register_operand")
5973    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5974   "TARGET_NEON"
5975 {
5976   emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
5977   DONE;
5978 })
5979
;; Three-vector structure store from a CImode register for VQ2 modes,
;; expanded as two insns.  The CImode memory operand is re-addressed as
;; EImode at offset 0 for neon_vst3qa<mode>, and then as EImode at offset
;; GET_MODE_SIZE (EImode) for neon_vst3qb<mode>.  Both insns read the same
;; source register operand 1.
5980 (define_expand "neon_vst3<mode>"
5981   [(match_operand:CI 0 "neon_struct_operand")
5982    (match_operand:CI 1 "s_register_operand")
5983    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5984   "TARGET_NEON"
5985 {
5986   rtx mem;
5987
5988   mem = adjust_address (operands[0], EImode, 0);
5989   emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5990   mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5991   emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
5992   DONE;
5993 })
5994
;; First half of the CImode three-vector store emitted by the
;; neon_vst3<mode> expander.  It stores to EImode memory operand 0 and
;; prints a vst3 whose register list is the DImode registers at regno,
;; regno + 4 and regno + 8 of operand 1.  The instruction is emitted by
;; output_asm_insn, so the template returned is empty.
5995 (define_insn "neon_vst3qa<mode>"
5996   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5997         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5998                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5999                    UNSPEC_VST3A))]
6000   "TARGET_NEON"
6001 {
6002   int regno = REGNO (operands[1]);
6003   rtx ops[4];
6004   ops[0] = operands[0];
6005   ops[1] = gen_rtx_REG (DImode, regno);
6006   ops[2] = gen_rtx_REG (DImode, regno + 4);
6007   ops[3] = gen_rtx_REG (DImode, regno + 8);
6008   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6009   return "";
6010 }
6011   [(set_attr "type" "neon_store3_3reg<q>")]
6012 )
6013
;; Second half of the CImode three-vector store emitted by the
;; neon_vst3<mode> expander.  The vst3 printed uses the DImode registers at
;; regno + 2, regno + 6 and regno + 10 of operand 1, i.e. the registers
;; interleaved with those stored by neon_vst3qa<mode>.
6014 (define_insn "neon_vst3qb<mode>"
6015   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6016         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6017                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6018                    UNSPEC_VST3B))]
6019   "TARGET_NEON"
6020 {
6021   int regno = REGNO (operands[1]);
6022   rtx ops[4];
6023   ops[0] = operands[0];
6024   ops[1] = gen_rtx_REG (DImode, regno + 2);
6025   ops[2] = gen_rtx_REG (DImode, regno + 6);
6026   ops[3] = gen_rtx_REG (DImode, regno + 10);
6027   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6028   return "";
6029 }
6030   [(set_attr "type" "neon_store3_3reg<q>")]
6031 )
6032
6033 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
6034 ;; reversed here on big-endian targets.
;; Three-element single-lane store from an EImode register, iterated over
;; VD_LANE modes.  Operand 0 is the <V_three_elem>-mode memory destination,
;; operand 1 the source register and operand 2 the lane index.  The DImode
;; registers printed are at regno, regno + 2 and regno + 4.  The memory
;; operand is printed with plain %0 here, whereas the vst2 lane patterns
;; use the %A modifier.
6035 (define_insn "neon_vst3_lane<mode>"
6036   [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6037         (unspec:<V_three_elem>
6038            [(match_operand:EI 1 "s_register_operand" "w")
6039             (match_operand:SI 2 "immediate_operand" "i")
6040             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6041            UNSPEC_VST3_LANE))]
6042   "TARGET_NEON"
6043 {
6044   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6045   int regno = REGNO (operands[1]);
6046   rtx ops[5];
6047   ops[0] = operands[0];
6048   ops[1] = gen_rtx_REG (DImode, regno);
6049   ops[2] = gen_rtx_REG (DImode, regno + 2);
6050   ops[3] = gen_rtx_REG (DImode, regno + 4);
6051   ops[4] = GEN_INT (lane);
6052   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6053                    ops);
6054   return "";
6055 }
6056   [(set_attr "type" "neon_store3_one_lane<q>")]
6057 )
6058
6059 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
6060 ;; reversed here on big-endian targets.
;; Three-element single-lane store from a CImode register, iterated over
;; VQ_HS modes.  A lane in the upper half of the vector (lane >= max / 2)
;; is rebased by max / 2 and the base register number is advanced by 2.
;; The DImode registers printed are at regno, regno + 4 and regno + 8.  As
;; in the EImode variant, the memory operand is printed with plain %0.
6061 (define_insn "neon_vst3_lane<mode>"
6062   [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6063         (unspec:<V_three_elem>
6064            [(match_operand:CI 1 "s_register_operand" "w")
6065             (match_operand:SI 2 "immediate_operand" "i")
6066             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6067            UNSPEC_VST3_LANE))]
6068   "TARGET_NEON"
6069 {
6070   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6071   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6072   int regno = REGNO (operands[1]);
6073   rtx ops[5];
6074   if (lane >= max / 2)
6075     {
6076       lane -= max / 2;
6077       regno += 2;
6078     }
6079   ops[0] = operands[0];
6080   ops[1] = gen_rtx_REG (DImode, regno);
6081   ops[2] = gen_rtx_REG (DImode, regno + 4);
6082   ops[3] = gen_rtx_REG (DImode, regno + 8);
6083   ops[4] = GEN_INT (lane);
6084   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6085                    ops);
6086   return "";
6087 }
6088   [(set_attr "type" "neon_store3_one_lane<q>")]
6089 )
6090
;; Expander named vec_load_lanesoi<mode> for VDX modes: sets OImode register
;; operand 0 to an UNSPEC_VLD4 of OImode memory operand 1.  There is no
;; preparation code; the template matches the OImode neon_vld4<mode> insn.
6091 (define_expand "vec_load_lanesoi<mode>"
6092   [(set (match_operand:OI 0 "s_register_operand")
6093         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6094                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6095                    UNSPEC_VLD4))]
6096   "TARGET_NEON")
6097
;; Four-vector structure load into an OImode register, iterated over VDX
;; modes through the UNSPEC_VSTRUCTDUMMY element.  When the element size is
;; 64 bits the instruction printed is vld1.64 rather than vld4, and the
;; "type" attribute is switched to neon_load1_4reg<q> to match.
6098 (define_insn "neon_vld4<mode>"
6099   [(set (match_operand:OI 0 "s_register_operand" "=w")
6100         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6101                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6102                    UNSPEC_VLD4))]
6103   "TARGET_NEON"
6104 {
6105   if (<V_sz_elem> == 64)
6106     return "vld1.64\t%h0, %A1";
6107   else
6108     return "vld4.<V_sz_elem>\t%h0, %A1";
6109 }
6110   [(set (attr "type")
6111       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6112                     (const_string "neon_load1_4reg<q>")
6113                     (const_string "neon_load4_4reg<q>")))]
6114 )
6115
;; Expander named vec_load_lanesxi<mode> for VQ2 modes.  It takes an XImode
;; register operand 0 and an XImode memory operand 1 and simply forwards
;; both to the neon_vld4<mode> expander, then finishes with DONE so the
;; operand template itself is not emitted.
6116 (define_expand "vec_load_lanesxi<mode>"
6117   [(match_operand:XI 0 "s_register_operand")
6118    (match_operand:XI 1 "neon_struct_operand")
6119    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6120   "TARGET_NEON"
6121 {
6122   emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
6123   DONE;
6124 })
6125
;; Four-vector structure load into an XImode register for VQ2 modes,
;; expanded as two insns.  The XImode memory operand is re-addressed as
;; OImode at offset 0 for neon_vld4qa<mode>, and then as OImode at offset
;; GET_MODE_SIZE (OImode) for neon_vld4qb<mode>.  The second insn also takes
;; operand 0 as an input, so it is treated as updating the register rather
;; than overwriting it.
6126 (define_expand "neon_vld4<mode>"
6127   [(match_operand:XI 0 "s_register_operand")
6128    (match_operand:XI 1 "neon_struct_operand")
6129    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6130   "TARGET_NEON"
6131 {
6132   rtx mem;
6133
6134   mem = adjust_address (operands[1], OImode, 0);
6135   emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6136   mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6137   emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
6138   DONE;
6139 })
6140
;; First half of the XImode four-vector load emitted by the neon_vld4<mode>
;; expander.  It loads from OImode memory operand 1 and prints a vld4 whose
;; register list is the DImode registers at regno, regno + 4, regno + 8 and
;; regno + 12 of operand 0.  The instruction is emitted by output_asm_insn,
;; so the template returned is empty.
6141 (define_insn "neon_vld4qa<mode>"
6142   [(set (match_operand:XI 0 "s_register_operand" "=w")
6143         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6144                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6145                    UNSPEC_VLD4A))]
6146   "TARGET_NEON"
6147 {
6148   int regno = REGNO (operands[0]);
6149   rtx ops[5];
6150   ops[0] = gen_rtx_REG (DImode, regno);
6151   ops[1] = gen_rtx_REG (DImode, regno + 4);
6152   ops[2] = gen_rtx_REG (DImode, regno + 8);
6153   ops[3] = gen_rtx_REG (DImode, regno + 12);
6154   ops[4] = operands[1];
6155   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6156   return "";
6157 }
6158   [(set_attr "type" "neon_load4_4reg<q>")]
6159 )
6160
;; Second half of the XImode four-vector load emitted by the
;; neon_vld4<mode> expander.  Operand 2 is the previous register value, tied
;; to the output by the "0" constraint.  The vld4 printed uses the DImode
;; registers at regno + 2, regno + 6, regno + 10 and regno + 14 of operand
;; 0, i.e. the registers interleaved with those written by
;; neon_vld4qa<mode>.
6161 (define_insn "neon_vld4qb<mode>"
6162   [(set (match_operand:XI 0 "s_register_operand" "=w")
6163         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6164                     (match_operand:XI 2 "s_register_operand" "0")
6165                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6166                    UNSPEC_VLD4B))]
6167   "TARGET_NEON"
6168 {
6169   int regno = REGNO (operands[0]);
6170   rtx ops[5];
6171   ops[0] = gen_rtx_REG (DImode, regno + 2);
6172   ops[1] = gen_rtx_REG (DImode, regno + 6);
6173   ops[2] = gen_rtx_REG (DImode, regno + 10);
6174   ops[3] = gen_rtx_REG (DImode, regno + 14);
6175   ops[4] = operands[1];
6176   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6177   return "";
6178 }
6179   [(set_attr "type" "neon_load4_4reg<q>")]
6180 )
6181
6182 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
6183 ;; reversed here on big-endian targets.
;; Four-element single-lane load into an OImode register, iterated over
;; VD_LANE modes.  Operand 1 is the <V_four_elem>-mode memory source,
;; operand 2 the previous register value (tied to the output) and operand 3
;; the lane index.  The DImode registers printed are at regno, regno + 2,
;; regno + 4 and regno + 6, each subscripted with the converted lane.
6184 (define_insn "neon_vld4_lane<mode>"
6185   [(set (match_operand:OI 0 "s_register_operand" "=w")
6186         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6187                     (match_operand:OI 2 "s_register_operand" "0")
6188                     (match_operand:SI 3 "immediate_operand" "i")
6189                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6190                    UNSPEC_VLD4_LANE))]
6191   "TARGET_NEON"
6192 {
6193   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6194   int regno = REGNO (operands[0]);
6195   rtx ops[6];
6196   ops[0] = gen_rtx_REG (DImode, regno);
6197   ops[1] = gen_rtx_REG (DImode, regno + 2);
6198   ops[2] = gen_rtx_REG (DImode, regno + 4);
6199   ops[3] = gen_rtx_REG (DImode, regno + 6);
6200   ops[4] = operands[1];
6201   ops[5] = GEN_INT (lane);
6202   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6203                    ops);
6204   return "";
6205 }
6206   [(set_attr "type" "neon_load4_one_lane<q>")]
6207 )
6208
6209 ;; See the comment on neon_vld1_lane for the reason why the lane numbers are
6210 ;; reversed here on big-endian targets.
;; Four-element single-lane load into an XImode register, iterated over
;; VQ_HS modes.  A lane in the upper half of the vector (lane >= max / 2)
;; is rebased by max / 2 and the base register number is advanced by 2.
;; The DImode registers printed are at regno, regno + 4, regno + 8 and
;; regno + 12.
6211 (define_insn "neon_vld4_lane<mode>"
6212   [(set (match_operand:XI 0 "s_register_operand" "=w")
6213         (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6214                     (match_operand:XI 2 "s_register_operand" "0")
6215                     (match_operand:SI 3 "immediate_operand" "i")
6216                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6217                    UNSPEC_VLD4_LANE))]
6218   "TARGET_NEON"
6219 {
6220   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6221   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6222   int regno = REGNO (operands[0]);
6223   rtx ops[6];
6224   if (lane >= max / 2)
6225     {
6226       lane -= max / 2;
6227       regno += 2;
6228     }
6229   ops[0] = gen_rtx_REG (DImode, regno);
6230   ops[1] = gen_rtx_REG (DImode, regno + 4);
6231   ops[2] = gen_rtx_REG (DImode, regno + 8);
6232   ops[3] = gen_rtx_REG (DImode, regno + 12);
6233   ops[4] = operands[1];
6234   ops[5] = GEN_INT (lane);
6235   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6236                    ops);
6237   return "";
6238 }
6239   [(set_attr "type" "neon_load4_one_lane<q>")]
6240 )
6241
;; Four-element load-and-duplicate into an OImode register, iterated over
;; VDX modes.  For modes with more than one element a vld4 with empty lane
;; subscripts is printed for the DImode registers at regno, regno + 2,
;; regno + 4 and regno + 6.  For single-element modes a plain vld1 of the
;; whole register is returned instead.  The "type" attribute makes the same
;; distinction using <V_mode_nunits>.
6242 (define_insn "neon_vld4_dup<mode>"
6243   [(set (match_operand:OI 0 "s_register_operand" "=w")
6244         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6245                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6246                    UNSPEC_VLD4_DUP))]
6247   "TARGET_NEON"
6248 {
6249   if (GET_MODE_NUNITS (<MODE>mode) > 1)
6250     {
6251       int regno = REGNO (operands[0]);
6252       rtx ops[5];
6253       ops[0] = gen_rtx_REG (DImode, regno);
6254       ops[1] = gen_rtx_REG (DImode, regno + 2);
6255       ops[2] = gen_rtx_REG (DImode, regno + 4);
6256       ops[3] = gen_rtx_REG (DImode, regno + 6);
6257       ops[4] = operands[1];
6258       output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6259                        ops);
6260       return "";
6261     }
6262   else
6263     return "vld1.<V_sz_elem>\t%h0, %A1";
6264 }
6265   [(set (attr "type")
6266       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6267                     (const_string "neon_load4_all_lanes<q>")
6268                     (const_string "neon_load1_1reg<q>")))]
6269 )
6270
;; Expander for storing an OI register (operand 1) to a structure operand
;; (operand 0) for VDX modes.  There are no preparation statements, so the
;; RTL template itself is emitted; it has the same UNSPEC_VST4 shape as the
;; neon_vst4<mode> insn pattern for VDX.
6271 (define_expand "vec_store_lanesoi<mode>"
6272   [(set (match_operand:OI 0 "neon_struct_operand")
6273         (unspec:OI [(match_operand:OI 1 "s_register_operand")
6274                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6275                    UNSPEC_VST4))]
6276   "TARGET_NEON")
6277
;; UNSPEC_VST4 store of the OI register operand 1 to the structure operand 0
;; for VDX modes.  The "type" attribute follows the same 64-bit element test
;; as the output code.
6278 (define_insn "neon_vst4<mode>"
6279   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6280         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6281                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6282                    UNSPEC_VST4))]
6283   "TARGET_NEON"
6284 {
       /* 64-bit elements are stored with vst1.64; every other element size
          uses vst4.  */
       return (<V_sz_elem> == 64
               ? "vst1.64\t%h1, %A0"
               : "vst4.<V_sz_elem>\t%h1, %A0");
6289 }
6290   [(set (attr "type")
6291       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6292                     (const_string "neon_store1_4reg<q>")
6293                     (const_string "neon_store4_4reg<q>")))]
6294 )
6295
;; Expander for storing an XI register (operand 1) to a structure operand
;; (operand 0) for VQ2 modes.  It only forwards both operands to
;; gen_neon_vst4<mode>.
6296 (define_expand "vec_store_lanesxi<mode>"
6297   [(match_operand:XI 0 "neon_struct_operand")
6298    (match_operand:XI 1 "s_register_operand")
6299    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6300   "TARGET_NEON"
6301 {
6302   emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
6303   DONE;
6304 })
6305
;; XI (VQ2) store: emitted as two OI-sized stores, neon_vst4qa<mode> for the
;; first OImode piece of the destination and neon_vst4qb<mode> for the piece
;; that follows it.  Both take the whole XI source register.
6306 (define_expand "neon_vst4<mode>"
6307   [(match_operand:XI 0 "neon_struct_operand")
6308    (match_operand:XI 1 "s_register_operand")
6309    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6310   "TARGET_NEON"
6311 {
       /* First OImode piece, at offset 0 of the destination.  */
       rtx lo_mem = adjust_address (operands[0], OImode, 0);
       emit_insn (gen_neon_vst4qa<mode> (lo_mem, operands[1]));

       /* Second piece, one OImode size beyond the first.  */
       rtx hi_mem = adjust_address (lo_mem, OImode, GET_MODE_SIZE (OImode));
       emit_insn (gen_neon_vst4qb<mode> (hi_mem, operands[1]));
6318   DONE;
6319 })
6320
;; First half of an XI store (UNSPEC_VST4A): vst4 of the DImode registers at
;; offsets 0, 4, 8 and 12 from the first register of operand 1.
6321 (define_insn "neon_vst4qa<mode>"
6322   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6323         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6324                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6325                    UNSPEC_VST4A))]
6326   "TARGET_NEON"
6327 {
       const int base = REGNO (operands[1]);
       rtx xops[5];
       int i;

       xops[0] = operands[0];
       /* Source registers: base, base + 4, base + 8, base + 12.  */
       for (i = 0; i < 4; i++)
         xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
       output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
6336   return "";
6337 }
6338   [(set_attr "type" "neon_store4_4reg<q>")]
6339 )
6340
;; Second half of an XI store (UNSPEC_VST4B): vst4 of the DImode registers at
;; offsets 2, 6, 10 and 14 from the first register of operand 1.
6341 (define_insn "neon_vst4qb<mode>"
6342   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6343         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6344                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6345                    UNSPEC_VST4B))]
6346   "TARGET_NEON"
6347 {
       const int base = REGNO (operands[1]) + 2;
       rtx xops[5];
       int i;

       xops[0] = operands[0];
       /* Source registers: base, base + 4, base + 8, base + 12, where base
          is already two above the first register of operand 1.  */
       for (i = 0; i < 4; i++)
         xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
       output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
6356   return "";
6357 }
6358   [(set_attr "type" "neon_store4_4reg<q>")]
6359 )
6360
6361 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6362 ;; here on big endian targets.
;; OI (VD_LANE) form: operand 1 is the source register group and operand 2 is
;; the lane index.
6363 (define_insn "neon_vst4_lane<mode>"
6364   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6365         (unspec:<V_four_elem>
6366            [(match_operand:OI 1 "s_register_operand" "w")
6367             (match_operand:SI 2 "immediate_operand" "i")
6368             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6369            UNSPEC_VST4_LANE))]
6370   "TARGET_NEON"
6371 {
       const HOST_WIDE_INT lane_no
         = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
       const int base = REGNO (operands[1]);
       rtx xops[6];
       int i;

       xops[0] = operands[0];
       /* Source registers: base, base + 2, base + 4, base + 6.  */
       for (i = 0; i < 4; i++)
         xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
       xops[5] = GEN_INT (lane_no);
       output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                        xops);
6383   return "";
6384 }
6385   [(set_attr "type" "neon_store4_one_lane<q>")]
6386 )
6387
6388 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6389 ;; here on big endian targets.
;; XI (VQ_HS) form: operand 1 is the source register group and operand 2 is
;; the lane index.  This is a single-lane store, so its "type" is the
;; one-lane store type, matching the VD_LANE form of this pattern and the
;; one-lane load type used by the XI form of neon_vld4_lane<mode>.
6390 (define_insn "neon_vst4_lane<mode>"
6391   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6392         (unspec:<V_four_elem>
6393            [(match_operand:XI 1 "s_register_operand" "w")
6394             (match_operand:SI 2 "immediate_operand" "i")
6395             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6396            UNSPEC_VST4_LANE))]
6397   "TARGET_NEON"
6398 {
6399   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6400   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6401   int regno = REGNO (operands[1]);
6402   rtx ops[6];
       /* A lane in the upper half of the vector is addressed through the
          registers two above the base, with the index reduced by half the
          element count.  */
6403   if (lane >= max / 2)
6404     {
6405       lane -= max / 2;
6406       regno += 2;
6407     }
6408   ops[0] = operands[0];
       /* Source registers are spaced four apart: regno, +4, +8, +12.  */
6409   ops[1] = gen_rtx_REG (DImode, regno);
6410   ops[2] = gen_rtx_REG (DImode, regno + 4);
6411   ops[3] = gen_rtx_REG (DImode, regno + 8);
6412   ops[4] = gen_rtx_REG (DImode, regno + 12);
6413   ops[5] = GEN_INT (lane);
6414   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6415                    ops);
6416   return "";
6417 }
6418   [(set_attr "type" "neon_store4_one_lane<q>")]
6419 )
6420
;; Extend (SE) the elements of operand 1 selected by operand 2, which must
;; satisfy vect_par_constant_low, into the wider <V_unpack> result.  Emitted
;; as vmovl on the %e1 part of the source.  Little-endian only.
6421 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6422   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6423         (SE:<V_unpack> (vec_select:<V_HALF>
6424                           (match_operand:VU 1 "register_operand" "w")
6425                           (match_operand:VU 2 "vect_par_constant_low" ""))))]
6426   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6427   "vmovl.<US><V_sz_elem> %q0, %e1"
6428   [(set_attr "type" "neon_shift_imm_long")]
6429 )
6430
;; Extend (SE) the elements of operand 1 selected by operand 2, which must
;; satisfy vect_par_constant_high, into the wider <V_unpack> result.  Emitted
;; as vmovl on the %f1 part of the source.  Little-endian only.
6431 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6432   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6433         (SE:<V_unpack> (vec_select:<V_HALF>
6434                           (match_operand:VU 1 "register_operand" "w")
6435                           (match_operand:VU 2 "vect_par_constant_high" ""))))]
6436   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6437   "vmovl.<US><V_sz_elem> %q0, %f1"
6438   [(set_attr "type" "neon_shift_imm_long")]
6439 )
6440
;; Unpack the high half of a VU vector: build a PARALLEL selecting element
;; indices nunits/2 .. nunits-1 and hand it to neon_vec_unpack<US>_hi_<mode>.
6441 (define_expand "vec_unpack<US>_hi_<mode>"
6442   [(match_operand:<V_unpack> 0 "register_operand" "")
6443    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6444  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6445   {
        const int half = <V_mode_nunits> / 2;
        rtvec sel = rtvec_alloc (half);
        rtx par;
        int lane;

        /* Element LANE of the selection is source index half + LANE.  */
        for (lane = 0; lane < half; lane++)
          RTVEC_ELT (sel, lane) = GEN_INT (half + lane);

        par = gen_rtx_PARALLEL (<MODE>mode, sel);
        emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                      operands[1],
                                                      par));
6456    DONE;
6457   }
6458 )
6459
;; Unpack the low half of a VU vector: build a PARALLEL selecting element
;; indices 0 .. nunits/2-1 and hand it to neon_vec_unpack<US>_lo_<mode>.
6460 (define_expand "vec_unpack<US>_lo_<mode>"
6461   [(match_operand:<V_unpack> 0 "register_operand" "")
6462    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6463  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6464   {
        const int half = <V_mode_nunits> / 2;
        rtvec sel = rtvec_alloc (half);
        rtx par;
        int lane;

        /* Element LANE of the selection is source index LANE.  */
        for (lane = 0; lane < half; lane++)
          RTVEC_ELT (sel, lane) = GEN_INT (lane);

        par = gen_rtx_PARALLEL (<MODE>mode, sel);
        emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                      operands[1],
                                                      par));
6474    DONE;
6475   }
6476 )
6477
;; Widening multiply of the extended (SE) halves of operands 1 and 3.  Both
;; halves are selected by the same operand 2 (the second use is a match_dup),
;; which must satisfy vect_par_constant_low.  Emitted as vmull on the %e
;; parts of the sources.  Little-endian only.
6478 (define_insn "neon_vec_<US>mult_lo_<mode>"
6479  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6480        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6481                            (match_operand:VU 1 "register_operand" "w")
6482                            (match_operand:VU 2 "vect_par_constant_low" "")))
6483                         (SE:<V_unpack> (vec_select:<V_HALF>
6484                            (match_operand:VU 3 "register_operand" "w")
6485                            (match_dup 2)))))]
6486   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6487   "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6488   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6489 )
6490
;; Widening multiply of the low halves of two VU vectors: build a PARALLEL
;; selecting element indices 0 .. nunits/2-1 and pass it, between the two
;; sources, to neon_vec_<US>mult_lo_<mode>.
6491 (define_expand "vec_widen_<US>mult_lo_<mode>"
6492   [(match_operand:<V_unpack> 0 "register_operand" "")
6493    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6494    (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6495  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6496  {
        const int half = <V_mode_nunits> / 2;
        rtvec sel = rtvec_alloc (half);
        rtx par;
        int lane;

        /* Element LANE of the selection is source index LANE.  */
        for (lane = 0; lane < half; lane++)
          RTVEC_ELT (sel, lane) = GEN_INT (lane);
        par = gen_rtx_PARALLEL (<MODE>mode, sel);

        emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                                    operands[1],
                                                    par,
                                                    operands[2]));
6508    DONE;
6509  }
6510 )
6511
;; Widening multiply of the extended (SE) halves of operands 1 and 3.  Both
;; halves are selected by the same operand 2 (the second use is a match_dup),
;; which must satisfy vect_par_constant_high.  Emitted as vmull on the %f
;; parts of the sources.  Little-endian only.
6512 (define_insn "neon_vec_<US>mult_hi_<mode>"
6513  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6514       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6515                             (match_operand:VU 1 "register_operand" "w")
6516                             (match_operand:VU 2 "vect_par_constant_high" "")))
6517                        (SE:<V_unpack> (vec_select:<V_HALF>
6518                             (match_operand:VU 3 "register_operand" "w")
6519                             (match_dup 2)))))]
6520   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6521   "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6522   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6523 )
6524
;; Widening multiply of the high halves of two VU vectors: build a PARALLEL
;; selecting element indices nunits/2 .. nunits-1 and pass it, between the
;; two sources, to neon_vec_<US>mult_hi_<mode>.
6525 (define_expand "vec_widen_<US>mult_hi_<mode>"
6526   [(match_operand:<V_unpack> 0 "register_operand" "")
6527    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6528    (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6529  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6530  {
        const int half = <V_mode_nunits> / 2;
        rtvec sel = rtvec_alloc (half);
        rtx par;
        int lane;

        /* Element LANE of the selection is source index half + LANE.  */
        for (lane = 0; lane < half; lane++)
          RTVEC_ELT (sel, lane) = GEN_INT (half + lane);
        par = gen_rtx_PARALLEL (<MODE>mode, sel);

        emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                                    operands[1],
                                                    par,
                                                    operands[2]));
6542    DONE;
6544  }
6545 )
6546
;; Extend (SE) to <V_widen> the result of shifting the VW operand 1 left by
;; operand 2, which must satisfy const_neon_scalar_shift_amount_operand.
;; Emitted as a single vshll.
6547 (define_insn "neon_vec_<US>shiftl_<mode>"
6548  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6549        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6550        (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6551   "TARGET_NEON"
6552 {
6553   return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6554 }
6555   [(set_attr "type" "neon_shift_imm_long")]
6556 )
6557
;; Widening shift-left of the low half of a VU vector: take the <V_HALF>
;; subreg at byte offset 0 of operand 1 and emit the half-width
;; neon_vec_<US>shiftl pattern on it.
6558 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6559   [(match_operand:<V_unpack> 0 "register_operand" "")
6560    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6561    (match_operand:SI 2 "immediate_operand" "i")]
6562  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6563  {
        /* Half-width view of the source starting at byte offset 0.  */
        rtx lo_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                           <MODE>mode, 0);

        emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], lo_half,
                                                     operands[2]));
6567    DONE;
6568  }
6569 )
6570
;; Widening shift-left of the high half of a VU vector: take the <V_HALF>
;; subreg of operand 1 at a byte offset of one <V_HALF> vector and emit the
;; half-width neon_vec_<US>shiftl pattern on it.
6571 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6572   [(match_operand:<V_unpack> 0 "register_operand" "")
6573    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6574    (match_operand:SI 2 "immediate_operand" "i")]
6575  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6576  {
        /* Half-width view of the source starting one <V_HALF> vector in.  */
        rtx hi_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                           <MODE>mode,
                                           GET_MODE_SIZE (<V_HALF>mode));

        emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], hi_half,
                                                     operands[2]));
6581    DONE;
6582  }
6583 )
6584
6585 ;; Vectorize for non-neon-quad case
;; Extend (SE) every element of the VDI operand 1 into the <V_widen> result
;; with a single vmovl.  Unlike the VU patterns, the condition has no
;; !BYTES_BIG_ENDIAN restriction.
6586 (define_insn "neon_unpack<US>_<mode>"
6587  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6588        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6589  "TARGET_NEON"
6590  "vmovl.<US><V_sz_elem> %q0, %P1"
6591   [(set_attr "type" "neon_move")]
6592 )
6593
;; Low-half unpack for VDI modes: widen the whole source into a fresh
;; <V_widen> register, then take its low half with neon_vget_low.
6594 (define_expand "vec_unpack<US>_lo_<mode>"
6595  [(match_operand:<V_double_width> 0 "register_operand" "")
6596   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6597  "TARGET_NEON"
6598 {
       /* Holds the full-width widened vector.  */
       rtx widened = gen_reg_rtx (<V_widen>mode);

       emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
       emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
6603   DONE;
6604 }
6605 )
6606
;; High-half unpack for VDI modes: widen the whole source into a fresh
;; <V_widen> register, then take its high half with neon_vget_high.
6607 (define_expand "vec_unpack<US>_hi_<mode>"
6608  [(match_operand:<V_double_width> 0 "register_operand" "")
6609   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6610  "TARGET_NEON"
6611 {
       /* Holds the full-width widened vector.  */
       rtx widened = gen_reg_rtx (<V_widen>mode);

       emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
       emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
6616   DONE;
6617 }
6618 )
6619
;; Widening multiply of two whole VDI vectors: both operands are extended
;; (SE) to <V_widen> and multiplied, emitted as a single vmull.
6620 (define_insn "neon_vec_<US>mult_<mode>"
6621  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6622        (mult:<V_widen> (SE:<V_widen>
6623                            (match_operand:VDI 1 "register_operand" "w"))
6624                        (SE:<V_widen>
6625                            (match_operand:VDI 2 "register_operand" "w"))))]
6626   "TARGET_NEON"
6627   "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6628   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6629 )
6630
;; High-half widening multiply for VDI modes: compute the full <V_widen>
;; product into a fresh register, then take its high half with
;; neon_vget_high.
6631 (define_expand "vec_widen_<US>mult_hi_<mode>"
6632   [(match_operand:<V_double_width> 0 "register_operand" "")
6633    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6634    (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6635  "TARGET_NEON"
6636  {
        /* Holds the full-width product.  */
        rtx product = gen_reg_rtx (<V_widen>mode);

        emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                                 operands[2]));
        emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
6641    DONE;
6643  }
6644 )
6645
;; Low-half widening multiply for VDI modes: compute the full <V_widen>
;; product into a fresh register, then take its low half with neon_vget_low.
6646 (define_expand "vec_widen_<US>mult_lo_<mode>"
6647   [(match_operand:<V_double_width> 0 "register_operand" "")
6648    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6649    (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6650  "TARGET_NEON"
6651  {
        /* Holds the full-width product.  */
        rtx product = gen_reg_rtx (<V_widen>mode);

        emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                                 operands[2]));
        emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
6656    DONE;
6658  }
6659 )
6660
;; High-half widening shift-left for VDI modes: compute the full <V_widen>
;; shifted vector into a fresh register, then take its high half with
;; neon_vget_high.
6661 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6662  [(match_operand:<V_double_width> 0 "register_operand" "")
6663    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6664    (match_operand:SI 2 "immediate_operand" "i")]
6665  "TARGET_NEON"
6666  {
        /* Holds the full-width shifted vector.  */
        rtx shifted = gen_reg_rtx (<V_widen>mode);

        emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                                   operands[2]));
        emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
6671    DONE;
6672  }
6673 )
6674
;; Low-half widening shift-left for VDI modes: compute the full <V_widen>
;; shifted vector into a fresh register, then take its low half with
;; neon_vget_low.
6675 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6676   [(match_operand:<V_double_width> 0 "register_operand" "")
6677    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6678    (match_operand:SI 2 "immediate_operand" "i")]
6679  "TARGET_NEON"
6680  {
        /* Holds the full-width shifted vector.  */
        rtx shifted = gen_reg_rtx (<V_widen>mode);

        emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                                   operands[2]));
        emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
6685    DONE;
6686  }
6687 )
6688
6689 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6690 ; because the ordering of vector elements in Q registers is different from what
6691 ; the semantics of the instructions require.
6692
;; Truncate the elements of two VN vectors and concatenate the results into
;; one <V_narrow_pack> vector.  The template is two vmovn instructions, hence
;; type "multiple" and length 8.  The first vmovn writes part of operand 0
;; before operand 2 is read, so the destination is earlyclobber ("=&w").
6693 (define_insn "vec_pack_trunc_<mode>"
6694  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6695        (vec_concat:<V_narrow_pack>
6696                 (truncate:<V_narrow>
6697                         (match_operand:VN 1 "register_operand" "w"))
6698                 (truncate:<V_narrow>
6699                         (match_operand:VN 2 "register_operand" "w"))))]
6700  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6701  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6702  [(set_attr "type" "multiple")
6703   (set_attr "length" "8")]
6704 )
6705
6706 ;; For the non-quad case.
;; Truncate the elements of a single VN vector into a <V_narrow> result with
;; one vmovn.  Little-endian only.
6707 (define_insn "neon_vec_pack_trunc_<mode>"
6708  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6709        (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6710  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6711  "vmovn.i<V_sz_elem>\t%P0, %q1"
6712  [(set_attr "type" "neon_move_narrow_q")]
6713 )
6714
;; Pack-and-truncate for VSHFT modes: place operand 1 in the low half and
;; operand 2 in the high half of a fresh <V_DOUBLE> register, then truncate
;; that register with neon_vec_pack_trunc_<V_double>.
6715 (define_expand "vec_pack_trunc_<mode>"
6716  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6717   (match_operand:VSHFT 1 "register_operand" "")
6718   (match_operand:VSHFT 2 "register_operand")]
6719  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6720 {
       /* Double-width register assembled from the two inputs.  */
       rtx joined = gen_reg_rtx (<V_DOUBLE>mode);

       emit_insn (gen_move_lo_quad_<V_double> (joined, operands[1]));
       emit_insn (gen_move_hi_quad_<V_double> (joined, operands[2]));
       emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
6726   DONE;
6727 })
6728
;; Match the absolute value of a VF subtraction, written with the generic
;; minus code, and emit it as one vabd.  Only enabled under
;; flag_unsafe_math_optimizations.
6729 (define_insn "neon_vabd<mode>_2"
6730  [(set (match_operand:VF 0 "s_register_operand" "=w")
6731        (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6732                          (match_operand:VF 2 "s_register_operand" "w"))))]
6733  "TARGET_NEON && flag_unsafe_math_optimizations"
6734  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6735  [(set_attr "type" "neon_fp_abd_s<q>")]
6736 )
6737
;; As neon_vabd<mode>_2, but the subtraction is expressed as an UNSPEC_VSUB
;; unspec rather than the generic minus code.  Only enabled under
;; flag_unsafe_math_optimizations.
6738 (define_insn "neon_vabd<mode>_3"
6739  [(set (match_operand:VF 0 "s_register_operand" "=w")
6740        (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6741                             (match_operand:VF 2 "s_register_operand" "w")]
6742                 UNSPEC_VSUB)))]
6743  "TARGET_NEON && flag_unsafe_math_optimizations"
6744  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6745  [(set_attr "type" "neon_fp_abd_s<q>")]
6746 )
6747
6748 ;; Copy from core-to-neon regs, then extend, not vice-versa
6749
;; Sign-extend SI to DI when, after reload, the destination is a VFP
;; register: duplicate operand 1 into every lane of the V2SI view of the
;; destination, then shift the DI value right arithmetically by 32.
6750 (define_split
6751   [(set (match_operand:DI 0 "s_register_operand" "")
6752         (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6753   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6754   [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6755    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6756   {
       /* Operand 2 is the destination register viewed in V2SImode.  */
6757     operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6758   })
6759
;; Sign-extend HI to DI when, after reload, the destination is a VFP
;; register: duplicate operand 1 into every lane of the V4HI view of the
;; destination, then shift the DI value right arithmetically by 48.
6760 (define_split
6761   [(set (match_operand:DI 0 "s_register_operand" "")
6762         (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6763   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6764   [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6765    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6766   {
       /* Operand 2 is the destination register viewed in V4HImode.  */
6767     operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6768   })
6769
;; Sign-extend QI to DI when, after reload, the destination is a VFP
;; register: duplicate operand 1 into every lane of the V8QI view of the
;; destination, then shift the DI value right arithmetically by 56.
6770 (define_split
6771   [(set (match_operand:DI 0 "s_register_operand" "")
6772         (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6773   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6774   [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6775    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6776   {
       /* Operand 2 is the destination register viewed in V8QImode.  */
6777     operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6778   })
6779
;; Zero-extend SI to DI when, after reload, the destination is a VFP
;; register: duplicate operand 1 into every lane of the V2SI view of the
;; destination, then shift the DI value right logically by 32.
6780 (define_split
6781   [(set (match_operand:DI 0 "s_register_operand" "")
6782         (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6783   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6784   [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6785    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6786   {
       /* Operand 2 is the destination register viewed in V2SImode.  */
6787     operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6788   })
6789
;; Zero-extend HI to DI when, after reload, the destination is a VFP
;; register: duplicate operand 1 into every lane of the V4HI view of the
;; destination, then shift the DI value right logically by 48.
6790 (define_split
6791   [(set (match_operand:DI 0 "s_register_operand" "")
6792         (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6793   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6794   [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6795    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6796   {
       /* Operand 2 is the destination register viewed in V4HImode.  */
6797     operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6798   })
6799
;; Zero-extend QI to DI when, after reload, the destination is a VFP
;; register: duplicate operand 1 into every lane of the V8QI view of the
;; destination, then shift the DI value right logically by 56.
6800 (define_split
6801   [(set (match_operand:DI 0 "s_register_operand" "")
6802         (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6803   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6804   [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6805    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6806   {
       /* Operand 2 is the destination register viewed in V8QImode.  */
6807     operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6808   })