PR rtl-optimization/88018
[official-gcc.git] / gcc / config / arm / neon.md
blob07572e4e62c32194b8867798ff45b6055a3617d3
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3.  If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute takes one of the values vadd, vmin or vmax and defaults to
;; vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX modes.  The insn condition requires TARGET_NEON
;; and that at least one of the two operands is a register.  The output code
;; selects the assembly by alternative:
;;   0     - a single vmov between two registers printed with %P;
;;   1, 3  - delegated to output_move_neon;
;;   2     - immediate move: neon_immediate_valid_for_move validates the
;;           constant (asserted) and reports a width; width 0 gives the
;;           vmov.f32 template, otherwise a vmov.i<width> template is
;;           formatted into a static buffer and returned;
;;   4, 5  - a single vmov between one %P register and a %Q/%R pair;
;;   6, 7  - delegated to output_move_double.
;; The "length" and pool-range attributes are listed per alternative in the
;; same order as the constraints.
26 (define_insn "*neon_mov<mode>"
27   [(set (match_operand:VDX 0 "nonimmediate_operand"
28           "=w,Un,w, w,  ?r,?w,?r, ?Us")
29         (match_operand:VDX 1 "general_operand"
30           " w,w, Dn,Uni, w, r, Usi,r"))]
31   "TARGET_NEON
32    && (register_operand (operands[0], <MODE>mode)
33        || register_operand (operands[1], <MODE>mode))"
35   if (which_alternative == 2)
36     {
37       int width, is_valid;
38       static char templ[40];
40       is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41         &operands[1], &width);
43       gcc_assert (is_valid != 0);
45       if (width == 0)
46         return "vmov.f32\t%P0, %1  @ <mode>";
47       else
48         sprintf (templ, "vmov.i%d\t%%P0, %%x1  @ <mode>", width);
50       return templ;
51     }
53   switch (which_alternative)
54     {
55     case 0: return "vmov\t%P0, %P1  @ <mode>";
56     case 1: case 3: return output_move_neon (operands);
57     case 2: gcc_unreachable ();
58     case 4: return "vmov\t%Q0, %R0, %P1  @ <mode>";
59     case 5: return "vmov\t%P0, %Q1, %R1  @ <mode>";
60     default: return output_move_double (operands, true, NULL);
61     }
63  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64                     neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
65                     neon_load1_2reg, neon_store1_2reg")
66   (set_attr "length" "4,4,4,4,4,4,8,8")
67   (set_attr "arm_pool_range"     "*,*,*,1020,*,*,1020,*")
68   (set_attr "thumb2_pool_range"     "*,*,*,1018,*,*,1018,*")
69   (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*")])
;; Move pattern for the VQXMOV modes.  The insn condition requires
;; TARGET_NEON and that at least one of the two operands is a register.
;; The output code selects the assembly by alternative:
;;   0        - a single vmov between two registers printed with %q;
;;   1, 3     - delegated to output_move_neon;
;;   2        - immediate move: neon_immediate_valid_for_move validates the
;;              constant (asserted) and reports a width; width 0 gives the
;;              vmov.f32 template, otherwise a vmov.i<width> template is
;;              formatted into a static buffer and returned;
;;   4, 5     - two vmov instructions, one per %e/%f half of the vector
;;              operand, paired with %Q/%R and %J/%K of the other operand;
;;   6, 7, 8  - delegated to output_move_quad.
71 (define_insn "*neon_mov<mode>"
72   [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73           "=w,Un,w, w,  ?r,?w,?r,?r,  ?Us")
74         (match_operand:VQXMOV 1 "general_operand"
75           " w,w, Dn,Uni, w, r, r, Usi, r"))]
76   "TARGET_NEON
77    && (register_operand (operands[0], <MODE>mode)
78        || register_operand (operands[1], <MODE>mode))"
80   if (which_alternative == 2)
81     {
82       int width, is_valid;
83       static char templ[40];
85       is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86         &operands[1], &width);
88       gcc_assert (is_valid != 0);
90       if (width == 0)
91         return "vmov.f32\t%q0, %1  @ <mode>";
92       else
93         sprintf (templ, "vmov.i%d\t%%q0, %%1  @ <mode>", width);
95       return templ;
96     }
98   switch (which_alternative)
99     {
100     case 0: return "vmov\t%q0, %q1  @ <mode>";
101     case 1: case 3: return output_move_neon (operands);
102     case 2: gcc_unreachable ();
103     case 4: return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";
104     case 5: return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";
105     default: return output_move_quad (operands);
106     }
108   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109                      neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110                      mov_reg,neon_load1_4reg,neon_store1_4reg")
111    (set_attr "length" "4,8,4,8,8,8,16,8,16")
112    (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113    (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114    (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 /* We define these mov expanders to match the standard mov$a optab to prevent
117    the mid-end from trying to do a subreg for these modes, which is the most
118    inefficient way to expand the move.  Also, big-endian subregs aren't
119    allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
120    Without these RTL generation patterns the mid-end would attempt to take a
121    subreg and may ICE if it can't.  */
;; Expander for TImode moves, enabled under TARGET_NEON.  While new pseudos
;; can still be created, a move whose destination is not a REG has its source
;; forced into a register first; otherwise the operands are left unchanged.
123 (define_expand "movti"
124   [(set (match_operand:TI 0 "nonimmediate_operand" "")
125         (match_operand:TI 1 "general_operand" ""))]
126   "TARGET_NEON"
128   if (can_create_pseudo_p ())
129     {
130       if (!REG_P (operands[0]))
131         operands[1] = force_reg (TImode, operands[1]);
132     }
;; Expander for moves in the VSTRUCT modes, enabled under TARGET_NEON.  While
;; new pseudos can still be created, a move whose destination is not a REG has
;; its source forced into a register first.
135 (define_expand "mov<mode>"
136   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
137         (match_operand:VSTRUCT 1 "general_operand" ""))]
138   "TARGET_NEON"
140   if (can_create_pseudo_p ())
141     {
142       if (!REG_P (operands[0]))
143         operands[1] = force_reg (<MODE>mode, operands[1]);
144     }
;; Expander for moves in the VH modes, enabled under TARGET_NEON.  Both
;; operands use the s_register_operand predicate.  While new pseudos can still
;; be created, a move whose destination is not a REG has its source forced
;; into a register first.
147 (define_expand "mov<mode>"
148   [(set (match_operand:VH 0 "s_register_operand")
149         (match_operand:VH 1 "s_register_operand"))]
150   "TARGET_NEON"
152   if (can_create_pseudo_p ())
153     {
154       if (!REG_P (operands[0]))
155         operands[1] = force_reg (<MODE>mode, operands[1]);
156     }
;; Move pattern for the VSTRUCT modes; at least one operand must be a
;; register.  Alternative 0 (register to register) returns "#", i.e. it has no
;; direct assembly and relies on being split.  Alternatives 1 and 2 are
;; delegated to output_move_neon.  The "length" attribute is computed at run
;; time by arm_attr_length_move_neon.
159 (define_insn "*neon_mov<mode>"
160   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
161         (match_operand:VSTRUCT 1 "general_operand"      " w,w, Ut"))]
162   "TARGET_NEON
163    && (register_operand (operands[0], <MODE>mode)
164        || register_operand (operands[1], <MODE>mode))"
166   switch (which_alternative)
167     {
168     case 0: return "#";
169     case 1: case 2: return output_move_neon (operands);
170     default: gcc_unreachable ();
171     }
173   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
174    (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload, split an EImode register-to-register move into two moves:
;; a TImode move starting at the original register numbers and a DImode move
;; starting four register numbers later.  The component registers are handed
;; to neon_disambiguate_copy, which fills in the operands used by the
;; replacement pattern.
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping source and destination ranges copy correctly -- confirm against
;; its definition.
176 (define_split
177   [(set (match_operand:EI 0 "s_register_operand" "")
178         (match_operand:EI 1 "s_register_operand" ""))]
179   "TARGET_NEON && reload_completed"
180   [(set (match_dup 0) (match_dup 1))
181    (set (match_dup 2) (match_dup 3))]
183   int rdest = REGNO (operands[0]);
184   int rsrc = REGNO (operands[1]);
185   rtx dest[2], src[2];
187   dest[0] = gen_rtx_REG (TImode, rdest);
188   src[0] = gen_rtx_REG (TImode, rsrc);
189   dest[1] = gen_rtx_REG (DImode, rdest + 4);
190   src[1] = gen_rtx_REG (DImode, rsrc + 4);
192   neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, split an OImode register-to-register move into two TImode
;; moves, the second starting four register numbers after the first.  The
;; component registers are handed to neon_disambiguate_copy, which fills in
;; the operands used by the replacement pattern.
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping source and destination ranges copy correctly -- confirm against
;; its definition.
195 (define_split
196   [(set (match_operand:OI 0 "s_register_operand" "")
197         (match_operand:OI 1 "s_register_operand" ""))]
198   "TARGET_NEON && reload_completed"
199   [(set (match_dup 0) (match_dup 1))
200    (set (match_dup 2) (match_dup 3))]
202   int rdest = REGNO (operands[0]);
203   int rsrc = REGNO (operands[1]);
204   rtx dest[2], src[2];
206   dest[0] = gen_rtx_REG (TImode, rdest);
207   src[0] = gen_rtx_REG (TImode, rsrc);
208   dest[1] = gen_rtx_REG (TImode, rdest + 4);
209   src[1] = gen_rtx_REG (TImode, rsrc + 4);
211   neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, split a CImode register-to-register move into three TImode
;; moves at register offsets 0, 4 and 8 from the original register numbers.
;; The component registers are handed to neon_disambiguate_copy, which fills
;; in the operands used by the replacement pattern.
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping source and destination ranges copy correctly -- confirm against
;; its definition.
214 (define_split
215   [(set (match_operand:CI 0 "s_register_operand" "")
216         (match_operand:CI 1 "s_register_operand" ""))]
217   "TARGET_NEON && reload_completed"
218   [(set (match_dup 0) (match_dup 1))
219    (set (match_dup 2) (match_dup 3))
220    (set (match_dup 4) (match_dup 5))]
222   int rdest = REGNO (operands[0]);
223   int rsrc = REGNO (operands[1]);
224   rtx dest[3], src[3];
226   dest[0] = gen_rtx_REG (TImode, rdest);
227   src[0] = gen_rtx_REG (TImode, rsrc);
228   dest[1] = gen_rtx_REG (TImode, rdest + 4);
229   src[1] = gen_rtx_REG (TImode, rsrc + 4);
230   dest[2] = gen_rtx_REG (TImode, rdest + 8);
231   src[2] = gen_rtx_REG (TImode, rsrc + 8);
233   neon_disambiguate_copy (operands, dest, src, 3);
;; After reload, split an XImode register-to-register move into four TImode
;; moves at register offsets 0, 4, 8 and 12 from the original register
;; numbers.  The component registers are handed to neon_disambiguate_copy,
;; which fills in the operands used by the replacement pattern.
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping source and destination ranges copy correctly -- confirm against
;; its definition.
236 (define_split
237   [(set (match_operand:XI 0 "s_register_operand" "")
238         (match_operand:XI 1 "s_register_operand" ""))]
239   "TARGET_NEON && reload_completed"
240   [(set (match_dup 0) (match_dup 1))
241    (set (match_dup 2) (match_dup 3))
242    (set (match_dup 4) (match_dup 5))
243    (set (match_dup 6) (match_dup 7))]
245   int rdest = REGNO (operands[0]);
246   int rsrc = REGNO (operands[1]);
247   rtx dest[4], src[4];
249   dest[0] = gen_rtx_REG (TImode, rdest);
250   src[0] = gen_rtx_REG (TImode, rsrc);
251   dest[1] = gen_rtx_REG (TImode, rdest + 4);
252   src[1] = gen_rtx_REG (TImode, rsrc + 4);
253   dest[2] = gen_rtx_REG (TImode, rdest + 8);
254   src[2] = gen_rtx_REG (TImode, rsrc + 8);
255   dest[3] = gen_rtx_REG (TImode, rdest + 12);
256   src[3] = gen_rtx_REG (TImode, rsrc + 12);
258   neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned moves in the VDQX modes; the source is wrapped in
;; UNSPEC_MISALIGNED_ACCESS.  Enabled only for TARGET_NEON on little-endian
;; targets with unaligned_access set.  The preparation code first makes sure
;; at least one operand is a register, then takes whichever operand is not the
;; register destination as the memory operand and forces its address into a
;; register when neon_vector_mem_operand rejects it.
261 (define_expand "movmisalign<mode>"
262   [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
263         (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
264                      UNSPEC_MISALIGNED_ACCESS))]
265   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
267   rtx adjust_mem;
268   /* This pattern is not permitted to fail during expansion: if both arguments
269      are non-registers (e.g. memory := constant, which can be created by the
270      auto-vectorizer), force operand 1 into a register.  */
271   if (!s_register_operand (operands[0], <MODE>mode)
272       && !s_register_operand (operands[1], <MODE>mode))
273     operands[1] = force_reg (<MODE>mode, operands[1]);
275   if (s_register_operand (operands[0], <MODE>mode))
276     adjust_mem = operands[1];
277   else
278     adjust_mem = operands[0];
280   /* Legitimize address.  */
281   if (!neon_vector_mem_operand (adjust_mem, 2, true))
282     XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register: emits vst1.<V_sz_elem> with the
;; register printed via %P and the address via %A.  Same enabling condition as
;; the movmisalign<mode> expander.
286 (define_insn "*movmisalign<mode>_neon_store"
287   [(set (match_operand:VDX 0 "neon_permissive_struct_operand"   "=Um")
288         (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
289                     UNSPEC_MISALIGNED_ACCESS))]
290   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
291   "vst1.<V_sz_elem>\t{%P1}, %A0"
292   [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load into a VDX-mode register: emits vld1.<V_sz_elem> with the
;; register printed via %P and the address via %A.  Same enabling condition as
;; the movmisalign<mode> expander.
294 (define_insn "*movmisalign<mode>_neon_load"
295   [(set (match_operand:VDX 0 "s_register_operand"                       "=w")
296         (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
297                                                                         " Um")]
298                     UNSPEC_MISALIGNED_ACCESS))]
299   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
300   "vld1.<V_sz_elem>\t{%P0}, %A1"
301   [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register: emits vst1.<V_sz_elem> with the
;; register printed via %q and the address via %A.  Same enabling condition as
;; the movmisalign<mode> expander.
303 (define_insn "*movmisalign<mode>_neon_store"
304   [(set (match_operand:VQX 0 "neon_permissive_struct_operand"  "=Um")
305         (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
306                     UNSPEC_MISALIGNED_ACCESS))]
307   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
308   "vst1.<V_sz_elem>\t{%q1}, %A0"
309   [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load into a VQX-mode register: emits vld1.<V_sz_elem> with the
;; register printed via %q and the address via %A.  Same enabling condition as
;; the movmisalign<mode> expander.
311 (define_insn "*movmisalign<mode>_neon_load"
312   [(set (match_operand:VQX 0 "s_register_operand"                       "=w")
313         (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
314                                                                         " Um")]
315                     UNSPEC_MISALIGNED_ACCESS))]
316   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
317   "vld1.<V_sz_elem>\t{%q0}, %A1"
318   [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of a VD_LANE vector.  Operand 3 is
;; the vector being updated and is tied to the destination (constraint "0").
;; Operand 2 is the vec_merge selector, a bit mask: the lane number is the
;; index of its lowest set bit (ffs - 1), mirrored within the vector for
;; big-endian.  Operand 2 is then replaced by that lane number for printing.
;; Alternative 0 takes the scalar from memory with vld1; alternative 1 takes
;; it from a register with vmov.
320 (define_insn "vec_set<mode>_internal"
321   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
322         (vec_merge:VD_LANE
323           (vec_duplicate:VD_LANE
324             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
325           (match_operand:VD_LANE 3 "s_register_operand" "0,0")
326           (match_operand:SI 2 "immediate_operand" "i,i")))]
327   "TARGET_NEON"
329   int elt = ffs ((int) INTVAL (operands[2])) - 1;
330   if (BYTES_BIG_ENDIAN)
331     elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
332   operands[2] = GEN_INT (elt);
334   if (which_alternative == 0)
335     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
336   else
337     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
339   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert scalar operand 1 into one lane of a VQ2 vector.  Operand 3 is the
;; vector being updated and is tied to the destination.  Operand 2 is the
;; vec_merge selector bit mask; the element number is the index of its lowest
;; set bit.  The element number is decomposed into a half (element / half_elts)
;; and a lane within that half (element % half_elts, mirrored for big-endian).
;; Operand 0 is rewritten to the <V_HALF>mode register at REGNO + 2 * half and
;; operand 2 to the lane within it, so the template addresses a single
;; half-width register printed with %P.
;; Alternative 0 takes the scalar from memory with vld1; alternative 1 takes
;; it from a register with vmov.
341 (define_insn "vec_set<mode>_internal"
342   [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
343         (vec_merge:VQ2
344           (vec_duplicate:VQ2
345             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
346           (match_operand:VQ2 3 "s_register_operand" "0,0")
347           (match_operand:SI 2 "immediate_operand" "i,i")))]
348   "TARGET_NEON"
350   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
351   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
352   int elt = elem % half_elts;
353   int hi = (elem / half_elts) * 2;
354   int regno = REGNO (operands[0]);
356   if (BYTES_BIG_ENDIAN)
357     elt = half_elts - 1 - elt;
359   operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
360   operands[2] = GEN_INT (elt);
362   if (which_alternative == 0)
363     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
364   else
365     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
367   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert DImode operand 1 into one element of a V2DI vector.  Operand 3 is
;; the vector being updated and is tied to the destination.  Operand 2 is the
;; vec_merge selector bit mask; the element number is the index of its lowest
;; set bit.  Operand 0 is rewritten to the DImode register at
;; REGNO + 2 * element, so the whole element is written as one register.
;; Alternative 0 loads it from memory with vld1.64; alternative 1 moves it
;; from the %Q/%R pair of operand 1 with vmov.
370 (define_insn "vec_setv2di_internal"
371   [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
372         (vec_merge:V2DI
373           (vec_duplicate:V2DI
374             (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
375           (match_operand:V2DI 3 "s_register_operand" "0,0")
376           (match_operand:SI 2 "immediate_operand" "i,i")))]
377   "TARGET_NEON"
379   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
380   int regno = REGNO (operands[0]) + 2 * elem;
382   operands[0] = gen_rtx_REG (DImode, regno);
384   if (which_alternative == 0)
385     return "vld1.64\t%P0, %A1";
386   else
387     return "vmov\t%P0, %Q1, %R1";
389   [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for setting one element of a VDQ vector.  Operand 2 is the element
;; index; it is converted to the single-bit mask 1 << index and passed to
;; vec_set<mode>_internal, with operand 0 supplied both as the destination and
;; as the vector being updated.
392 (define_expand "vec_set<mode>"
393   [(match_operand:VDQ 0 "s_register_operand" "")
394    (match_operand:<V_elem> 1 "s_register_operand" "")
395    (match_operand:SI 2 "immediate_operand" "")]
396   "TARGET_NEON"
398   HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
399   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
400                                          GEN_INT (elem), operands[0]));
401   DONE;
;; Extract lane operand 2 of a VD_LANE vector into scalar operand 0.  For
;; big-endian the lane number is mirrored within the vector before printing.
;; Alternative 0 stores the lane to memory with vst1; alternative 1 moves it
;; to a register with vmov.<V_uf_sclr>.
404 (define_insn "vec_extract<mode><V_elem_l>"
405   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
406         (vec_select:<V_elem>
407           (match_operand:VD_LANE 1 "s_register_operand" "w,w")
408           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
409   "TARGET_NEON"
411   if (BYTES_BIG_ENDIAN)
412     {
413       int elt = INTVAL (operands[2]);
414       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
415       operands[2] = GEN_INT (elt);
416     }
418   if (which_alternative == 0)
419     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
420   else
421     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
423   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract element operand 2 of a VQ2 vector into scalar operand 0.  The
;; element number is decomposed into a half (element / half_elts) and a lane
;; within that half (element % half_elts, mirrored for big-endian).  Operand 1
;; is rewritten to the <V_HALF>mode register at REGNO + 2 * half and operand 2
;; to the lane within it, so the template addresses a single half-width
;; register printed with %P.
;; Alternative 0 stores the lane to memory with vst1; alternative 1 moves it
;; to a register with vmov.<V_uf_sclr>.
426 (define_insn "vec_extract<mode><V_elem_l>"
427   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
428         (vec_select:<V_elem>
429           (match_operand:VQ2 1 "s_register_operand" "w,w")
430           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
431   "TARGET_NEON"
433   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
434   int elt = INTVAL (operands[2]) % half_elts;
435   int hi = (INTVAL (operands[2]) / half_elts) * 2;
436   int regno = REGNO (operands[1]);
438   if (BYTES_BIG_ENDIAN)
439     elt = half_elts - 1 - elt;
441   operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
442   operands[2] = GEN_INT (elt);
444   if (which_alternative == 0)
445     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
446   else
447     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
449   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract element operand 2 of a V2DI vector into DImode operand 0.
;; Operand 1 is rewritten to the DImode register at REGNO + 2 * element, so
;; the whole element is read as one register.  Alternative 0 stores it to
;; memory with vst1.64; alternative 1 moves it to the %Q/%R pair of operand 0
;; with vmov.
452 (define_insn "vec_extractv2didi"
453   [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
454         (vec_select:DI
455           (match_operand:V2DI 1 "s_register_operand" "w,w")
456           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
457   "TARGET_NEON"
459   int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
461   operands[1] = gen_rtx_REG (DImode, regno);
463   if (which_alternative == 0)
464     return "vst1.64\t{%P1}, %A0  @ v2di";
465   else
466     return "vmov\t%Q0, %R0, %P1  @ v2di";
468   [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for initialising a VDQ vector (operand 0) from operand 1.  All of
;; the work is delegated to neon_expand_vector_init.
471 (define_expand "vec_init<mode><V_elem_l>"
472   [(match_operand:VDQ 0 "s_register_operand" "")
473    (match_operand 1 "" "")]
474   "TARGET_NEON"
476   neon_expand_vector_init (operands[0], operands[1]);
477   DONE;
480 ;; Doubleword and quadword arithmetic.
482 ;; NOTE: some other instructions also support 64-bit integer
483 ;; element size, which we could potentially use for "long long" operations.
;; Vector add for the VDQ modes, emitted as vadd.<V_if_elem>.  Float modes are
;; accepted only when flag_unsafe_math_optimizations is set.  The "type"
;; attribute is neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
485 (define_insn "*add<mode>3_neon"
486   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
487         (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
488                   (match_operand:VDQ 2 "s_register_operand" "w")))]
489   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
490   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
491   [(set (attr "type")
492       (if_then_else (match_test "<Is_float_mode>")
493                     (const_string "neon_fp_addsub_s<q>")
494                     (const_string "neon_add<q>")))]
497 ;; As with SFmode, full support for HFmode vector arithmetic is only available
498 ;; when flag_unsafe_math_optimizations is enabled.
;; Vector add for the VH modes under the standard name add<mode>3.  Requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.  The "type"
;; attribute is chosen by the same <Is_float_mode> test as *add<mode>3_neon.
500 (define_insn "add<mode>3"
501   [(set
502     (match_operand:VH 0 "s_register_operand" "=w")
503     (plus:VH
504      (match_operand:VH 1 "s_register_operand" "w")
505      (match_operand:VH 2 "s_register_operand" "w")))]
506  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
507  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
508  [(set (attr "type")
509    (if_then_else (match_test "<Is_float_mode>")
510     (const_string "neon_fp_addsub_s<q>")
511     (const_string "neon_add<q>")))]
;; Same RTL and output template as add<mode>3 for the VH modes, but enabled
;; by TARGET_NEON_FP16INST alone, without flag_unsafe_math_optimizations.
;; NOTE(review): presumably this variant exists for explicit callers such as
;; intrinsics -- confirm against its users.
514 (define_insn "add<mode>3_fp16"
515   [(set
516     (match_operand:VH 0 "s_register_operand" "=w")
517     (plus:VH
518      (match_operand:VH 1 "s_register_operand" "w")
519      (match_operand:VH 2 "s_register_operand" "w")))]
520  "TARGET_NEON_FP16INST"
521  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
522  [(set (attr "type")
523    (if_then_else (match_test "<Is_float_mode>")
524     (const_string "neon_fp_addsub_s<q>")
525     (const_string "neon_add<q>")))]
;; DImode add with seven alternatives; the pattern clobbers the condition
;; code register.  Alternatives 0 and 3 emit vadd.i64 on %P registers and
;; differ only in the "arch" attribute (neon_for_64bits versus
;; avoid_neon_for_64bits).  Alternatives 1, 2, 4, 5 and 6 return "#", i.e.
;; they have no direct assembly and rely on being split; the splitter is not
;; part of this pattern.  Those alternatives are marked as clobbering the
;; condition codes and have length 8.
528 (define_insn "adddi3_neon"
529   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
530         (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
531                  (match_operand:DI 2 "arm_adddi_operand"     "w,r,0,w,r,Dd,Dd")))
532    (clobber (reg:CC CC_REGNUM))]
533   "TARGET_NEON"
535   switch (which_alternative)
536     {
537     case 0: /* fall through */
538     case 3: return "vadd.i64\t%P0, %P1, %P2";
539     case 1: return "#";
540     case 2: return "#";
541     case 4: return "#";
542     case 5: return "#";
543     case 6: return "#";
544     default: gcc_unreachable ();
545     }
547   [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
548                      multiple,multiple,multiple")
549    (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
550    (set_attr "length" "*,8,8,*,8,8,8")
551    (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtract for the VDQ modes, emitted as vsub.<V_if_elem>.  Float
;; modes are accepted only when flag_unsafe_math_optimizations is set.  The
;; "type" attribute is neon_fp_addsub_s<q> for float modes and neon_sub<q>
;; otherwise.
554 (define_insn "*sub<mode>3_neon"
555   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
556         (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
557                    (match_operand:VDQ 2 "s_register_operand" "w")))]
558   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
559   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
560   [(set (attr "type")
561       (if_then_else (match_test "<Is_float_mode>")
562                     (const_string "neon_fp_addsub_s<q>")
563                     (const_string "neon_sub<q>")))]
;; Vector subtract for the VH modes under the standard name sub<mode>3.
;; Requires TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
;; NOTE(review): the "type" attribute here is the fixed string neon_sub<q>,
;; whereas add<mode>3 for the same VH modes selects via <Is_float_mode>
;; between neon_fp_addsub_s<q> and neon_add<q> -- confirm whether the
;; difference is intended.
566 (define_insn "sub<mode>3"
567  [(set
568    (match_operand:VH 0 "s_register_operand" "=w")
569    (minus:VH
570     (match_operand:VH 1 "s_register_operand" "w")
571     (match_operand:VH 2 "s_register_operand" "w")))]
572  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
573  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
574  [(set_attr "type" "neon_sub<q>")]
;; Same RTL and output template as sub<mode>3 for the VH modes, but enabled
;; by TARGET_NEON_FP16INST alone, without flag_unsafe_math_optimizations.
;; NOTE(review): the "type" attribute is the fixed string neon_sub<q>, whereas
;; add<mode>3_fp16 selects via <Is_float_mode> between neon_fp_addsub_s<q> and
;; neon_add<q> -- confirm whether the difference is intended.
577 (define_insn "sub<mode>3_fp16"
578  [(set
579    (match_operand:VH 0 "s_register_operand" "=w")
580    (minus:VH
581     (match_operand:VH 1 "s_register_operand" "w")
582     (match_operand:VH 2 "s_register_operand" "w")))]
583  "TARGET_NEON_FP16INST"
584  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585  [(set_attr "type" "neon_sub<q>")]
;; DImode subtract with five alternatives; the pattern clobbers the condition
;; code register.  Alternatives 0 and 4 emit vsub.i64 on %P registers and
;; differ only in the "arch" attribute (neon_for_64bits versus
;; avoid_neon_for_64bits).  Alternatives 1, 2 and 3 emit a subs/sbc pair on
;; the %Q and %R halves of the operands; they are marked as clobbering the
;; condition codes and have length 8.
588 (define_insn "subdi3_neon"
589   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
590         (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
591                   (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
592    (clobber (reg:CC CC_REGNUM))]
593   "TARGET_NEON"
595   switch (which_alternative)
596     {
597     case 0: /* fall through */
598     case 4: return "vsub.i64\t%P0, %P1, %P2";
599     case 1: /* fall through */ 
600     case 2: /* fall through */
601     case 3: return  "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
602     default: gcc_unreachable ();
603     }
605   [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
606    (set_attr "conds" "*,clob,clob,clob,*")
607    (set_attr "length" "*,8,8,8,*")
608    (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiply for the VDQW modes, emitted as vmul.<V_if_elem>.  Float
;; modes are accepted only when flag_unsafe_math_optimizations is set.  The
;; "type" attribute is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
611 (define_insn "*mul<mode>3_neon"
612   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
613         (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
614                    (match_operand:VDQW 2 "s_register_operand" "w")))]
615   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
616   "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
617   [(set (attr "type")
618       (if_then_else (match_test "<Is_float_mode>")
619                     (const_string "neon_fp_mul_s<q>")
620                     (const_string "neon_mul_<V_elem_ch><q>")))]
623 /* Perform division using multiply-by-reciprocal.
624    The reciprocal is calculated using the Newton-Raphson method.
625    Enabled with -funsafe-math-optimizations -freciprocal-math
626    and disabled for -Os since it increases code size.  */
;; Expander for VCVTF-mode division.  It emits a reciprocal estimate of
;; operand 2 (neon_vrecpe), refines it with two vrecps/multiply steps, and
;; finally multiplies operand 1 by the refined reciprocal into operand 0.
;; NOTE(review): the condition string tests only TARGET_NEON, !optimize_size
;; and flag_reciprocal_math; flag_unsafe_math_optimizations is not tested
;; here.  Presumably that requirement comes from the mul<mode>3 pattern the
;; expansion uses -- confirm.
628 (define_expand "div<mode>3"
629   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
630         (div:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
631                   (match_operand:VCVTF 2 "s_register_operand" "w")))]
632   "TARGET_NEON && !optimize_size
633    && flag_reciprocal_math"
634   {
635     rtx rec = gen_reg_rtx (<MODE>mode);
636     rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
638     /* Reciprocal estimate.  */
639     emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
641     /* Perform 2 iterations of newton-raphson method.  */
642     for (int i = 0; i < 2; i++)
643       {
644         emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
645         emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
646       }
648     /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
649     emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
650     DONE;
651   }
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with operand 1 tied to the destination (constraint "0").
;; Emitted as vmla.<V_if_elem>.  Float modes are accepted only when
;; flag_unsafe_math_optimizations is set.
655 (define_insn "mul<mode>3add<mode>_neon"
656   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
657         (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
658                             (match_operand:VDQW 3 "s_register_operand" "w"))
659                   (match_operand:VDQW 1 "s_register_operand" "0")))]
660   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
661   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
662   [(set (attr "type")
663       (if_then_else (match_test "<Is_float_mode>")
664                     (const_string "neon_fp_mla_s<q>")
665                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VH modes: operand 0 = operand 2 * operand 3
;; + operand 1, with operand 1 tied to the destination.  Emitted as vmla.f16
;; and gated on TARGET_NEON_FP16INST; the rest of the condition is the same
;; <Is_float_mode> / flag_unsafe_math_optimizations test as the VDQW variant.
668 (define_insn "mul<mode>3add<mode>_neon"
669   [(set (match_operand:VH 0 "s_register_operand" "=w")
670         (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
671                           (match_operand:VH 3 "s_register_operand" "w"))
672                   (match_operand:VH 1 "s_register_operand" "0")))]
673   "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
674   "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
675   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with operand 1 tied to the destination (constraint "0").
;; Emitted as vmls.<V_if_elem>.  Float modes are accepted only when
;; flag_unsafe_math_optimizations is set.
678 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
679   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
680         (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
681                     (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
682                                (match_operand:VDQW 3 "s_register_operand" "w"))))]
683   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
684   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
685   [(set (attr "type")
686       (if_then_else (match_test "<Is_float_mode>")
687                     (const_string "neon_fp_mla_s<q>")
688                     (const_string "neon_mla_<V_elem_ch><q>")))]
691 ;; Fused multiply-accumulate
692 ;; We define each insn twice here:
693 ;;    1: with flag_unsafe_math_optimizations for the widening multiply phase
694 ;;       to be able to use when converting to FMA.
695 ;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes under the standard name: operand 0
;; = fma (operand 1, operand 2, operand 3), with operand 3 tied to the
;; destination.  Emitted as vfma.<V_if_elem>.  Requires TARGET_NEON,
;; TARGET_FMA and flag_unsafe_math_optimizations.
696 (define_insn "fma<VCVTF:mode>4"
697   [(set (match_operand:VCVTF 0 "register_operand" "=w")
698         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
699                  (match_operand:VCVTF 2 "register_operand" "w")
700                  (match_operand:VCVTF 3 "register_operand" "0")))]
701   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
702   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output template as fma<VCVTF:mode>4, but enabled by
;; TARGET_NEON && TARGET_FMA alone, without flag_unsafe_math_optimizations.
706 (define_insn "fma<VCVTF:mode>4_intrinsic"
707   [(set (match_operand:VCVTF 0 "register_operand" "=w")
708         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
709                  (match_operand:VCVTF 2 "register_operand" "w")
710                  (match_operand:VCVTF 3 "register_operand" "0")))]
711   "TARGET_NEON && TARGET_FMA"
712   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
713   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add for the VH modes under the standard name: operand 0 =
;; fma (operand 1, operand 2, operand 3), with operand 3 tied to the
;; destination.  Emitted as vfma.<V_if_elem>.  Requires TARGET_NEON_FP16INST
;; and flag_unsafe_math_optimizations.
716 (define_insn "fma<VH:mode>4"
717  [(set (match_operand:VH 0 "register_operand" "=w")
718    (fma:VH
719     (match_operand:VH 1 "register_operand" "w")
720     (match_operand:VH 2 "register_operand" "w")
721     (match_operand:VH 3 "register_operand" "0")))]
722  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
723  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output template as fma<VH:mode>4, but enabled by
;; TARGET_NEON_FP16INST alone, without flag_unsafe_math_optimizations.
727 (define_insn "fma<VH:mode>4_intrinsic"
728  [(set (match_operand:VH 0 "register_operand" "=w")
729    (fma:VH
730     (match_operand:VH 1 "register_operand" "w")
731     (match_operand:VH 2 "register_operand" "w")
732     (match_operand:VH 3 "register_operand" "0")))]
733  "TARGET_NEON_FP16INST"
734  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: operand 0 = fma (-operand 1,
;; operand 2, operand 3), with operand 3 tied to the destination.  Emitted as
;; vfms.<V_if_elem>.  Requires TARGET_NEON, TARGET_FMA and
;; flag_unsafe_math_optimizations.
738 (define_insn "*fmsub<VCVTF:mode>4"
739   [(set (match_operand:VCVTF 0 "register_operand" "=w")
740         (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
741                    (match_operand:VCVTF 2 "register_operand" "w")
742                    (match_operand:VCVTF 3 "register_operand" "0")))]
743   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
744   "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
745   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output template as *fmsub<VCVTF:mode>4, but enabled by
;; TARGET_NEON && TARGET_FMA alone, without flag_unsafe_math_optimizations.
748 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
749  [(set (match_operand:VCVTF 0 "register_operand" "=w")
750    (fma:VCVTF
751     (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
752     (match_operand:VCVTF 2 "register_operand" "w")
753     (match_operand:VCVTF 3 "register_operand" "0")))]
754  "TARGET_NEON && TARGET_FMA"
755  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
756  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VH modes: operand 0 = fma (-operand 1,
;; operand 2, operand 3), with operand 3 tied to the destination.  Emitted as
;; vfms.<V_if_elem> and enabled by TARGET_NEON_FP16INST alone.
759 (define_insn "fmsub<VH:mode>4_intrinsic"
760  [(set (match_operand:VH 0 "register_operand" "=w")
761    (fma:VH
762     (neg:VH (match_operand:VH 1 "register_operand" "w"))
763     (match_operand:VH 2 "register_operand" "w")
764     (match_operand:VH 3 "register_operand" "0")))]
765  "TARGET_NEON_FP16INST"
766  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
767  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Vector round-to-integral for the VCVTF modes, one pattern per NEON_VRINT
;; unspec.  Emitted as vrint<nvrint_variant>.f32.  Requires TARGET_NEON and
;; TARGET_VFP5.
770 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
771   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
772         (unspec:VCVTF [(match_operand:VCVTF 1
773                          "s_register_operand" "w")]
774                 NEON_VRINT))]
775   "TARGET_NEON && TARGET_VFP5"
776   "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
777   [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Float-to-integer conversion of a VCVTF vector into <V_cmp_result> mode, one
;; pattern per NEON_VCVT unspec and per FIXUORS code.  Emitted as
;; vcvt<nvrint_variant>.<su>32.f32.  Requires TARGET_NEON and TARGET_VFP5; the
;; insn is marked as not predicable.
780 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
781   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
782         (FIXUORS:<V_cmp_result> (unspec:VCVTF
783                                [(match_operand:VCVTF 1 "register_operand" "w")]
784                                NEON_VCVT)))]
785   "TARGET_NEON && TARGET_VFP5"
786   "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
787   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
788    (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 is the three-register vorr.
;; Alternative 1 takes a "Dl" operand 2 with operand 1 tied to the
;; destination; its assembly is produced by neon_output_logic_immediate,
;; called with the mnemonic "vorr", 0 as the fourth argument and
;; VALID_NEON_QREG_MODE (<MODE>mode) as the fifth.
791 (define_insn "ior<mode>3"
792   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
793         (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
794                  (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
795   "TARGET_NEON"
797   switch (which_alternative)
798     {
799     case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
800     case 1: return neon_output_logic_immediate ("vorr", &operands[2],
801                      <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
802     default: gcc_unreachable ();
803     }
805   [(set_attr "type" "neon_logic<q>")]
808 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
809 ;; vorr. We support the pseudo-instruction vand instead, because that
810 ;; corresponds to the canonical form the middle-end expects to use for
811 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 is the three-register vand.
;; Alternative 1 takes a "DL" operand 2 with operand 1 tied to the
;; destination; its assembly is produced by neon_output_logic_immediate,
;; called with the mnemonic "vand", 1 as the fourth argument and
;; VALID_NEON_QREG_MODE (<MODE>mode) as the fifth.
813 (define_insn "and<mode>3"
814   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
815         (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
816                  (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
817   "TARGET_NEON"
819   switch (which_alternative)
820     {
821     case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
822     case 1: return neon_output_logic_immediate ("vand", &operands[2],
823                      <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
824     default: gcc_unreachable ();
825     }
827   [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2, emitted as
;; vorn.  Note that the complemented operand is number 2 although it appears
;; first in the RTL.
830 (define_insn "orn<mode>3_neon"
831   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
832         (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
833                  (match_operand:VDQ 1 "s_register_operand" "w")))]
834   "TARGET_NEON"
835   "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
836   [(set_attr "type" "neon_logic<q>")]
839 ;; TODO: investigate whether we should disable 
840 ;; this and bicdi3_neon for the A8 in line with the other
841 ;; changes above. 
;; DImode OR-NOT: operand 0 = (NOT operand 2) OR operand 1.
;; Alternative 0 is a single vorn on "w" registers.  The other three
;; alternatives use core registers, output "#", and are split after reload
;; when operand 0 is not a VFP register.  In the split, under TARGET_THUMB2
;; the operands are rewritten into SImode low/high halves (operands 0/3,
;; 2/4 and 1/5) to feed the two SImode sets of the split template; otherwise
;; the split emits one_cmpldi2 followed by iordi3 and finishes with DONE.
;; The "arch" attribute marks alternative 1 as "a" and alternatives 2-3 as
;; "t2".
842 (define_insn_and_split "orndi3_neon"
843   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
844         (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
845                 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
846   "TARGET_NEON"
847   "@
848    vorn\t%P0, %P1, %P2
849    #
850    #
851    #"
852   "reload_completed && 
853    (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
854   [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
855    (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
856   "
857   {
858     if (TARGET_THUMB2)
859       {
860         operands[3] = gen_highpart (SImode, operands[0]);
861         operands[0] = gen_lowpart (SImode, operands[0]);
862         operands[4] = gen_highpart (SImode, operands[2]);
863         operands[2] = gen_lowpart (SImode, operands[2]);
864         operands[5] = gen_highpart (SImode, operands[1]);
865         operands[1] = gen_lowpart (SImode, operands[1]);
866       }
867     else
868       {
869         emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
870         emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
871         DONE;
872       }
873   }"
874   [(set_attr "type" "neon_logic,multiple,multiple,multiple")
875    (set_attr "length" "*,16,8,8")
876    (set_attr "arch" "any,a,t2,t2")]
;; Vector bit-clear for the VDQ modes: operand 0 = operand 1 AND
;; (NOT operand 2), emitted as a single vbic.
879 (define_insn "bic<mode>3_neon"
880   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
881         (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
882                  (match_operand:VDQ 1 "s_register_operand" "w")))]
883   "TARGET_NEON"
884   "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
885   [(set_attr "type" "neon_logic<q>")]
888 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = operand 1 AND (NOT operand 2).
;; Alternative 0 is a single vbic on "w" registers; the two core-register
;; alternatives output "#" with length 8.
;; NOTE(review): this pattern carries no split of its own, so the "#"
;; alternatives presumably rely on a splitter defined elsewhere -- confirm.
889 (define_insn "bicdi3_neon"
890   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
891         (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
892                 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
893   "TARGET_NEON"
894   "@
895    vbic\t%P0, %P1, %P2
896    #
897    #"
898   [(set_attr "type" "neon_logic,multiple,multiple")
899    (set_attr "length" "*,8,8")]
;; Vector bitwise exclusive-OR for the VDQ modes.  Only a register-register
;; alternative is provided; it is emitted as veor.
902 (define_insn "xor<mode>3"
903   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
904         (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
905                  (match_operand:VDQ 2 "s_register_operand" "w")))]
906   "TARGET_NEON"
907   "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
908   [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT for the VDQ modes, emitted as vmvn.
911 (define_insn "one_cmpl<mode>2"
912   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
913         (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
914   "TARGET_NEON"
915   "vmvn\t%<V_reg>0, %<V_reg>1"
916   [(set_attr "type" "neon_move<q>")]
;; Vector absolute value for the VDQW modes, emitted as vabs.<V_s_elem>.
;; The "type" attribute is neon_fp_abs_s<q> when <Is_float_mode> holds and
;; neon_abs<q> otherwise.
919 (define_insn "abs<mode>2"
920   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
921         (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
922   "TARGET_NEON"
923   "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
924   [(set (attr "type")
925       (if_then_else (match_test "<Is_float_mode>")
926                     (const_string "neon_fp_abs_s<q>")
927                     (const_string "neon_abs<q>")))]
;; Vector negation for the VDQW modes, emitted as vneg.<V_s_elem>.
;; The "type" attribute is neon_fp_neg_s<q> when <Is_float_mode> holds and
;; neon_neg<q> otherwise.
930 (define_insn "neg<mode>2"
931   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
932         (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
933   "TARGET_NEON"
934   "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
935   [(set (attr "type")
936       (if_then_else (match_test "<Is_float_mode>")
937                     (const_string "neon_fp_neg_s<q>")
938                     (const_string "neon_neg<q>")))]
;; DImode negation with both "w" (alternatives 0-1) and "r" (alternatives
;; 2-3) register choices.  The output template is always "#", so the insn
;; must be split before final output.  Operand 2 is a DI scratch that is a
;; real "w" register only in alternative 1 ("X" elsewhere), and the pattern
;; clobbers the CC register.
941 (define_insn "negdi2_neon"
942   [(set (match_operand:DI 0 "s_register_operand"         "=&w, w,r,&r")
943         (neg:DI (match_operand:DI 1 "s_register_operand" "  w, w,0, r")))
944    (clobber (match_scratch:DI 2                          "= X,&w,X, X"))
945    (clobber (reg:CC CC_REGNUM))]
946   "TARGET_NEON"
947   "#"
948   [(set_attr "length" "8")
949    (set_attr "type" "multiple")]
952 ; Split negdi2_neon for vfp registers
;; Applies after reload when operand 0 is a VFP register.  The replacement
;; first sets the scratch (operand 2) to zero and then computes
;; operand 0 = scratch - operand 1, keeping the CC clobber.  If operand 2 is
;; not a register, operand 0 itself is used in its place.
953 (define_split
954   [(set (match_operand:DI 0 "s_register_operand" "")
955         (neg:DI (match_operand:DI 1 "s_register_operand" "")))
956    (clobber (match_scratch:DI 2 ""))
957    (clobber (reg:CC CC_REGNUM))]
958   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
959   [(set (match_dup 2) (const_int 0))
960    (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
961               (clobber (reg:CC CC_REGNUM))])]
962   {
963     if (!REG_P (operands[2]))
964       operands[2] = operands[0];
965   }
968 ; Split negdi2_neon for core registers
;; Applies after reload under TARGET_32BIT when operand 0 satisfies
;; arm_general_register_operand.  The negation is re-emitted as a parallel
;; carrying only the CC clobber, i.e. without the DI scratch.
969 (define_split
970   [(set (match_operand:DI 0 "s_register_operand" "")
971         (neg:DI (match_operand:DI 1 "s_register_operand" "")))
972    (clobber (match_scratch:DI 2 ""))
973    (clobber (reg:CC CC_REGNUM))]
974   "TARGET_32BIT && reload_completed
975    && arm_general_register_operand (operands[0], DImode)"
976   [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
977               (clobber (reg:CC CC_REGNUM))])]
978   ""
;; Abs/neg (the ABSNEG code iterator) over the VH modes, enabled by
;; TARGET_NEON_FP16INST and emitted as v<absneg_str>.<V_s_elem>.
981 (define_insn "<absneg_str><mode>2"
982   [(set (match_operand:VH 0 "s_register_operand" "=w")
983     (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
984  "TARGET_NEON_FP16INST"
985  "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
986  [(set_attr "type" "neon_abs<q>")]
;; Expander named neon_v<absneg_str><mode> for the VH modes.  It only
;; forwards its two operands to the <absneg_str><mode>2 pattern.
989 (define_expand "neon_v<absneg_str><mode>"
990  [(set
991    (match_operand:VH 0 "s_register_operand")
992    (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
993  "TARGET_NEON_FP16INST"
995   emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
996   DONE;
;; VH-mode operations represented by the FP16_RND unspec iterator, enabled
;; by TARGET_NEON_FP16INST.  The mnemonic comes from <fp16_rnd_insn>.
999 (define_insn "neon_v<fp16_rnd_str><mode>"
1000   [(set (match_operand:VH 0 "s_register_operand" "=w")
1001     (unspec:VH
1002      [(match_operand:VH 1 "s_register_operand" "w")]
1003      FP16_RND))]
1004  "TARGET_NEON_FP16INST"
1005  "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
1006  [(set_attr "type" "neon_fp_round_s<q>")]
;; VH-mode UNSPEC_VRSQRTE operation, enabled by TARGET_NEON_FP16INST and
;; emitted as vrsqrte.f16.
1009 (define_insn "neon_vrsqrte<mode>"
1010   [(set (match_operand:VH 0 "s_register_operand" "=w")
1011     (unspec:VH
1012      [(match_operand:VH 1 "s_register_operand" "w")]
1013      UNSPEC_VRSQRTE))]
1014   "TARGET_NEON_FP16INST"
1015   "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
1016  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned element-wise minimum for the VDQIW modes, emitted as
;; vmin.<V_u_elem>.
1019 (define_insn "*umin<mode>3_neon"
1020   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1021         (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1022                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
1023   "TARGET_NEON"
1024   "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1025   [(set_attr "type" "neon_minmax<q>")]
;; Unsigned element-wise maximum for the VDQIW modes, emitted as
;; vmax.<V_u_elem>.
1028 (define_insn "*umax<mode>3_neon"
1029   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1030         (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1031                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
1032   "TARGET_NEON"
1033   "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1034   [(set_attr "type" "neon_minmax<q>")]
;; smin for the VDQW modes, emitted as vmin.<V_s_elem>.  The "type"
;; attribute is neon_fp_minmax_s<q> when <Is_float_mode> holds and
;; neon_minmax<q> otherwise.
1037 (define_insn "*smin<mode>3_neon"
1038   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1039         (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1040                    (match_operand:VDQW 2 "s_register_operand" "w")))]
1041   "TARGET_NEON"
1042   "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1043   [(set (attr "type")
1044       (if_then_else (match_test "<Is_float_mode>")
1045                     (const_string "neon_fp_minmax_s<q>")
1046                     (const_string "neon_minmax<q>")))]
;; smax for the VDQW modes, emitted as vmax.<V_s_elem>.  The "type"
;; attribute is neon_fp_minmax_s<q> when <Is_float_mode> holds and
;; neon_minmax<q> otherwise.
1049 (define_insn "*smax<mode>3_neon"
1050   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1051         (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1052                    (match_operand:VDQW 2 "s_register_operand" "w")))]
1053   "TARGET_NEON"
1054   "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1055   [(set (attr "type")
1056       (if_then_else (match_test "<Is_float_mode>")
1057                     (const_string "neon_fp_minmax_s<q>")
1058                     (const_string "neon_minmax<q>")))]
1061 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1062 ; generic vectorizer code.  It ends up creating a V2DI constructor with
1063 ; SImode elements.
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a vector
;; register and is emitted as vshl.<V_s_elem>.  Alternative 1 shifts by a
;; "Dn" immediate; its assembly text is built by
;; neon_output_shift_immediate, called with "vshl" and 'i'.
1065 (define_insn "vashl<mode>3"
1066   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1067         (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1068                       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1069   "TARGET_NEON"
1070   {
1071     switch (which_alternative)
1072       {
1073         case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1074         case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1075                                                     <MODE>mode,
1076                                                     VALID_NEON_QREG_MODE (<MODE>mode),
1077                                                     true);
1078         default: gcc_unreachable ();
1079       }
1080   }
1081   [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Vector arithmetic right shift by an immediate for the VDQIW modes.  The
;; assembly text is built by neon_output_shift_immediate, called with
;; "vshr" and 's'.
1084 (define_insn "vashr<mode>3_imm"
1085   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1086         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1087                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1088   "TARGET_NEON"
1089   {
1090     return neon_output_shift_immediate ("vshr", 's', &operands[2],
1091                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1092                                         false);
1093   }
1094   [(set_attr "type" "neon_shift_imm<q>")]
;; Vector logical right shift by an immediate for the VDQIW modes.  The
;; assembly text is built by neon_output_shift_immediate, called with
;; "vshr" and 'u'.
1097 (define_insn "vlshr<mode>3_imm"
1098   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1099         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1100                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1101   "TARGET_NEON"
1102   {
1103     return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1104                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1105                                         false);
1106   }              
1107   [(set_attr "type" "neon_shift_imm<q>")]
1110 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1111 ; negative amount, with signed operands. This is essentially the same as
1112 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1113 ; with negative shift amounts.
;; Register-count vector shift for the VDQI modes, wrapped in
;; UNSPEC_ASHIFT_SIGNED and emitted as vshl.<V_s_elem>.  The vashr<mode>3
;; expander emits this pattern with a negated shift count.
1115 (define_insn "ashl<mode>3_signed"
1116   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1117         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1118                       (match_operand:VDQI 2 "s_register_operand" "w")]
1119                      UNSPEC_ASHIFT_SIGNED))]
1120   "TARGET_NEON"
1121   "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1122   [(set_attr "type" "neon_shift_reg<q>")]
1125 ; Used for implementing logical shift-right, which is a left-shift by a negative
1126 ; amount, with unsigned operands.
;; Register-count vector shift for the VDQI modes, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED and emitted as vshl.<V_u_elem>.  The vlshr<mode>3
;; expander emits this pattern with a negated shift count.
1128 (define_insn "ashl<mode>3_unsigned"
1129   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1130         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1131                       (match_operand:VDQI 2 "s_register_operand" "w")]
1132                      UNSPEC_ASHIFT_UNSIGNED))]
1133   "TARGET_NEON"
1134   "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1135   [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic right shift on the VDQIW modes.  When
;; operand 2 is a register, it is negated into a fresh pseudo and the shift
;; is emitted through ashl<mode>3_signed; otherwise operand 2 is passed
;; straight to vashr<mode>3_imm.
1138 (define_expand "vashr<mode>3"
1139   [(set (match_operand:VDQIW 0 "s_register_operand" "")
1140         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1141                         (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1142   "TARGET_NEON"
1144   if (s_register_operand (operands[2], <MODE>mode))
1145     {
1146       rtx neg = gen_reg_rtx (<MODE>mode);
1147       emit_insn (gen_neg<mode>2 (neg, operands[2]));
1148       emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1149     }
1150   else
1151     emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1152   DONE;
;; Expander for vector logical right shift on the VDQIW modes.  When
;; operand 2 is a register, it is negated into a fresh pseudo and the shift
;; is emitted through ashl<mode>3_unsigned; otherwise operand 2 is passed
;; straight to vlshr<mode>3_imm.
1155 (define_expand "vlshr<mode>3"
1156   [(set (match_operand:VDQIW 0 "s_register_operand" "")
1157         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1158                         (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1159   "TARGET_NEON"
1161   if (s_register_operand (operands[2], <MODE>mode))
1162     {
1163       rtx neg = gen_reg_rtx (<MODE>mode);
1164       emit_insn (gen_neg<mode>2 (neg, operands[2]));
1165       emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1166     }
1167   else
1168     emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1169   DONE;
1172 ;; 64-bit shifts
1174 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1175 ;; leaving the upper half uninitialized.  This is OK since the shift
1176 ;; instruction only looks at the low 8 bits anyway.  To avoid confusing
1177 ;; data flow analysis however, we pretend the full register is set
1178 ;; using an unspec.
;; Alternative 0 takes the SImode count from a "Um" memory operand with
;; vld1.32 into lane 0 of the destination; alternative 1 takes it from a
;; core register with vmov.32 into the same lane.
1179 (define_insn "neon_load_count"
1180   [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1181         (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1182                    UNSPEC_LOAD_COUNT))]
1183   "TARGET_NEON"
1184   "@
1185    vld1.32\t{%P0[0]}, %A1
1186    vmov.32\t%P0[0], %1"
1187   [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift on "w" registers, matched only after reload.  The
;; count is either an integer constant in the range 0..63 (alternative 0)
;; or a "w" register (alternative 1); both are emitted as vshl.u64.
1190 (define_insn "ashldi3_neon_noclobber"
1191   [(set (match_operand:DI 0 "s_register_operand"            "=w,w")
1192         (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1193                    (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1194   "TARGET_NEON && reload_completed
1195    && (!CONST_INT_P (operands[2])
1196        || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1197   "@
1198    vshl.u64\t%P0, %P1, %2
1199    vshl.u64\t%P0, %P1, %P2"
1200   [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift by an SImode count.  The output template is "#" and
;; the insn is always split after reload.
;; When operand 0 is a VFP register: a constant count below 1 turns into a
;; plain DImode move; a constant count above 63 is replaced by 63; a
;; non-constant count is first loaded into the DI scratch (operand 5) with
;; neon_load_count.  The shift itself is then emitted through
;; ashldi3_neon_noclobber.
;; Otherwise the shift is done by arm_emit_coreregs_64bit_shift, which
;; receives the SImode scratches (operands 3 and 4); the assert requires
;; operands 0 and 1 to either not overlap or be the same register.
1203 (define_insn_and_split "ashldi3_neon"
1204   [(set (match_operand:DI 0 "s_register_operand"            "= w, w, &r, r, &r, ?w,?w")
1205         (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w, w")
1206                    (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm, i")))
1207    (clobber (match_scratch:SI 3                             "= X, X, &r, X,  X,  X, X"))
1208    (clobber (match_scratch:SI 4                             "= X, X, &r, X,  X,  X, X"))
1209    (clobber (match_scratch:DI 5                             "=&w, X,  X, X,  X, &w, X"))
1210    (clobber (reg:CC_C CC_REGNUM))]
1211   "TARGET_NEON"
1212   "#"
1213   "TARGET_NEON && reload_completed"
1214   [(const_int 0)]
1215   "
1216   {
1217     if (IS_VFP_REGNUM (REGNO (operands[0])))
1218       {
1219         if (CONST_INT_P (operands[2]))
1220           {
1221             if (INTVAL (operands[2]) < 1)
1222               {
1223                 emit_insn (gen_movdi (operands[0], operands[1]));
1224                 DONE;
1225               }
1226             else if (INTVAL (operands[2]) > 63)
1227               operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1228           }
1229         else
1230           {
1231             emit_insn (gen_neon_load_count (operands[5], operands[2]));
1232             operands[2] = operands[5];
1233           }
1235         /* Ditch the unnecessary clobbers.  */
1236         emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1237                                                operands[2]));
1238       }
1239     else
1240       {
1241         /* The shift expanders support either full overlap or no overlap.  */
1242         gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1243                     || REGNO (operands[0]) == REGNO (operands[1]));
1245         arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1246                                        operands[2], operands[3], operands[4]);
1247       }
1248     DONE;
1249   }"
1250   [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1251    (set_attr "opt" "*,*,speed,speed,speed,*,*")
1252    (set_attr "type" "multiple")]
1255 ; The shift amount needs to be negated for right-shifts
;; DImode register-count shift wrapped in UNSPEC_ASHIFT_SIGNED, matched only
;; after reload and emitted as vshl.s64.
1256 (define_insn "signed_shift_di3_neon"
1257   [(set (match_operand:DI 0 "s_register_operand"             "=w")
1258         (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1259                     (match_operand:DI 2 "s_register_operand" " w")]
1260                    UNSPEC_ASHIFT_SIGNED))]
1261   "TARGET_NEON && reload_completed"
1262   "vshl.s64\t%P0, %P1, %P2"
1263   [(set_attr "type" "neon_shift_reg")]
1266 ; The shift amount needs to be negated for right-shifts
;; DImode register-count shift wrapped in UNSPEC_ASHIFT_UNSIGNED, matched
;; only after reload and emitted as vshl.u64.
1267 (define_insn "unsigned_shift_di3_neon"
1268   [(set (match_operand:DI 0 "s_register_operand"             "=w")
1269         (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1270                     (match_operand:DI 2 "s_register_operand" " w")]
1271                    UNSPEC_ASHIFT_UNSIGNED))]
1272   "TARGET_NEON && reload_completed"
1273   "vshl.u64\t%P0, %P1, %P2"
1274   [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic right shift by a constant on "w" registers, matched
;; only after reload.  The constant must lie in the range 1..64; the insn is
;; emitted as vshr.s64.
1277 (define_insn "ashrdi3_neon_imm_noclobber"
1278   [(set (match_operand:DI 0 "s_register_operand"              "=w")
1279         (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1280                      (match_operand:DI 2 "const_int_operand"  " i")))]
1281   "TARGET_NEON && reload_completed
1282    && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1283   "vshr.s64\t%P0, %P1, %2"
1284   [(set_attr "type" "neon_shift_imm")]
;; DImode logical right shift by a constant on "w" registers, matched only
;; after reload.  The constant must lie in the range 1..64; the insn is
;; emitted as vshr.u64.
1287 (define_insn "lshrdi3_neon_imm_noclobber"
1288   [(set (match_operand:DI 0 "s_register_operand"              "=w")
1289         (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1290                      (match_operand:DI 2 "const_int_operand"  " i")))]
1291   "TARGET_NEON && reload_completed
1292    && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1293   "vshr.u64\t%P0, %P1, %2"
1294   [(set_attr "type" "neon_shift_imm")]
1297 ;; ashrdi3_neon
1298 ;; lshrdi3_neon
;; DImode right shifts (the RSHIFTS code iterator) by an SImode count.  The
;; output template is "#" and the insn is always split after reload.
;; When operand 0 is a VFP register and the count is constant: a count
;; below 1 turns into a plain DImode move, a count above 64 is replaced by
;; 64, and the shift is emitted through <shift>di3_neon_imm_noclobber.
;; When operand 0 is a VFP register and the count is not constant: the
;; count is negated into the SImode scratch (operand 3), loaded into the DI
;; scratch (operand 5) with neon_load_count, and the shift is emitted
;; through <shifttype>_shift_di3_neon.
;; Otherwise the shift is done by arm_emit_coreregs_64bit_shift, which
;; receives the SImode scratches (operands 3 and 4); the assert requires
;; operands 0 and 1 to either not overlap or be the same register.
1299 (define_insn_and_split "<shift>di3_neon"
1300   [(set (match_operand:DI 0 "s_register_operand"             "= w, w, &r, r, &r,?w,?w")
1301         (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
1302                     (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
1303    (clobber (match_scratch:SI 3                              "=2r, X, &r, X,  X,2r, X"))
1304    (clobber (match_scratch:SI 4                              "= X, X, &r, X,  X, X, X"))
1305    (clobber (match_scratch:DI 5                              "=&w, X,  X, X, X,&w, X"))
1306    (clobber (reg:CC CC_REGNUM))]
1307   "TARGET_NEON"
1308   "#"
1309   "TARGET_NEON && reload_completed"
1310   [(const_int 0)]
1311   "
1312   {
1313     if (IS_VFP_REGNUM (REGNO (operands[0])))
1314       {
1315         if (CONST_INT_P (operands[2]))
1316           {
1317             if (INTVAL (operands[2]) < 1)
1318               {
1319                 emit_insn (gen_movdi (operands[0], operands[1]));
1320                 DONE;
1321               }
1322             else if (INTVAL (operands[2]) > 64)
1323               operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1325             /* Ditch the unnecessary clobbers.  */
1326             emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1327                                                           operands[1],
1328                                                           operands[2]));
1329           }
1330         else 
1331           {
1332             /* We must use a negative left-shift.  */
1333             emit_insn (gen_negsi2 (operands[3], operands[2]));
1334             emit_insn (gen_neon_load_count (operands[5], operands[3]));
1335             emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1336                                                        operands[5]));
1337           }
1338       }
1339     else
1340       {
1341         /* The shift expanders support either full overlap or no overlap.  */
1342         gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1343                     || REGNO (operands[0]) == REGNO (operands[1]));
1345         /* This clobbers CC (ASHIFTRT by register only).  */
1346         arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1347                                        operands[2], operands[3], operands[4]);
1348       }
1350     DONE;
1351   }"
1352   [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1353    (set_attr "opt" "*,*,speed,speed,speed,*,*")
1354    (set_attr "type" "multiple")]
1357 ;; Widening operations
;; Signed widening sum expander for the VQI modes.  It copies operand 2
;; into operand 0 when they are different rtxes, then accumulates the
;; sign-extended halves of operand 1 into operand 0 in two steps via the
;; vec_sel_widen_ssum_lo and vec_sel_widen_ssum_hi patterns.  The half
;; selectors come from arm_simd_vect_par_cnst_half (false for p1, true for
;; p2).
1359 (define_expand "widen_ssum<mode>3"
1360   [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1361         (plus:<V_double_width>
1362          (sign_extend:<V_double_width>
1363           (match_operand:VQI 1 "s_register_operand" ""))
1364          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1365   "TARGET_NEON"
1366   {
1367     machine_mode mode = GET_MODE (operands[1]);
1368     rtx p1, p2;
1370     p1  = arm_simd_vect_par_cnst_half (mode, false);
1371     p2  = arm_simd_vect_par_cnst_half (mode, true);
1373     if (operands[0] != operands[2])
1374       emit_move_insn (operands[0], operands[2]);
1376     emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1377                                                          operands[1],
1378                                                          p1,
1379                                                          operands[0]));
1380     emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1381                                                          operands[1],
1382                                                          p2,
1383                                                          operands[0]));
1384     DONE;
1385   }
;; Adds the sign-extended half of operand 1 selected by a
;; vect_par_constant_low parallel to operand 3, which is tied to the
;; destination.  Emitted as vaddw.<V_s_elem>; the source half is printed
;; with %f1 when BYTES_BIG_ENDIAN and with %e1 otherwise.
1388 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1389   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1390         (plus:<V_double_width>
1391          (sign_extend:<V_double_width>
1392           (vec_select:<V_HALF>
1393            (match_operand:VQI 1 "s_register_operand" "%w")
1394            (match_operand:VQI 2 "vect_par_constant_low" "")))
1395          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1396   "TARGET_NEON"
1398   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1399     "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1401   [(set_attr "type" "neon_add_widen")])
;; Adds the sign-extended half of operand 1 selected by a
;; vect_par_constant_high parallel to operand 3, which is tied to the
;; destination.  Emitted as vaddw.<V_s_elem>; the source half is printed
;; with %e1 when BYTES_BIG_ENDIAN and with %f1 otherwise.
1403 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1404   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1405         (plus:<V_double_width>
1406          (sign_extend:<V_double_width>
1407           (vec_select:<V_HALF>
1408                          (match_operand:VQI 1 "s_register_operand" "%w")
1409                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1410          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1411   "TARGET_NEON"
1413   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1414     "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1416   [(set_attr "type" "neon_add_widen")])
;; Signed widening sum for the VW modes: operand 0 = sign_extend (operand 1)
;; + operand 2, in the <V_widen> mode.  Emitted as a single
;; vaddw.<V_s_elem> with operand 2 as the wide source.
1418 (define_insn "widen_ssum<mode>3"
1419   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1420         (plus:<V_widen>
1421          (sign_extend:<V_widen>
1422           (match_operand:VW 1 "s_register_operand" "%w"))
1423          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1424   "TARGET_NEON"
1425   "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1426   [(set_attr "type" "neon_add_widen")]
;; Unsigned widening sum expander for the VQI modes.  It copies operand 2
;; into operand 0 when they are different rtxes, then accumulates the
;; zero-extended halves of operand 1 into operand 0 in two steps via the
;; vec_sel_widen_usum_lo and vec_sel_widen_usum_hi patterns.  The half
;; selectors come from arm_simd_vect_par_cnst_half (false for p1, true for
;; p2).
1429 (define_expand "widen_usum<mode>3"
1430   [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1431         (plus:<V_double_width>
1432          (zero_extend:<V_double_width>
1433           (match_operand:VQI 1 "s_register_operand" ""))
1434          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1435   "TARGET_NEON"
1436   {
1437     machine_mode mode = GET_MODE (operands[1]);
1438     rtx p1, p2;
1440     p1  = arm_simd_vect_par_cnst_half (mode, false);
1441     p2  = arm_simd_vect_par_cnst_half (mode, true);
1443     if (operands[0] != operands[2])
1444       emit_move_insn (operands[0], operands[2]);
1446     emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1447                                                          operands[1],
1448                                                          p1,
1449                                                          operands[0]));
1450     emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1451                                                          operands[1],
1452                                                          p2,
1453                                                          operands[0]));
1454     DONE;
1455   }
;; Adds the zero-extended half of operand 1 selected by a
;; vect_par_constant_low parallel to operand 3, which is tied to the
;; destination.  Emitted as vaddw.<V_u_elem>; the source half is printed
;; with %f1 when BYTES_BIG_ENDIAN and with %e1 otherwise.
1458 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1459   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1460         (plus:<V_double_width>
1461          (zero_extend:<V_double_width>
1462           (vec_select:<V_HALF>
1463            (match_operand:VQI 1 "s_register_operand" "%w")
1464            (match_operand:VQI 2 "vect_par_constant_low" "")))
1465          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1466   "TARGET_NEON"
1468   return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1469     "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1471   [(set_attr "type" "neon_add_widen")])
;; Adds the zero-extended half of operand 1 selected by a
;; vect_par_constant_high parallel to operand 3, which is tied to the
;; destination.  Emitted as vaddw.<V_u_elem>; the source half is printed
;; with %e1 when BYTES_BIG_ENDIAN and with %f1 otherwise.
1473 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1474   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1475         (plus:<V_double_width>
1476          (zero_extend:<V_double_width>
1477           (vec_select:<V_HALF>
1478                          (match_operand:VQI 1 "s_register_operand" "%w")
1479                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1480          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1481   "TARGET_NEON"
1483  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1484     "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1486   [(set_attr "type" "neon_add_widen")])
;; Unsigned widening sum for the VW modes: operand 0 = zero_extend
;; (operand 1) + operand 2, in the <V_widen> mode.  Emitted as a single
;; vaddw.<V_u_elem> with operand 2 as the wide source.
1488 (define_insn "widen_usum<mode>3"
1489   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1490         (plus:<V_widen> (zero_extend:<V_widen>
1491                           (match_operand:VW 1 "s_register_operand" "%w"))
1492                         (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1493   "TARGET_NEON"
1494   "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1495   [(set_attr "type" "neon_add_widen")]
1498 ;; Helpers for quad-word reduction operations
1500 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1501 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1502 ; N/2-element vector.
;; V4SI instance: combines elements 0-1 of operand 1 with elements 2-3 of
;; the same register using a VQH_OPS operation, producing a V2SI result.
;; The assembly names the two halves of operand 1 with %e1 and %f1.
1504 (define_insn "quad_halves_<code>v4si"
1505   [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1506         (VQH_OPS:V2SI
1507           (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1508                            (parallel [(const_int 0) (const_int 1)]))
1509           (vec_select:V2SI (match_dup 1)
1510                            (parallel [(const_int 2) (const_int 3)]))))]
1511   "TARGET_NEON"
1512   "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1513   [(set_attr "vqh_mnem" "<VQH_mnem>")
1514    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF instance: combines elements 0-1 of operand 1 with elements 2-3 of
;; the same register using a VQHS_OPS operation, producing a V2SF result.
;; Only enabled when flag_unsafe_math_optimizations is set.
1517 (define_insn "quad_halves_<code>v4sf"
1518   [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1519         (VQHS_OPS:V2SF
1520           (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1521                            (parallel [(const_int 0) (const_int 1)]))
1522           (vec_select:V2SF (match_dup 1)
1523                            (parallel [(const_int 2) (const_int 3)]))))]
1524   "TARGET_NEON && flag_unsafe_math_optimizations"
1525   "<VQH_mnem>.f32\t%P0, %e1, %f1"
1526   [(set_attr "vqh_mnem" "<VQH_mnem>")
1527    (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI instance: combines elements 0-3 of operand 1 with elements 4-7 of
;; the same register using a VQH_OPS operation, producing a V4HI result.
;; Operand 0 appears only as the SET destination and is never read by the
;; instruction, so it uses the write-only "=w" constraint, like the V4SI
;; and V4SF instances.
1530 (define_insn "quad_halves_<code>v8hi"
1531   [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1532         (VQH_OPS:V4HI
1533           (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1534                            (parallel [(const_int 0) (const_int 1)
1535                                       (const_int 2) (const_int 3)]))
1536           (vec_select:V4HI (match_dup 1)
1537                            (parallel [(const_int 4) (const_int 5)
1538                                       (const_int 6) (const_int 7)]))))]
1539   "TARGET_NEON"
1540   "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1541   [(set_attr "vqh_mnem" "<VQH_mnem>")
1542    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI instance: combines elements 0-7 of operand 1 with elements 8-15 of
;; the same register using a VQH_OPS operation, producing a V8QI result.
;; Operand 0 appears only as the SET destination and is never read by the
;; instruction, so it uses the write-only "=w" constraint, like the V4SI
;; and V4SF instances.
1545 (define_insn "quad_halves_<code>v16qi"
1546   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1547         (VQH_OPS:V8QI
1548           (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1549                            (parallel [(const_int 0) (const_int 1)
1550                                       (const_int 2) (const_int 3)
1551                                       (const_int 4) (const_int 5)
1552                                       (const_int 6) (const_int 7)]))
1553           (vec_select:V8QI (match_dup 1)
1554                            (parallel [(const_int 8) (const_int 9)
1555                                       (const_int 10) (const_int 11)
1556                                       (const_int 12) (const_int 13)
1557                                       (const_int 14) (const_int 15)]))))]
1558   "TARGET_NEON"
1559   "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1560   [(set_attr "vqh_mnem" "<VQH_mnem>")
1561    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Moves operand 1 (<V_HALF> mode) into the <V_HALF>-mode subreg of
;; operand 0 located at byte offset GET_MODE_SIZE (<V_HALF>mode).
1564 (define_expand "move_hi_quad_<mode>"
1565  [(match_operand:ANY128 0 "s_register_operand" "")
1566   (match_operand:<V_HALF> 1 "s_register_operand" "")]
1567  "TARGET_NEON"
1569   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1570                                        GET_MODE_SIZE (<V_HALF>mode)),
1571                   operands[1]);
1572   DONE;
;; Moves operand 1 (<V_HALF> mode) into the <V_HALF>-mode subreg of
;; operand 0 located at byte offset 0.
1575 (define_expand "move_lo_quad_<mode>"
1576  [(match_operand:ANY128 0 "s_register_operand" "")
1577   (match_operand:<V_HALF> 1 "s_register_operand" "")]
1578  "TARGET_NEON"
1580   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1581                                        <MODE>mode, 0),
1582                   operands[1]);
1583   DONE;
1586 ;; Reduction operations
;; Sum reduction to a scalar for the VD modes.  Floating-point modes also
;; require flag_unsafe_math_optimizations.  The vector is reduced by
;; neon_pairwise_reduce using the neon_vpadd_internal<mode> generator, and
;; element 0 of the result is extracted into operand 0.
1588 (define_expand "reduc_plus_scal_<mode>"
1589   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1590    (match_operand:VD 1 "s_register_operand" "")]
1591   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1593   rtx vec = gen_reg_rtx (<MODE>mode);
1594   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1595                         &gen_neon_vpadd_internal<mode>);
1596   /* The same result is actually computed into every element.  */
1597   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1598   DONE;
;; Sum reduction to a scalar for the VQ modes; disabled when
;; BYTES_BIG_ENDIAN, and floating-point modes also require
;; flag_unsafe_math_optimizations.  The two halves are first combined into
;; a <V_HALF> register with quad_halves_plus<mode>, and the result is then
;; reduced by the <V_half> reduc_plus_scal expander.
1601 (define_expand "reduc_plus_scal_<mode>"
1602   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1603    (match_operand:VQ 1 "s_register_operand" "")]
1604   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1605    && !BYTES_BIG_ENDIAN"
1607   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1609   emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1610   emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1612   DONE;
;; Sum reduction of a V2DI vector (operand 1) into the DI operand 0.
;; Emits arm_reduc_plus_internal_v2di into a V2DI temporary and then extracts
;; element 0 of that temporary.  Disabled when BYTES_BIG_ENDIAN.
1615 (define_expand "reduc_plus_scal_v2di"
1616   [(match_operand:DI 0 "nonimmediate_operand" "=w")
1617    (match_operand:V2DI 1 "s_register_operand" "")]
1618   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1620   rtx vec = gen_reg_rtx (V2DImode);
1622   emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1623   emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1625   DONE;
;; Helper insn for reduc_plus_scal_v2di, modelled as UNSPEC_VPADD on a V2DI
;; register.  Emits a single vadd.i64 whose sources are the %e1 and %f1 parts
;; of operand 1 and whose destination is the %e0 part of operand 0.
1628 (define_insn "arm_reduc_plus_internal_v2di"
1629   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1630         (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1631                      UNSPEC_VPADD))]
1632   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1633   "vadd.i64\t%e0, %e1, %f1"
1634   [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of the VD vector operand 1 into the scalar
;; operand 0.  neon_pairwise_reduce is driven with the vpsmin pattern and
;; element 0 of the temporary is extracted.  Float modes are only accepted
;; when flag_unsafe_math_optimizations is set.
1637 (define_expand "reduc_smin_scal_<mode>"
1638   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1639    (match_operand:VD 1 "s_register_operand" "")]
1640   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1642   rtx vec = gen_reg_rtx (<MODE>mode);
1644   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1645                         &gen_neon_vpsmin<mode>);
1646   /* The result is computed into every element of the vector.  */
1647   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1648   DONE;
;; Signed-minimum reduction of the VQ vector operand 1 into the scalar
;; operand 0.  quad_halves_smin<mode> folds the vector into a <V_HALF>mode
;; temporary, which is passed to the half-width reduc_smin_scal expander.
;; Disabled when BYTES_BIG_ENDIAN; float modes additionally need
;; flag_unsafe_math_optimizations.
1651 (define_expand "reduc_smin_scal_<mode>"
1652   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1653    (match_operand:VQ 1 "s_register_operand" "")]
1654   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1655    && !BYTES_BIG_ENDIAN"
1657   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1659   emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1660   emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1662   DONE;
;; Signed-maximum reduction of the VD vector operand 1 into the scalar
;; operand 0.  neon_pairwise_reduce is driven with the vpsmax pattern and
;; element 0 of the temporary is extracted.  Float modes are only accepted
;; when flag_unsafe_math_optimizations is set.
1665 (define_expand "reduc_smax_scal_<mode>"
1666   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1667    (match_operand:VD 1 "s_register_operand" "")]
1668   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1670   rtx vec = gen_reg_rtx (<MODE>mode);
1671   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1672                         &gen_neon_vpsmax<mode>);
1673   /* The result is computed into every element of the vector.  */
1674   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1675   DONE;
;; Signed-maximum reduction of the VQ vector operand 1 into the scalar
;; operand 0.  quad_halves_smax<mode> folds the vector into a <V_HALF>mode
;; temporary, which is passed to the half-width reduc_smax_scal expander.
;; Disabled when BYTES_BIG_ENDIAN; float modes additionally need
;; flag_unsafe_math_optimizations.
1678 (define_expand "reduc_smax_scal_<mode>"
1679   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1680    (match_operand:VQ 1 "s_register_operand" "")]
1681   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1682    && !BYTES_BIG_ENDIAN"
1684   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1686   emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1687   emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1689   DONE;
;; Unsigned-minimum reduction of the VDI vector operand 1 into the scalar
;; operand 0.  neon_pairwise_reduce is driven with the vpumin pattern and
;; element 0 of the temporary is extracted.
1692 (define_expand "reduc_umin_scal_<mode>"
1693   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1694    (match_operand:VDI 1 "s_register_operand" "")]
1695   "TARGET_NEON"
1697   rtx vec = gen_reg_rtx (<MODE>mode);
1698   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1699                         &gen_neon_vpumin<mode>);
1700   /* The result is computed into every element of the vector.  */
1701   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1702   DONE;
;; Unsigned-minimum reduction of the VQI vector operand 1 into the scalar
;; operand 0.  quad_halves_umin<mode> folds the vector into a <V_HALF>mode
;; temporary, which is passed to the half-width reduc_umin_scal expander.
;; Disabled when BYTES_BIG_ENDIAN.
1705 (define_expand "reduc_umin_scal_<mode>"
1706   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1707    (match_operand:VQI 1 "s_register_operand" "")]
1708   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1710   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1712   emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1713   emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1715   DONE;
;; Unsigned-maximum reduction of the VDI vector operand 1 into the scalar
;; operand 0.  neon_pairwise_reduce is driven with the vpumax pattern and
;; element 0 of the temporary is extracted.
1718 (define_expand "reduc_umax_scal_<mode>"
1719   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1720    (match_operand:VDI 1 "s_register_operand" "")]
1721   "TARGET_NEON"
1723   rtx vec = gen_reg_rtx (<MODE>mode);
1724   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1725                         &gen_neon_vpumax<mode>);
1726   /* The result is computed into every element of the vector.  */
1727   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1728   DONE;
;; Unsigned-maximum reduction of the VQI vector operand 1 into the scalar
;; operand 0.  quad_halves_umax<mode> folds the vector into a <V_HALF>mode
;; temporary, which is passed to the half-width reduc_umax_scal expander.
;; Disabled when BYTES_BIG_ENDIAN.
1731 (define_expand "reduc_umax_scal_<mode>"
1732   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1733    (match_operand:VQI 1 "s_register_operand" "")]
1734   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1736   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1738   emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1739   emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1741   DONE;
;; Pairwise add of two VD vectors, modelled as UNSPEC_VPADD and emitted as
;; vpadd.<V_if_elem>.  Passed to neon_pairwise_reduce by the reduc_plus_scal
;; expander for VD modes.  The "type" attribute picks the floating-point or
;; integer reduction class depending on <Is_float_mode>.
1744 (define_insn "neon_vpadd_internal<mode>"
1745   [(set (match_operand:VD 0 "s_register_operand" "=w")
1746         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1747                     (match_operand:VD 2 "s_register_operand" "w")]
1748                    UNSPEC_VPADD))]
1749   "TARGET_NEON"
1750   "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1751   ;; Assume this schedules like vadd.
1752   [(set (attr "type")
1753       (if_then_else (match_test "<Is_float_mode>")
1754                     (const_string "neon_fp_reduc_add_s<q>")
1755                     (const_string "neon_reduc_add<q>")))]
;; V4HF pairwise add, modelled as UNSPEC_VPADD and emitted as vpadd.f16.
;; Unlike neon_vpadd_internal<mode> this is gated on TARGET_NEON_FP16INST.
1758 (define_insn "neon_vpaddv4hf"
1759  [(set
1760    (match_operand:V4HF 0 "s_register_operand" "=w")
1761    (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1762                  (match_operand:V4HF 2 "s_register_operand" "w")]
1763     UNSPEC_VPADD))]
1764  "TARGET_NEON_FP16INST"
1765  "vpadd.f16\t%P0, %P1, %P2"
1766  [(set_attr "type" "neon_reduc_add")]
;; Pairwise minimum of two VD vectors using the <V_s_elem> element suffix,
;; modelled as UNSPEC_VPSMIN and emitted as vpmin.  The "type" attribute
;; picks the floating-point or integer min/max reduction class depending on
;; <Is_float_mode>.
1769 (define_insn "neon_vpsmin<mode>"
1770   [(set (match_operand:VD 0 "s_register_operand" "=w")
1771         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1772                     (match_operand:VD 2 "s_register_operand" "w")]
1773                    UNSPEC_VPSMIN))]
1774   "TARGET_NEON"
1775   "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1776   [(set (attr "type")
1777       (if_then_else (match_test "<Is_float_mode>")
1778                     (const_string "neon_fp_reduc_minmax_s<q>")
1779                     (const_string "neon_reduc_minmax<q>")))]
;; Pairwise maximum of two VD vectors using the <V_s_elem> element suffix,
;; modelled as UNSPEC_VPSMAX and emitted as vpmax.  The "type" attribute
;; picks the floating-point or integer min/max reduction class depending on
;; <Is_float_mode>.
1782 (define_insn "neon_vpsmax<mode>"
1783   [(set (match_operand:VD 0 "s_register_operand" "=w")
1784         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1785                     (match_operand:VD 2 "s_register_operand" "w")]
1786                    UNSPEC_VPSMAX))]
1787   "TARGET_NEON"
1788   "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1789   [(set (attr "type")
1790       (if_then_else (match_test "<Is_float_mode>")
1791                     (const_string "neon_fp_reduc_minmax_s<q>")
1792                     (const_string "neon_reduc_minmax<q>")))]
;; Pairwise minimum of two VDI vectors using the <V_u_elem> element suffix,
;; modelled as UNSPEC_VPUMIN and emitted as vpmin.
1795 (define_insn "neon_vpumin<mode>"
1796   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1797         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1798                      (match_operand:VDI 2 "s_register_operand" "w")]
1799                    UNSPEC_VPUMIN))]
1800   "TARGET_NEON"
1801   "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1802   [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise maximum of two VDI vectors using the <V_u_elem> element suffix,
;; modelled as UNSPEC_VPUMAX and emitted as vpmax.
1805 (define_insn "neon_vpumax<mode>"
1806   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1807         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1808                      (match_operand:VDI 2 "s_register_operand" "w")]
1809                    UNSPEC_VPUMAX))]
1810   "TARGET_NEON"
1811   "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1812   [(set_attr "type" "neon_reduc_minmax<q>")]
1815 ;; Saturating arithmetic
1817 ; NOTE: Neon supports many more saturating variants of instructions than the
1818 ; following, but these are all GCC currently understands.
1819 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1820 ; yet either, although these patterns may be used by intrinsics when they're
1821 ; added.
;; Signed saturating add (ss_plus) of two VD vectors, emitted as
;; vqadd.<V_s_elem>.
1823 (define_insn "*ss_add<mode>_neon"
1824   [(set (match_operand:VD 0 "s_register_operand" "=w")
1825        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1826                    (match_operand:VD 2 "s_register_operand" "w")))]
1827   "TARGET_NEON"
1828   "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1829   [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) of two VD vectors, emitted as
;; vqadd.<V_u_elem>.
1832 (define_insn "*us_add<mode>_neon"
1833   [(set (match_operand:VD 0 "s_register_operand" "=w")
1834        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1835                    (match_operand:VD 2 "s_register_operand" "w")))]
1836   "TARGET_NEON"
1837   "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1838   [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) of two VD vectors, emitted as
;; vqsub.<V_s_elem>.
1841 (define_insn "*ss_sub<mode>_neon"
1842   [(set (match_operand:VD 0 "s_register_operand" "=w")
1843        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1844                     (match_operand:VD 2 "s_register_operand" "w")))]
1845   "TARGET_NEON"
1846   "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1847   [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) of two VD vectors, emitted as
;; vqsub.<V_u_elem>.
1850 (define_insn "*us_sub<mode>_neon"
1851   [(set (match_operand:VD 0 "s_register_operand" "=w")
1852        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1853                     (match_operand:VD 2 "s_register_operand" "w")))]
1854   "TARGET_NEON"
1855   "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1856   [(set_attr "type" "neon_qsub<q>")]
1859 ;; Conditional instructions.  These are comparisons with conditional moves for
1860 ;; vectors.  They perform the assignment:
1861 ;;   
1862 ;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1864 ;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
1865 ;; element-wise.
;; Expander for the element-wise vector conditional select on VDQW modes.
;; The C body works in three steps:
;;   1. For GE/GT/LE/LT/EQ against CONST0_RTX the compare-with-zero form is
;;      selected; in every other case operand 5 is forced into a register.
;;   2. A base comparison generator and its complementary generator are
;;      chosen from the comparison code, and "inverse" is set for the LT/LE
;;      family.
;;   3. One or two compare insns build "mask", and a final vbsl selects
;;      between operands 1 and 2, with the two swapped when the emitted mask
;;      is the negation of the requested condition.
;; Float modes are only accepted when flag_unsafe_math_optimizations is set.
1867 (define_expand "vcond<mode><mode>"
1868   [(set (match_operand:VDQW 0 "s_register_operand" "")
1869         (if_then_else:VDQW
1870           (match_operator 3 "comparison_operator"
1871             [(match_operand:VDQW 4 "s_register_operand" "")
1872              (match_operand:VDQW 5 "nonmemory_operand" "")])
1873           (match_operand:VDQW 1 "s_register_operand" "")
1874           (match_operand:VDQW 2 "s_register_operand" "")))]
1875   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1877   int inverse = 0;
1878   int use_zero_form = 0;
1879   int swap_bsl_operands = 0;
1880   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1881   rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1883   rtx (*base_comparison) (rtx, rtx, rtx);
1884   rtx (*complimentary_comparison) (rtx, rtx, rtx);
1886   switch (GET_CODE (operands[3]))
1887     {
1888     case GE:
1889     case GT:
1890     case LE:
1891     case LT:
1892     case EQ:
1893       if (operands[5] == CONST0_RTX (<MODE>mode))
1894         {
1895           use_zero_form = 1;
1896           break;
1897         }
1898       /* Fall through.  */
1899     default:
1900       if (!REG_P (operands[5]))
1901         operands[5] = force_reg (<MODE>mode, operands[5]);
1902     }
1904   switch (GET_CODE (operands[3]))
1905     {
1906     case LT:
1907     case UNLT:
1908       inverse = 1;
1909       /* Fall through.  */
1910     case GE:
1911     case UNGE:
1912     case ORDERED:
1913     case UNORDERED:
1914       base_comparison = gen_neon_vcge<mode>;
1915       complimentary_comparison = gen_neon_vcgt<mode>;
1916       break;
1917     case LE:
1918     case UNLE:
1919       inverse = 1;
1920       /* Fall through.  */
1921     case GT:
1922     case UNGT:
1923       base_comparison = gen_neon_vcgt<mode>;
1924       complimentary_comparison = gen_neon_vcge<mode>;
1925       break;
1926     case EQ:
1927     case NE:
1928     case UNEQ:
1929       base_comparison = gen_neon_vceq<mode>;
1930       complimentary_comparison = gen_neon_vceq<mode>;
1931       break;
1932     default:
1933       gcc_unreachable ();
1934     }
1936   switch (GET_CODE (operands[3]))
1937     {
1938     case LT:
1939     case LE:
1940     case GT:
1941     case GE:
1942     case EQ:
1943       /* The easy case.  Here we emit one of vcge, vcgt or vceq.
1944          As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
1945          a GE b -> a GE b
1946          a GT b -> a GT b
1947          a LE b -> b GE a
1948          a LT b -> b GT a
1949          a EQ b -> a EQ b
1950          Note that there also exist direct comparison against 0 forms,
1951          so catch those as a special case.  */
1952       if (use_zero_form)
1953         {
1954           inverse = 0;
1955           switch (GET_CODE (operands[3]))
1956             {
1957             case LT:
1958               base_comparison = gen_neon_vclt<mode>;
1959               break;
1960             case LE:
1961               base_comparison = gen_neon_vcle<mode>;
1962               break;
1963             default:
1964               /* Do nothing, other zero form cases already have the correct
1965                  base_comparison.  */
1966               break;
1967             }
1968         }
1970       if (!inverse)
1971         emit_insn (base_comparison (mask, operands[4], operands[5]));
1972       else
1973         emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1974       break;
1975     case UNLT:
1976     case UNLE:
1977     case UNGT:
1978     case UNGE:
1979     case NE:
1980       /* Vector compare returns false for lanes which are unordered, so if we use
1981          the inverse of the comparison we actually want to emit, then
1982          swap the operands to BSL, we will end up with the correct result.
1983          Note that a NE NaN and NaN NE b are true for all a, b.
1985          Our transformations are:
1986          a UNGE b -> !(b GT a)
1987          a UNGT b -> !(b GE a)
1988          a UNLE b -> !(a GT b)
1989          a UNLT b -> !(a GE b)
1990          a NE b -> !(a EQ b)  */
1992       if (inverse)
1993         emit_insn (base_comparison (mask, operands[4], operands[5]));
1994       else
1995         emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1997       swap_bsl_operands = 1;
1998       break;
1999     case UNEQ:
2000       /* We check (a > b ||  b > a).  Combining these comparisons gives us
2001          true iff (a != b && a ORDERED b); swapping the operands to BSL
2002          will then give us (a == b ||  a UNORDERED b) as intended.  */
2004       emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
2005       emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
2006       emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2007       swap_bsl_operands = 1;
2008       break;
2009     case UNORDERED:
2010        /* Operands are ORDERED iff (a > b || b >= a).
2011          Swapping the operands to BSL will give the UNORDERED case.  */
2012      swap_bsl_operands = 1;
2013      /* Fall through.  */
2014     case ORDERED:
2015       emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2016       emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2017       emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2018       break;
2019     default:
2020       gcc_unreachable ();
2021     }
2023   if (swap_bsl_operands)
2024     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2025                                     operands[1]));
2026   else
2027     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2028                                     operands[2]));
2029   DONE;
;; Expander for the element-wise vector conditional select with an unsigned
;; (or EQ/NE) comparison on VDQIW modes.  One compare insn builds "mask":
;; GEU/GTU/EQ are emitted directly, LEU/LTU are emitted as GEU/GTU with the
;; comparison operands swapped, and NE is emitted as EQ with the vbsl data
;; operands swapped ("inverse").  A final vbsl selects between operands 1
;; and 2.
;; NOTE(review): operand 5 uses the s_register_operand predicate, so the
;; CONST0_RTX test (immediate_zero) looks unreachable from this pattern.  If
;; it ever fired, the LEU/LTU paths would call the vcle/vclt generators that
;; the vcond<mode><mode> expander uses for its signed LE/LT compares against
;; zero -- confirm whether this branch is intentional.
2032 (define_expand "vcondu<mode><mode>"
2033   [(set (match_operand:VDQIW 0 "s_register_operand" "")
2034         (if_then_else:VDQIW
2035           (match_operator 3 "arm_comparison_operator"
2036             [(match_operand:VDQIW 4 "s_register_operand" "")
2037              (match_operand:VDQIW 5 "s_register_operand" "")])
2038           (match_operand:VDQIW 1 "s_register_operand" "")
2039           (match_operand:VDQIW 2 "s_register_operand" "")))]
2040   "TARGET_NEON"
2042   rtx mask;
2043   int inverse = 0, immediate_zero = 0;
2044   
2045   mask = gen_reg_rtx (<V_cmp_result>mode);
2046   
2047   if (operands[5] == CONST0_RTX (<MODE>mode))
2048     immediate_zero = 1;
2049   else if (!REG_P (operands[5]))
2050     operands[5] = force_reg (<MODE>mode, operands[5]);
2051   
2052   switch (GET_CODE (operands[3]))
2053     {
2054     case GEU:
2055       emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2056       break;
2057     
2058     case GTU:
2059       emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2060       break;
2061     
2062     case EQ:
2063       emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2064       break;
2065     
2066     case LEU:
2067       if (immediate_zero)
2068         emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2069       else
2070         emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2071       break;
2072     
2073     case LTU:
2074       if (immediate_zero)
2075         emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2076       else
2077         emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2078       break;
2079     
2080     case NE:
2081       emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2082       inverse = 1;
2083       break;
2084     
2085     default:
2086       gcc_unreachable ();
2087     }
2088   
2089   if (inverse)
2090     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2091                                     operands[1]));
2092   else
2093     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2094                                     operands[2]));
2096   DONE;
2099 ;; Patterns for builtins.
2101 ; good for plain vadd, vaddq.
;; Builtin expander for vadd on VCVTF modes.  Emits the generic add<mode>3
;; pattern when the mode is not a float mode or
;; flag_unsafe_math_optimizations is set; otherwise emits
;; neon_vadd<mode>_unspec.
2103 (define_expand "neon_vadd<mode>"
2104   [(match_operand:VCVTF 0 "s_register_operand" "=w")
2105    (match_operand:VCVTF 1 "s_register_operand" "w")
2106    (match_operand:VCVTF 2 "s_register_operand" "w")]
2107   "TARGET_NEON"
2109   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2110     emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2111   else
2112     emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2113                                            operands[2]));
2114   DONE;
;; Builtin expander for vadd on VH modes: always emits add<mode>3_fp16.
;; Requires TARGET_NEON_FP16INST.
2117 (define_expand "neon_vadd<mode>"
2118   [(match_operand:VH 0 "s_register_operand")
2119    (match_operand:VH 1 "s_register_operand")
2120    (match_operand:VH 2 "s_register_operand")]
2121   "TARGET_NEON_FP16INST"
2123   emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2124   DONE;
;; Builtin expander for vsub on VH modes: always emits sub<mode>3_fp16.
;; Requires TARGET_NEON_FP16INST.
2127 (define_expand "neon_vsub<mode>"
2128   [(match_operand:VH 0 "s_register_operand")
2129    (match_operand:VH 1 "s_register_operand")
2130    (match_operand:VH 2 "s_register_operand")]
2131   "TARGET_NEON_FP16INST"
2133   emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2134   DONE;
2137 ; Note that NEON operations don't support the full IEEE 754 standard: in
2138 ; particular, denormal values are flushed to zero.  This means that GCC cannot
2139 ; use those instructions for autovectorization, etc. unless
2140 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2141 ; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
2142 ; header) must work in either case: if -funsafe-math-optimizations is given,
2143 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2144 ; expand to unspecs (which may potentially limit the extent to which they might
2145 ; be optimized by generic code).
2147 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Add on VCVTF modes wrapped in UNSPEC_VADD, emitted as vadd.<V_if_elem>.
;; This is the pattern the neon_vadd<mode> expander falls back to when it
;; does not use add<mode>3.
2149 (define_insn "neon_vadd<mode>_unspec"
2150   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2151         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2152                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2153                      UNSPEC_VADD))]
2154   "TARGET_NEON"
2155   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2156   [(set (attr "type")
2157       (if_then_else (match_test "<Is_float_mode>")
2158                     (const_string "neon_fp_addsub_s<q>")
2159                     (const_string "neon_add<q>")))]
;; Long add: both inputs are VDI vectors and the result has mode <V_widen>.
;; Modelled as an unspec over the VADDL iterator and emitted as vaddl with
;; the <sup> prefix and <V_sz_elem> element size.
2162 (define_insn "neon_vaddl<sup><mode>"
2163   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2164         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2165                            (match_operand:VDI 2 "s_register_operand" "w")]
2166                           VADDL))]
2167   "TARGET_NEON"
2168   "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2169   [(set_attr "type" "neon_add_long")]
;; Wide add: operand 1 and the result have mode <V_widen>, operand 2 is a
;; VDI vector.  Modelled as an unspec over the VADDW iterator and emitted as
;; vaddw with the <sup> prefix and <V_sz_elem> element size.
2172 (define_insn "neon_vaddw<sup><mode>"
2173   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2174         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2175                            (match_operand:VDI 2 "s_register_operand" "w")]
2176                           VADDW))]
2177   "TARGET_NEON"
2178   "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2179   [(set_attr "type" "neon_add_widen")]
2182 ; vhadd and vrhadd.
;; Halving add on VDQIW vectors, modelled as an unspec over the VHADD
;; iterator.  Emitted as v<r>hadd with the <sup> prefix and <V_sz_elem>
;; element size.
2184 (define_insn "neon_v<r>hadd<sup><mode>"
2185   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2186         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2187                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2188                       VHADD))]
2189   "TARGET_NEON"
2190   "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2191   [(set_attr "type" "neon_add_halve_q")]
;; Builtin form of the saturating add on VDQIX vectors, modelled as an
;; unspec over the VQADD iterator.  Emitted as vqadd with the <sup> prefix
;; and <V_sz_elem> element size.
2194 (define_insn "neon_vqadd<sup><mode>"
2195   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2196         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2197                        (match_operand:VDQIX 2 "s_register_operand" "w")]
2198                      VQADD))]
2199   "TARGET_NEON"
2200   "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2201   [(set_attr "type" "neon_qadd<q>")]
;; Add-and-narrow: both inputs are VN vectors and the result has mode
;; <V_narrow>.  Modelled as an unspec over the VADDHN iterator and emitted as
;; v<r>addhn.<V_if_elem>.
2204 (define_insn "neon_v<r>addhn<mode>"
2205   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2206         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2207                             (match_operand:VN 2 "s_register_operand" "w")]
2208                            VADDHN))]
2209   "TARGET_NEON"
2210   "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2211   [(set_attr "type" "neon_add_halve_narrow_q")]
2214 ;; Polynomial and Float multiplication.
;; Multiply on VPF modes wrapped in UNSPEC_VMUL, emitted as vmul with the
;; <pf> prefix and <V_sz_elem> element size.  The "type" attribute picks the
;; floating-point or integer multiply class depending on <Is_float_mode>.
2215 (define_insn "neon_vmul<pf><mode>"
2216   [(set (match_operand:VPF 0 "s_register_operand" "=w")
2217         (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2218                       (match_operand:VPF 2 "s_register_operand" "w")]
2219                      UNSPEC_VMUL))]
2220   "TARGET_NEON"
2221   "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2222   [(set (attr "type")
2223       (if_then_else (match_test "<Is_float_mode>")
2224                     (const_string "neon_fp_mul_s<q>")
2225                     (const_string "neon_mul_<V_elem_ch><q>")))]
;; mult on VH modes under the standard name mul<mode>3, emitted as vmul.f16.
;; Enabled only when both TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations hold.
2228 (define_insn "mul<mode>3"
2229  [(set
2230    (match_operand:VH 0 "s_register_operand" "=w")
2231    (mult:VH
2232     (match_operand:VH 1 "s_register_operand" "w")
2233     (match_operand:VH 2 "s_register_operand" "w")))]
2234   "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2235   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2236  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Same RTL and output template as the VH mul<mode>3 pattern, but gated only
;; on TARGET_NEON_FP16INST (no flag_unsafe_math_optimizations requirement).
2239 (define_insn "neon_vmulf<mode>"
2240  [(set
2241    (match_operand:VH 0 "s_register_operand" "=w")
2242    (mult:VH
2243     (match_operand:VH 1 "s_register_operand" "w")
2244     (match_operand:VH 2 "s_register_operand" "w")))]
2245   "TARGET_NEON_FP16INST"
2246   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2247  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Builtin expander for vmla on VDQW modes.  Emits mul<mode>3add<mode>_neon
;; when the mode is not a float mode or flag_unsafe_math_optimizations is
;; set; otherwise emits neon_vmla<mode>_unspec.  All four operands are passed
;; through in order.
2250 (define_expand "neon_vmla<mode>"
2251   [(match_operand:VDQW 0 "s_register_operand" "=w")
2252    (match_operand:VDQW 1 "s_register_operand" "0")
2253    (match_operand:VDQW 2 "s_register_operand" "w")
2254    (match_operand:VDQW 3 "s_register_operand" "w")]
2255   "TARGET_NEON"
2257   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2258     emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2259                                              operands[2], operands[3]));
2260   else
2261     emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2262                                            operands[2], operands[3]));
2263   DONE;
;; Builtin expander for vfma on VCVTF modes; requires TARGET_NEON and
;; TARGET_FMA.  Forwards to fma<mode>4_intrinsic with the operands reordered
;; to (0, 2, 3, 1), i.e. operand 1 is passed last.
2266 (define_expand "neon_vfma<VCVTF:mode>"
2267   [(match_operand:VCVTF 0 "s_register_operand")
2268    (match_operand:VCVTF 1 "s_register_operand")
2269    (match_operand:VCVTF 2 "s_register_operand")
2270    (match_operand:VCVTF 3 "s_register_operand")]
2271   "TARGET_NEON && TARGET_FMA"
2273   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2274                                        operands[1]));
2275   DONE;
;; Builtin expander for vfma on VH modes; requires TARGET_NEON_FP16INST.
;; Forwards to fma<mode>4_intrinsic with the operands reordered to
;; (0, 2, 3, 1), i.e. operand 1 is passed last.
2278 (define_expand "neon_vfma<VH:mode>"
2279   [(match_operand:VH 0 "s_register_operand")
2280    (match_operand:VH 1 "s_register_operand")
2281    (match_operand:VH 2 "s_register_operand")
2282    (match_operand:VH 3 "s_register_operand")]
2283   "TARGET_NEON_FP16INST"
2285   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2286                                        operands[1]));
2287   DONE;
;; Builtin expander for vfms on VCVTF modes; requires TARGET_NEON and
;; TARGET_FMA.  Forwards to fmsub<mode>4_intrinsic with the operands
;; reordered to (0, 2, 3, 1), i.e. operand 1 is passed last.
2290 (define_expand "neon_vfms<VCVTF:mode>"
2291   [(match_operand:VCVTF 0 "s_register_operand")
2292    (match_operand:VCVTF 1 "s_register_operand")
2293    (match_operand:VCVTF 2 "s_register_operand")
2294    (match_operand:VCVTF 3 "s_register_operand")]
2295   "TARGET_NEON && TARGET_FMA"
2297   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2298                                          operands[1]));
2299   DONE;
;; Builtin expander for vfms on VH modes; requires TARGET_NEON_FP16INST.
;; Forwards to fmsub<mode>4_intrinsic with the operands reordered to
;; (0, 2, 3, 1), i.e. operand 1 is passed last.
2302 (define_expand "neon_vfms<VH:mode>"
2303   [(match_operand:VH 0 "s_register_operand")
2304    (match_operand:VH 1 "s_register_operand")
2305    (match_operand:VH 2 "s_register_operand")
2306    (match_operand:VH 3 "s_register_operand")]
2307   "TARGET_NEON_FP16INST"
2309   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2310                                          operands[1]));
2311   DONE;
2314 ;; The expand RTL structure here is not important.
2315 ;; We use the gen_* functions anyway.
2316 ;; We just need something to wrap the iterators around.
;; Builtin expander for the vfm<vfml_op>l_<vfml_half> family; requires
;; TARGET_FP16FML.  Builds one half-selector with arm_simd_vect_par_cnst_half
;; and passes it as both operands 4 and 5 of the matching *_intrinsic insn,
;; so the same half is selected from operands 2 and 3.
2318 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2319   [(set (match_operand:VCVTF 0 "s_register_operand")
2320      (unspec:VCVTF
2321         [(match_operand:VCVTF 1 "s_register_operand")
2322            (PLUSMINUS:<VFML>
2323              (match_operand:<VFML> 2 "s_register_operand")
2324              (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2325   "TARGET_FP16FML"
2327   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2328   emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2329                                                              operands[1],
2330                                                              operands[2],
2331                                                              operands[3],
2332                                                              half, half));
2333   DONE;
;; fma whose multiplicands are the float_extend of a vec_select from
;; operands 2 and 3 (selectors must satisfy vect_par_constant_low) and whose
;; addend is operand 1, tied to operand 0 by the "0" constraint.  Emitted as
;; vfmal.f16 using the %<V_lo> parts of operands 2 and 3.
2336 (define_insn "vfmal_low<mode>_intrinsic"
2337  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2338         (fma:VCVTF
2339          (float_extend:VCVTF
2340           (vec_select:<VFMLSEL>
2341            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2342            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2343          (float_extend:VCVTF
2344           (vec_select:<VFMLSEL>
2345            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2346            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2347          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2348  "TARGET_FP16FML"
2349  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2350  [(set_attr "type" "neon_fp_mla_s<q>")]
;; As vfmal_high<mode>_intrinsic, but the selected part of operand 2 is
;; negated (neg) before the float_extend.  Selectors must satisfy
;; vect_par_constant_high.  Emitted as vfmsl.f16 using the %<V_hi> parts of
;; operands 2 and 3.
2353 (define_insn "vfmsl_high<mode>_intrinsic"
2354  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2355         (fma:VCVTF
2356          (float_extend:VCVTF
2357           (neg:<VFMLSEL>
2358             (vec_select:<VFMLSEL>
2359               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2360               (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2361          (float_extend:VCVTF
2362           (vec_select:<VFMLSEL>
2363            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2364            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2365          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2366  "TARGET_FP16FML"
2367  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2368  [(set_attr "type" "neon_fp_mla_s<q>")]
;; fma whose multiplicands are the float_extend of a vec_select from
;; operands 2 and 3 (selectors must satisfy vect_par_constant_high) and whose
;; addend is operand 1, tied to operand 0 by the "0" constraint.  Emitted as
;; vfmal.f16 using the %<V_hi> parts of operands 2 and 3.
2371 (define_insn "vfmal_high<mode>_intrinsic"
2372  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2373         (fma:VCVTF
2374          (float_extend:VCVTF
2375           (vec_select:<VFMLSEL>
2376            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2377            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2378          (float_extend:VCVTF
2379           (vec_select:<VFMLSEL>
2380            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2381            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2382          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2383  "TARGET_FP16FML"
2384  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2385  [(set_attr "type" "neon_fp_mla_s<q>")]
;; As vfmal_low<mode>_intrinsic, but the selected part of operand 2 is
;; negated (neg) before the float_extend.  Selectors must satisfy
;; vect_par_constant_low.  Emitted as vfmsl.f16 using the %<V_lo> parts of
;; operands 2 and 3.
2388 (define_insn "vfmsl_low<mode>_intrinsic"
2389  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2390         (fma:VCVTF
2391          (float_extend:VCVTF
2392           (neg:<VFMLSEL>
2393             (vec_select:<VFMLSEL>
2394               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2395               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2396          (float_extend:VCVTF
2397           (vec_select:<VFMLSEL>
2398            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2399            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2400          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2401  "TARGET_FP16FML"
2402  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2403  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Builtin expander for the by-lane vfm<vfml_op>l_lane_<vfml_half> family
;; where operands 2 and 3 share the mode <VFML>; requires TARGET_FP16FML.
;; The lane number in operand 4 is remapped with NEON_ENDIAN_LANE_N and the
;; half-selector is built with arm_simd_vect_par_cnst_half; both are passed
;; on to the matching *_intrinsic insn as its operands 4 and 5.
2406 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2407   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2408      (unspec:VCVTF
2409         [(match_operand:VCVTF 1 "s_register_operand")
2410          (PLUSMINUS:<VFML>
2411            (match_operand:<VFML> 2 "s_register_operand")
2412            (match_operand:<VFML> 3 "s_register_operand"))
2413          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2414   "TARGET_FP16FML"
2416   rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2417   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2418   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2419                                                (operands[0], operands[1],
2420                                                 operands[2], operands[3],
2421                                                 half, lane));
2422   DONE;
;; By-lane form of vfmal_low: the second multiplicand is a vec_duplicate of
;; the single HF lane of operand 3 selected by the constant operand 5.
;; The output code remaps operand 5 with NEON_ENDIAN_LANE_N.  If the result
;; is beyond the last lane of <VFMLSEL>mode it prints the %<V_hi> part of
;; operand 3 with the lane reduced by GET_MODE_NUNITS (<VFMLSEL>mode);
;; otherwise it prints the %<V_lo> part with the lane unchanged.
2425 (define_insn "vfmal_lane_low<mode>_intrinsic"
2426  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2427         (fma:VCVTF
2428          (float_extend:VCVTF
2429           (vec_select:<VFMLSEL>
2430            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2431            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2432          (float_extend:VCVTF
2433            (vec_duplicate:<VFMLSEL>
2434              (vec_select:HF
2435                (match_operand:<VFML> 3 "s_register_operand" "x")
2436                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2437          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2438  "TARGET_FP16FML"
2440     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2441     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2442       {
2443         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2444         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2445       }
2446     else
2447       {
2448         operands[5] = GEN_INT (lane);
2449         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2450       }
2451   }
2452  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Expander for the by-lane vfm<vfml_op>l patterns in which the lane source
;; (operand 3) has mode <VFMLSEL2> rather than the <VFML> mode of operand 2.
;; Operand 4 is the constant lane number; it is remapped with
;; NEON_ENDIAN_LANE_N relative to <VFMLSEL2>mode.  The half-selecting constant
;; is still built for <VFML>mode, and both are passed as two extra operands
;; to the vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic insn.
2455 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2456   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2457      (unspec:VCVTF
2458         [(match_operand:VCVTF 1 "s_register_operand")
2459          (PLUSMINUS:<VFML>
2460            (match_operand:<VFML> 2 "s_register_operand")
2461            (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2462          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2463   "TARGET_FP16FML"
2465   rtx lane
2466     = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2467   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2468   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2469                 (operands[0], operands[1], operands[2], operands[3],
2470                  half, lane));
2471   DONE;
2474 ;; Used to implement the intrinsics:
2475 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2476 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2477 ;; Needs a bit of care to get the modes of the different sub-expressions right
2478 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2479 ;; S or D subregister to select the appropriate lane from.
;; The output code splits the endian-adjusted lane into a lane within one
;; <VFMLSEL>-sized register (lane % elts_per_reg) and a register offset
;; (lane / elts_per_reg) that is added to operand 3's register number.
;; Operands 2 and 3 are both rebuilt as <VFMLSEL>mode registers before the
;; template is returned.
2481 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2482  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2483         (fma:VCVTF
2484          (float_extend:VCVTF
2485           (vec_select:<VFMLSEL>
2486            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2487            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2488          (float_extend:VCVTF
2489            (vec_duplicate:<VFMLSEL>
2490              (vec_select:HF
2491                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2492                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2493          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2494  "TARGET_FP16FML"
2496    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2497    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2498    int new_lane = lane % elts_per_reg;
2499    int regdiff = lane / elts_per_reg;
2500    operands[5] = GEN_INT (new_lane);
2501    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2502       because we want the print_operand code to print the appropriate
2503       S or D register prefix.  */
2504    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2505    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2506    return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2508  [(set_attr "type" "neon_fp_mla_s<q>")]
2511 ;; Used to implement the intrinsics:
2512 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2513 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2514 ;; Needs a bit of care to get the modes of the different sub-expressions right
2515 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2516 ;; S or D subregister to select the appropriate lane from.
;; Operand 4 must satisfy vect_par_constant_high.  The output code splits the
;; endian-adjusted lane into a lane within one <VFMLSEL>-sized register and a
;; register offset added to operand 3's register number.  Only operand 3 is
;; rebuilt; operand 2 is printed through %<V_hi>.
2518 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2519  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2520         (fma:VCVTF
2521          (float_extend:VCVTF
2522           (vec_select:<VFMLSEL>
2523            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2524            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2525          (float_extend:VCVTF
2526            (vec_duplicate:<VFMLSEL>
2527              (vec_select:HF
2528                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2529                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2530          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2531  "TARGET_FP16FML"
2533    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2534    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2535    int new_lane = lane % elts_per_reg;
2536    int regdiff = lane / elts_per_reg;
2537    operands[5] = GEN_INT (new_lane);
2538    /* We re-create operands[3] in the halved VFMLSEL mode
2539       because we've calculated the correct half-width subreg to extract
2540       the lane from and we want to print *that* subreg instead.  */
2541    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2542    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2544  [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal.f16 by lane, high half of operand 2, with operand 3 in the same
;; <VFML> mode as operand 2.  Operand 4 must satisfy vect_par_constant_high;
;; operand 1 is the accumulator and is tied to the output.
;; The output code prints operand 2 with %<V_hi>.  Operand 3 is printed with
;; %<V_hi> and a lane index reduced by the <VFMLSEL>mode element count when
;; the endian-adjusted lane exceeds that count minus one, and with %<V_lo>
;; and the unchanged lane otherwise.
2547 (define_insn "vfmal_lane_high<mode>_intrinsic"
2548  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2549         (fma:VCVTF
2550          (float_extend:VCVTF
2551           (vec_select:<VFMLSEL>
2552            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2553            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2554          (float_extend:VCVTF
2555            (vec_duplicate:<VFMLSEL>
2556              (vec_select:HF
2557                (match_operand:<VFML> 3 "s_register_operand" "x")
2558                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2559          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2560  "TARGET_FP16FML"
2561   {
2562     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2563     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2564       {
2565         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2566         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2567       }
2568     else
2569       {
2570         operands[5] = GEN_INT (lane);
2571         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2572       }
2573   }
2574  [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 by lane, low half of operand 2, with operand 3 in the same
;; <VFML> mode as operand 2.  Same shape as the vfmal low-half pattern except
;; that the selected half of operand 2 is negated (neg) before being
;; float-extended into the fma.  Operand 1 is the accumulator, tied to the
;; output.
;; The output code picks %<V_hi> or %<V_lo> for operand 3 and rewrites the
;; lane in operand 5 according to whether the endian-adjusted lane exceeds
;; the <VFMLSEL>mode element count minus one.
2577 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2578  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2579         (fma:VCVTF
2580          (float_extend:VCVTF
2581           (neg:<VFMLSEL>
2582             (vec_select:<VFMLSEL>
2583               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2584               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2585          (float_extend:VCVTF
2586            (vec_duplicate:<VFMLSEL>
2587              (vec_select:HF
2588                (match_operand:<VFML> 3 "s_register_operand" "x")
2589                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2590          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2591  "TARGET_FP16FML"
2593     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2594     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2595       {
2596         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2597         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2598       }
2599     else
2600       {
2601         operands[5] = GEN_INT (lane);
2602         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2603       }
2604   }
2605  [(set_attr "type" "neon_fp_mla_s<q>")]
2608 ;; Used to implement the intrinsics:
2609 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2610 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2611 ;; Needs a bit of care to get the modes of the different sub-expressions right
2612 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2613 ;; S or D subregister to select the appropriate lane from.
;; The selected low half of operand 2 is negated before the float_extend.
;; The output code splits the endian-adjusted lane into a lane within one
;; <VFMLSEL>-sized register (lane % elts_per_reg) and a register offset
;; (lane / elts_per_reg) added to operand 3's register number; operands 2 and
;; 3 are both rebuilt as <VFMLSEL>mode registers.
2615 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2616  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2617         (fma:VCVTF
2618          (float_extend:VCVTF
2619           (neg:<VFMLSEL>
2620             (vec_select:<VFMLSEL>
2621               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2622               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2623          (float_extend:VCVTF
2624            (vec_duplicate:<VFMLSEL>
2625              (vec_select:HF
2626                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2627                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2628          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2629  "TARGET_FP16FML"
2631    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2632    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2633    int new_lane = lane % elts_per_reg;
2634    int regdiff = lane / elts_per_reg;
2635    operands[5] = GEN_INT (new_lane);
2636    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2637       because we want the print_operand code to print the appropriate
2638       S or D register prefix.  */
2639    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2640    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2641    return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2643  [(set_attr "type" "neon_fp_mla_s<q>")]
2646 ;; Used to implement the intrinsics:
2647 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2648 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2649 ;; Needs a bit of care to get the modes of the different sub-expressions right
2650 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2651 ;; S or D subregister to select the appropriate lane from.
;; Operand 4 must satisfy vect_par_constant_high and the selected half of
;; operand 2 is negated before the float_extend.  The output code splits the
;; endian-adjusted lane into a lane within one <VFMLSEL>-sized register and a
;; register offset added to operand 3's register number.  Only operand 3 is
;; rebuilt; operand 2 is printed through %<V_hi>.
2653 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2654  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2655         (fma:VCVTF
2656          (float_extend:VCVTF
2657           (neg:<VFMLSEL>
2658             (vec_select:<VFMLSEL>
2659              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2660              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2661          (float_extend:VCVTF
2662            (vec_duplicate:<VFMLSEL>
2663              (vec_select:HF
2664                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2665                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2666          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2667  "TARGET_FP16FML"
2669    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2670    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2671    int new_lane = lane % elts_per_reg;
2672    int regdiff = lane / elts_per_reg;
2673    operands[5] = GEN_INT (new_lane);
2674    /* We re-create operands[3] in the halved VFMLSEL mode
2675       because we've calculated the correct half-width subreg to extract
2676       the lane from and we want to print *that* subreg instead.  */
2677    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2678    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2680  [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 by lane, high half of operand 2, with operand 3 in the same
;; <VFML> mode as operand 2.  Operand 4 must satisfy vect_par_constant_high,
;; the selected half of operand 2 is negated before the float_extend, and
;; operand 1 is the accumulator tied to the output.
;; The output code prints operand 2 with %<V_hi>.  Operand 3 is printed with
;; %<V_hi> and a lane index reduced by the <VFMLSEL>mode element count when
;; the endian-adjusted lane exceeds that count minus one, and with %<V_lo>
;; and the unchanged lane otherwise.
2683 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2684  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2685         (fma:VCVTF
2686          (float_extend:VCVTF
2687           (neg:<VFMLSEL>
2688             (vec_select:<VFMLSEL>
2689              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2690              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2691          (float_extend:VCVTF
2692            (vec_duplicate:<VFMLSEL>
2693              (vec_select:HF
2694                (match_operand:<VFML> 3 "s_register_operand" "x")
2695                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2696          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2697  "TARGET_FP16FML"
2698   {
2699     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2700     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2701       {
2702         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2703         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2704       }
2705     else
2706       {
2707         operands[5] = GEN_INT (lane);
2708         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2709       }
2710   }
2711  [(set_attr "type" "neon_fp_mla_s<q>")]
2714 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vmla with sources in operands 2 and 3; operand 1 is the accumulator
;; and is tied to the output by the "0" constraint.  The operation is kept as
;; UNSPEC_VMLA rather than expressed as plain RTL arithmetic.  The "type"
;; attribute is neon_fp_mla_s<q> when <Is_float_mode> holds and
;; neon_mla_<V_elem_ch><q> otherwise.
2716 (define_insn "neon_vmla<mode>_unspec"
2717   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2718         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2719                       (match_operand:VDQW 2 "s_register_operand" "w")
2720                       (match_operand:VDQW 3 "s_register_operand" "w")]
2721                     UNSPEC_VMLA))]
2722   "TARGET_NEON"
2723   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2724   [(set (attr "type")
2725       (if_then_else (match_test "<Is_float_mode>")
2726                     (const_string "neon_fp_mla_s<q>")
2727                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: sources in operands 2 and 3 have a VW mode, while the accumulator
;; (operand 1, tied to the output) and the result have the <V_widen> mode.
;; The result is printed with %q and the sources with %P.
2730 (define_insn "neon_vmlal<sup><mode>"
2731   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2732         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2733                            (match_operand:VW 2 "s_register_operand" "w")
2734                            (match_operand:VW 3 "s_register_operand" "w")]
2735                           VMLAL))]
2736   "TARGET_NEON"
2737   "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2738   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Expander for vmls over the VDQW modes.  When the mode is not a float mode,
;; or when flag_unsafe_math_optimizations is set, it emits the
;; mul<mode>3neg<mode>add<mode>_neon pattern; otherwise it emits
;; neon_vmls<mode>_unspec.  All four operands are forwarded unchanged.
2741 (define_expand "neon_vmls<mode>"
2742   [(match_operand:VDQW 0 "s_register_operand" "=w")
2743    (match_operand:VDQW 1 "s_register_operand" "0")
2744    (match_operand:VDQW 2 "s_register_operand" "w")
2745    (match_operand:VDQW 3 "s_register_operand" "w")]
2746   "TARGET_NEON"
2748   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2749     emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2750                  operands[1], operands[2], operands[3]));
2751   else
2752     emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2753                                            operands[2], operands[3]));
2754   DONE;
2757 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vmls with sources in operands 2 and 3; operand 1 is the accumulator
;; and is tied to the output by the "0" constraint.  The operation is kept as
;; UNSPEC_VMLS.  The "type" attribute is neon_fp_mla_s<q> when
;; <Is_float_mode> holds and neon_mla_<V_elem_ch><q> otherwise.
2759 (define_insn "neon_vmls<mode>_unspec"
2760   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2761         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2762                       (match_operand:VDQW 2 "s_register_operand" "w")
2763                       (match_operand:VDQW 3 "s_register_operand" "w")]
2764                     UNSPEC_VMLS))]
2765   "TARGET_NEON"
2766   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2767   [(set (attr "type")
2768       (if_then_else (match_test "<Is_float_mode>")
2769                     (const_string "neon_fp_mla_s<q>")
2770                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: sources in operands 2 and 3 have a VW mode, while the accumulator
;; (operand 1, tied to the output) and the result have the <V_widen> mode.
;; The result is printed with %q and the sources with %P.
2773 (define_insn "neon_vmlsl<sup><mode>"
2774   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2775         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2776                            (match_operand:VW 2 "s_register_operand" "w")
2777                            (match_operand:VW 3 "s_register_operand" "w")]
2778                           VMLSL))]
2779   "TARGET_NEON"
2780   "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2781   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2784 ;; vqdmulh, vqrdmulh
;; One pattern covers both mnemonics: the VQDMULH iterator supplies the
;; unspec and <r> is substituted into the pattern name and the mnemonic.
;; Both sources and the result share the same VMDQI mode.
2785 (define_insn "neon_vq<r>dmulh<mode>"
2786   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2787         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2788                        (match_operand:VMDQI 2 "s_register_operand" "w")]
2789                       VQDMULH))]
2790   "TARGET_NEON"
2791   "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2792   [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2795 ;; vqrdmlah, vqrdmlsh
;; Enabled only under TARGET_NEON_RDMA.  Operand 1 is the accumulator, tied
;; to the output; operands 2 and 3 are the sources.  The <neon_rdma_as>
;; attribute of the VQRDMLH_AS iterator is substituted into the pattern name
;; and the mnemonic.
2796 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2797   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2798         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2799                        (match_operand:VMDQI 2 "s_register_operand" "w")
2800                        (match_operand:VMDQI 3 "s_register_operand" "w")]
2801                       VQRDMLH_AS))]
2802   "TARGET_NEON_RDMA"
2803   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2804   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: VMDI sources in operands 2 and 3, <V_widen> accumulator in
;; operand 1 (tied to the output) and <V_widen> result, kept as
;; UNSPEC_VQDMLAL.
2807 (define_insn "neon_vqdmlal<mode>"
2808   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2809         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2810                            (match_operand:VMDI 2 "s_register_operand" "w")
2811                            (match_operand:VMDI 3 "s_register_operand" "w")]
2812                           UNSPEC_VQDMLAL))]
2813   "TARGET_NEON"
2814   "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2815   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: VMDI sources in operands 2 and 3, <V_widen> accumulator in
;; operand 1 (tied to the output) and <V_widen> result, kept as
;; UNSPEC_VQDMLSL.
2818 (define_insn "neon_vqdmlsl<mode>"
2819   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2820         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2821                            (match_operand:VMDI 2 "s_register_operand" "w")
2822                            (match_operand:VMDI 3 "s_register_operand" "w")]
2823                           UNSPEC_VQDMLSL))]
2824   "TARGET_NEON"
2825   "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2826   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW sources (operands 1 and 2) producing a <V_widen> result.
;; There is no accumulator operand.
2829 (define_insn "neon_vmull<sup><mode>"
2830   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2831         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2832                            (match_operand:VW 2 "s_register_operand" "w")]
2833                           VMULL))]
2834   "TARGET_NEON"
2835   "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2836   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI sources (operands 1 and 2) producing a <V_widen> result,
;; kept as UNSPEC_VQDMULL.
2839 (define_insn "neon_vqdmull<mode>"
2840   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2841         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2842                            (match_operand:VMDI 2 "s_register_operand" "w")]
2843                           UNSPEC_VQDMULL))]
2844   "TARGET_NEON"
2845   "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2846   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for vsub over the VCVTF modes.  When the mode is not a float
;; mode, or when flag_unsafe_math_optimizations is set, it emits sub<mode>3;
;; otherwise it emits neon_vsub<mode>_unspec.  The three operands are
;; forwarded unchanged.
2849 (define_expand "neon_vsub<mode>"
2850   [(match_operand:VCVTF 0 "s_register_operand" "=w")
2851    (match_operand:VCVTF 1 "s_register_operand" "w")
2852    (match_operand:VCVTF 2 "s_register_operand" "w")]
2853   "TARGET_NEON"
2855   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2856     emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2857   else
2858     emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2859                                            operands[2]));
2860   DONE;
2863 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vsub on operands 1 and 2, kept as UNSPEC_VSUB.  The "type" attribute
;; is neon_fp_addsub_s<q> when <Is_float_mode> holds and neon_sub<q>
;; otherwise.
2865 (define_insn "neon_vsub<mode>_unspec"
2866   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2867         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2868                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2869                      UNSPEC_VSUB))]
2870   "TARGET_NEON"
2871   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2872   [(set (attr "type")
2873       (if_then_else (match_test "<Is_float_mode>")
2874                     (const_string "neon_fp_addsub_s<q>")
2875                     (const_string "neon_sub<q>")))]
;; vsubl: both sources (operands 1 and 2) have a VDI mode and the result has
;; the <V_widen> mode.
2878 (define_insn "neon_vsubl<sup><mode>"
2879   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2880         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2881                            (match_operand:VDI 2 "s_register_operand" "w")]
2882                           VSUBL))]
2883   "TARGET_NEON"
2884   "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2885   [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 already has the <V_widen> mode of the result, while
;; operand 2 has a VDI mode.  Operands 0 and 1 are printed with %q and
;; operand 2 with %P.
2888 (define_insn "neon_vsubw<sup><mode>"
2889   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2890         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2891                            (match_operand:VDI 2 "s_register_operand" "w")]
2892                           VSUBW))]
2893   "TARGET_NEON"
2894   "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2895   [(set_attr "type" "neon_sub_widen")]
;; vqsub: sources and result share the same VDQIX mode; the VQSUB iterator
;; supplies the unspec.
2898 (define_insn "neon_vqsub<sup><mode>"
2899   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2900         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2901                        (match_operand:VDQIX 2 "s_register_operand" "w")]
2902                       VQSUB))]
2903   "TARGET_NEON"
2904   "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2905   [(set_attr "type" "neon_qsub<q>")]
;; vhsub: sources and result share the same VDQIW mode; the VHSUB iterator
;; supplies the unspec.
2908 (define_insn "neon_vhsub<sup><mode>"
2909   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2910         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2911                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2912                       VHSUB))]
2913   "TARGET_NEON"
2914   "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2915   [(set_attr "type" "neon_sub_halve<q>")]
;; v<r>subhn: both sources have a VN mode and the result has the <V_narrow>
;; mode.  <r> is substituted into the pattern name and the mnemonic.  The
;; result is printed with %P and the sources with %q.
2918 (define_insn "neon_v<r>subhn<mode>"
2919   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2920         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2921                             (match_operand:VN 2 "s_register_operand" "w")]
2922                            VSUBHN))]
2923   "TARGET_NEON"
2924   "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2925   [(set_attr "type" "neon_sub_halve_narrow_q")]
2928 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2929 ;; without unsafe math optimizations.
;; Expander over the VDQW modes.  For a vector-float mode without
;; flag_unsafe_math_optimizations it emits the V2SF or V4SF _insn_unspec
;; pattern, chosen by a switch on the mode; any other mode reaching that
;; switch hits gcc_unreachable.  In all remaining cases it emits
;; neon_vc<cmp_op><mode>_insn.
2930 (define_expand "neon_vc<cmp_op><mode>"
2931   [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2932      (neg:<V_cmp_result>
2933        (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2934                          (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2935   "TARGET_NEON"
2936   {
2937     /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2938        are enabled.  */
2939     if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2940         && !flag_unsafe_math_optimizations)
2941       {
2942         /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2943            we define gen_neon_vceq<mode>_insn_unspec only for float modes
2944            whereas this expander iterates over the integer modes as well,
2945            but we will never expand to UNSPECs for the integer comparisons.  */
2946         switch (<MODE>mode)
2947           {
2948             case E_V2SFmode:
2949               emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2950                                                               operands[1],
2951                                                               operands[2]));
2952               break;
2953             case E_V4SFmode:
2954               emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2955                                                               operands[1],
2956                                                               operands[2]));
2957               break;
2958             default:
2959               gcc_unreachable ();
2960           }
2961       }
2962     else
2963       emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2964                                                  operands[1],
2965                                                  operands[2]));
2966     DONE;
2967   }
;; RTL form of the vector compare: the negated COMPARISONS result in the
;; <V_cmp_result> mode.  The insn condition excludes vector-float modes
;; unless flag_unsafe_math_optimizations is set.
;; There are two alternatives: operand 2 in a register, or operand 2 matching
;; Dz, which is printed as the literal #0.  The template is assembled with
;; sprintf, using "f" as the type letter for vector-float modes and
;; <cmp_type> otherwise, and is emitted directly through output_asm_insn, so
;; the string returned to the caller is empty.
;; The "type" attribute depends on whether operand 2 is a zero_operand.
2970 (define_insn "neon_vc<cmp_op><mode>_insn"
2971   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2972         (neg:<V_cmp_result>
2973           (COMPARISONS:<V_cmp_result>
2974             (match_operand:VDQW 1 "s_register_operand" "w,w")
2975             (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2976   "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2977                     && !flag_unsafe_math_optimizations)"
2978   {
2979     char pattern[100];
2980     sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2981                       " %%<V_reg>1, %s",
2982                        GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2983                          ? "f" : "<cmp_type>",
2984                        which_alternative == 0
2985                          ? "%<V_reg>2" : "#0");
2986     output_asm_insn (pattern, operands);
2987     return "";
2988   }
2989   [(set (attr "type")
2990         (if_then_else (match_operand 2 "zero_operand")
2991                       (const_string "neon_compare_zero<q>")
2992                       (const_string "neon_compare<q>")))]
;; UNSPEC form of the vector compare for the VCVTF modes; the NEON_VCMP
;; iterator supplies the unspec.  There are two alternatives: operand 2 in a
;; register, or operand 2 matching Dz, which is printed as the literal #0.
;; The type letter in the mnemonic suffix is always "f".  The template is
;; assembled with sprintf and emitted directly through output_asm_insn, so
;; the string returned to the caller is empty.
2995 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2996   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2997         (unspec:<V_cmp_result>
2998           [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2999            (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
3000           NEON_VCMP))]
3001   "TARGET_NEON"
3002   {
3003     char pattern[100];
3004     sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3005                        " %%<V_reg>1, %s",
3006                        which_alternative == 0
3007                          ? "%<V_reg>2" : "#0");
3008     output_asm_insn (pattern, operands);
3009     return "";
3011   [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for vector compares over the VH modes, enabled under
;; TARGET_NEON_FP16INST.  For a vector-float mode without
;; flag_unsafe_math_optimizations it emits the _fp16insn_unspec pattern;
;; otherwise it emits the _fp16insn pattern.
3014 (define_expand "neon_vc<cmp_op><mode>"
3015  [(match_operand:<V_cmp_result> 0 "s_register_operand")
3016   (neg:<V_cmp_result>
3017    (COMPARISONS:VH
3018     (match_operand:VH 1 "s_register_operand")
3019     (match_operand:VH 2 "reg_or_zero_operand")))]
3020  "TARGET_NEON_FP16INST"
3022   /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
3023      are enabled.  */
3024   if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3025       && !flag_unsafe_math_optimizations)
3026     emit_insn
3027       (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
3028        (operands[0], operands[1], operands[2]));
3029   else
3030     emit_insn
3031       (gen_neon_vc<cmp_op><mode>_fp16insn
3032        (operands[0], operands[1], operands[2]));
3033   DONE;
;; RTL form of the vector compare for the VH modes.  The insn condition
;; requires TARGET_NEON_FP16INST and excludes vector-float modes unless
;; flag_unsafe_math_optimizations is set.
;; There are two alternatives: operand 2 in a register, or operand 2 matching
;; Dz, which is printed as the literal #0.  The template is assembled with
;; sprintf and emitted directly through output_asm_insn, so the string
;; returned to the caller is empty.
3036 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3037  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3038    (neg:<V_cmp_result>
3039     (COMPARISONS:<V_cmp_result>
3040      (match_operand:VH 1 "s_register_operand" "w,w")
3041      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3042  "TARGET_NEON_FP16INST
3043   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3044   && !flag_unsafe_math_optimizations)"
3046   char pattern[100];
3047   sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3048            " %%<V_reg>1, %s",
3049            GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3050            ? "f" : "<cmp_type>",
3051            which_alternative == 0
3052            ? "%<V_reg>2" : "#0");
3053   output_asm_insn (pattern, operands);
3054   return "";
3056  [(set (attr "type")
3057    (if_then_else (match_operand 2 "zero_operand")
3058     (const_string "neon_compare_zero<q>")
3059     (const_string "neon_compare<q>")))])
;; UNSPEC form of the vector compare for the VH modes, enabled under
;; TARGET_NEON_FP16INST; the NEON_VCMP iterator supplies the unspec.
;; There are two alternatives: operand 2 in a register, or operand 2 matching
;; Dz, which is printed as the literal #0.  The template is assembled with
;; sprintf and emitted directly through output_asm_insn, so the string
;; returned to the caller is empty.
3061 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3062  [(set
3063    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3064    (unspec:<V_cmp_result>
3065     [(match_operand:VH 1 "s_register_operand" "w,w")
3066      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3067     NEON_VCMP))]
3068  "TARGET_NEON_FP16INST"
3070   char pattern[100];
3071   sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3072            " %%<V_reg>1, %s",
3073            which_alternative == 0
3074            ? "%<V_reg>2" : "#0");
3075   output_asm_insn (pattern, operands);
3076   return "";
3078  [(set_attr "type" "neon_fp_compare_s<q>")])
;; Vector compare over the VDQIW modes using the GTUGEU code iterator; the
;; result is the negated comparison in the <V_cmp_result> mode.  The mnemonic
;; suffix always starts with the "u" type letter.  Both sources must be
;; registers; there is no compare-with-zero alternative.
3080 (define_insn "neon_vc<cmp_op>u<mode>"
3081   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3082         (neg:<V_cmp_result>
3083           (GTUGEU:<V_cmp_result>
3084             (match_operand:VDQIW 1 "s_register_operand" "w")
3085             (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3086   "TARGET_NEON"
3087   "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3088   [(set_attr "type" "neon_compare<q>")]
;; Expander for the compare of absolute values (GTGE over abs of each source)
;; in the VCVTF modes.  With flag_unsafe_math_optimizations it emits the
;; RTL-based neon_vca<cmp_op><mode>_insn; otherwise it emits the
;; _insn_unspec pattern.
3091 (define_expand "neon_vca<cmp_op><mode>"
3092   [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3093         (neg:<V_cmp_result>
3094           (GTGE:<V_cmp_result>
3095             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3096             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3097   "TARGET_NEON"
3098   {
3099     if (flag_unsafe_math_optimizations)
3100       emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3101                                                   operands[2]));
3102     else
3103       emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
3104                                                          operands[1],
3105                                                          operands[2]));
3106     DONE;
3107   }
;; RTL form of the absolute compare for the VCVTF modes: the negated GTGE
;; comparison of abs (operand 1) against abs (operand 2).  It is only
;; available when flag_unsafe_math_optimizations is set and prints
;; vac<cmp_op>.
3110 (define_insn "neon_vca<cmp_op><mode>_insn"
3111   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3112         (neg:<V_cmp_result>
3113           (GTGE:<V_cmp_result>
3114             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3115             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3116   "TARGET_NEON && flag_unsafe_math_optimizations"
3117   "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3118   [(set_attr "type" "neon_fp_compare_s<q>")]
;; UNSPEC form of the absolute compare for the VCVTF modes; the NEON_VACMP
;; iterator supplies the unspec and <cmp_op_unsp> the mnemonic.  It is not
;; conditional on flag_unsafe_math_optimizations.
3121 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3122   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3123         (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3124                                 (match_operand:VCVTF 2 "s_register_operand" "w")]
3125                                NEON_VACMP))]
3126   "TARGET_NEON"
3127   "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3128   [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for the compare of absolute values in the VH modes, using the
;; GLTE code iterator and enabled under TARGET_NEON_FP16INST.  With
;; flag_unsafe_math_optimizations it emits the _fp16insn pattern; otherwise
;; it emits the _fp16insn_unspec pattern.
3131 (define_expand "neon_vca<cmp_op><mode>"
3132   [(set
3133     (match_operand:<V_cmp_result> 0 "s_register_operand")
3134     (neg:<V_cmp_result>
3135      (GLTE:<V_cmp_result>
3136       (abs:VH (match_operand:VH 1 "s_register_operand"))
3137       (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3138  "TARGET_NEON_FP16INST"
3140   if (flag_unsafe_math_optimizations)
3141     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3142                (operands[0], operands[1], operands[2]));
3143   else
3144     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3145                (operands[0], operands[1], operands[2]));
3146   DONE;
;; RTL form of the absolute compare for the VH modes: the negated GLTE
;; comparison of abs (operand 1) against abs (operand 2).  It requires both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
3149 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3150   [(set
3151     (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3152     (neg:<V_cmp_result>
3153      (GLTE:<V_cmp_result>
3154       (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3155       (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3156  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3157  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3158  [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec (NEON_VAGLTE) form of the VH-mode absolute compares, emitted by the
;; neon_vca<cmp_op><mode> expander when flag_unsafe_math_optimizations is off.
;; The condition is TARGET_NEON_FP16INST, matching both the expander and the
;; sibling "_fp16insn" pattern, since this pattern operates on VH modes.
3161 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3162  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3163    (unspec:<V_cmp_result>
3164     [(match_operand:VH 1 "s_register_operand" "w")
3165      (match_operand:VH 2 "s_register_operand" "w")]
3166     NEON_VAGLTE))]
3167  "TARGET_NEON_FP16INST"
3168  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3169  [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for the VH modes.  It forwards to
;; neon_vc<cmp_op><mode>, passing CONST0_RTX (<MODE>mode) as the second
;; comparison operand.  Requires TARGET_NEON_FP16INST.
3172 (define_expand "neon_vc<cmp_op>z<mode>"
3173  [(set
3174    (match_operand:<V_cmp_result> 0 "s_register_operand")
3175    (COMPARISONS:<V_cmp_result>
3176     (match_operand:VH 1 "s_register_operand")
3177     (const_int 0)))]
3178  "TARGET_NEON_FP16INST"
3180   emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3181                                         CONST0_RTX (<MODE>mode)));
3182   DONE;
;; vtst on two VDQIW register operands, modelled as UNSPEC_VTST.
;; The result has the same mode as the inputs.
3185 (define_insn "neon_vtst<mode>"
3186   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3187         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3188                        (match_operand:VDQIW 2 "s_register_operand" "w")]
3189                       UNSPEC_VTST))]
3190   "TARGET_NEON"
3191   "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3192   [(set_attr "type" "neon_tst<q>")]
;; Integer vabd for the VDQIW modes, modelled with the VABD unspec iterator;
;; <sup> supplies the type prefix of the element suffix.
3195 (define_insn "neon_vabd<sup><mode>"
3196   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3197         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3198                       (match_operand:VDQIW 2 "s_register_operand" "w")]
3199                      VABD))]
3200   "TARGET_NEON"
3201   "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3202   [(set_attr "type" "neon_abd<q>")]
;; vabd for the VH modes, modelled as UNSPEC_VABD_F.  Requires
;; TARGET_NEON_FP16INST.
3205 (define_insn "neon_vabd<mode>"
3206   [(set (match_operand:VH 0 "s_register_operand" "=w")
3207     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3208                 (match_operand:VH 2 "s_register_operand" "w")]
3209      UNSPEC_VABD_F))]
3210  "TARGET_NEON_FP16INST"
3211  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3212   [(set_attr "type" "neon_abd<q>")]
;; vabd for the VCVTF modes, modelled as UNSPEC_VABD_F.
3215 (define_insn "neon_vabdf<mode>"
3216   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3217         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3218                       (match_operand:VCVTF 2 "s_register_operand" "w")]
3219                      UNSPEC_VABD_F))]
3220   "TARGET_NEON"
3221   "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3222   [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: VW-mode inputs, <V_widen>-mode result (VABDL unspec iterator).
;; The destination is printed with %q and the sources with %P.
3225 (define_insn "neon_vabdl<sup><mode>"
3226   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3227         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3228                            (match_operand:VW 2 "s_register_operand" "w")]
3229                           VABDL))]
3230   "TARGET_NEON"
3231   "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3232   [(set_attr "type" "neon_abd_long")]
;; vaba: the VABD unspec of operands 2 and 3 added to accumulator operand 1.
;; Operand 1 is tied to the destination by the "0" constraint, so only
;; operands 0, 2 and 3 appear in the output template.
3235 (define_insn "neon_vaba<sup><mode>"
3236   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3237         (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3238                                    (match_operand:VDQIW 3 "s_register_operand" "w")]
3239                                   VABD)
3240                     (match_operand:VDQIW 1 "s_register_operand" "0")))]
3241   "TARGET_NEON"
3242   "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3243   [(set_attr "type" "neon_arith_acc<q>")]
;; vabal: the VABDL unspec of VW operands 2 and 3 added to the <V_widen>
;; accumulator operand 1, which is tied to the destination by "0".
3246 (define_insn "neon_vabal<sup><mode>"
3247   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3248         (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3249                                            (match_operand:VW 3 "s_register_operand" "w")]
3250                                            VABDL)
3251                          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3252   "TARGET_NEON"
3253   "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3254   [(set_attr "type" "neon_arith_acc<q>")]
;; Integer v<maxmin> for the VDQIW modes, modelled with the VMAXMIN unspec
;; iterator.
3257 (define_insn "neon_v<maxmin><sup><mode>"
3258   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3259         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3260                       (match_operand:VDQIW 2 "s_register_operand" "w")]
3261                      VMAXMIN))]
3262   "TARGET_NEON"
3263   "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3264   [(set_attr "type" "neon_minmax<q>")]
;; v<maxmin> for the VCVTF modes, modelled with the VMAXMINF unspec iterator.
3267 (define_insn "neon_v<maxmin>f<mode>"
3268   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3269         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3270                       (match_operand:VCVTF 2 "s_register_operand" "w")]
3271                      VMAXMINF))]
3272   "TARGET_NEON"
3273   "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3274   [(set_attr "type" "neon_fp_minmax_s<q>")]
;; VH-mode counterpart of the VMAXMINF v<maxmin> pattern; requires
;; TARGET_NEON_FP16INST.
3277 (define_insn "neon_v<maxmin>f<mode>"
3278  [(set (match_operand:VH 0 "s_register_operand" "=w")
3279    (unspec:VH
3280     [(match_operand:VH 1 "s_register_operand" "w")
3281      (match_operand:VH 2 "s_register_operand" "w")]
3282     VMAXMINF))]
3283  "TARGET_NEON_FP16INST"
3284  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3285  [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vp<maxmin>.f16 for V4HF only (VPMAXMINF unspec iterator).  All operands are
;; printed with %P.  Requires TARGET_NEON_FP16INST.
3288 (define_insn "neon_vp<maxmin>fv4hf"
3289  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3290    (unspec:V4HF
3291     [(match_operand:V4HF 1 "s_register_operand" "w")
3292      (match_operand:V4HF 2 "s_register_operand" "w")]
3293     VPMAXMINF))]
3294  "TARGET_NEON_FP16INST"
3295  "vp<maxmin>.f16\t%P0, %P1, %P2"
3296   [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> for the VH modes, modelled with the VMAXMINFNM unspec
;; iterator.  Requires TARGET_NEON_FP16INST.
3299 (define_insn "neon_<fmaxmin_op><mode>"
3300  [(set
3301    (match_operand:VH 0 "s_register_operand" "=w")
3302    (unspec:VH
3303     [(match_operand:VH 1 "s_register_operand" "w")
3304      (match_operand:VH 2 "s_register_operand" "w")]
3305     VMAXMINFNM))]
3306  "TARGET_NEON_FP16INST"
3307  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3308  [(set_attr "type" "neon_fp_minmax_s<q>")]
3311 ;; v<maxmin>nm intrinsics.
;; VCVTF-mode <fmaxmin_op> (VMAXMINFNM unspec iterator); needs both
;; TARGET_NEON and TARGET_VFP5.
3312 (define_insn "neon_<fmaxmin_op><mode>"
3313   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3314         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3315                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3316                        VMAXMINFNM))]
3317   "TARGET_NEON && TARGET_VFP5"
3318   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3319   [(set_attr "type" "neon_fp_minmax_s<q>")]
3322 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and output template as neon_<fmaxmin_op><mode> for
;; VCVTF, but published under the name <fmaxmin><mode>3.
3323 (define_insn "<fmaxmin><mode>3"
3324   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3325         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3326                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3327                        VMAXMINFNM))]
3328   "TARGET_NEON && TARGET_VFP5"
3329   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3330   [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Thin expander for the VD modes: forwards all three operands unchanged to
;; neon_vpadd_internal<mode>.
3333 (define_expand "neon_vpadd<mode>"
3334   [(match_operand:VD 0 "s_register_operand" "=w")
3335    (match_operand:VD 1 "s_register_operand" "w")
3336    (match_operand:VD 2 "s_register_operand" "w")]
3337   "TARGET_NEON"
3339   emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3340                                             operands[2]));
3341   DONE;
;; vpaddl: single VDQIW input, <V_double_width> result (VPADDL unspec
;; iterator).
3344 (define_insn "neon_vpaddl<sup><mode>"
3345   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3346         (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3347                                  VPADDL))]
3348   "TARGET_NEON"
3349   "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3350   [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: <V_double_width> accumulator operand 1 (tied to the destination by
;; "0") combined with VDQIW operand 2 through the VPADAL unspec iterator.
3353 (define_insn "neon_vpadal<sup><mode>"
3354   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3355         (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3356                                   (match_operand:VDQIW 2 "s_register_operand" "w")]
3357                                  VPADAL))]
3358   "TARGET_NEON"
3359   "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3360   [(set_attr "type" "neon_reduc_add_acc")]
;; Integer vp<maxmin> for the VDI modes (VPMAXMIN unspec iterator).
3363 (define_insn "neon_vp<maxmin><sup><mode>"
3364   [(set (match_operand:VDI 0 "s_register_operand" "=w")
3365         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3366                     (match_operand:VDI 2 "s_register_operand" "w")]
3367                    VPMAXMIN))]
3368   "TARGET_NEON"
3369   "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3370   [(set_attr "type" "neon_reduc_minmax<q>")]
;; vp<maxmin> for the VCVTF modes (VPMAXMINF unspec iterator).
3373 (define_insn "neon_vp<maxmin>f<mode>"
3374   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3375         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3376                     (match_operand:VCVTF 2 "s_register_operand" "w")]
3377                    VPMAXMINF))]
3378   "TARGET_NEON"
3379   "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3380   [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps for the VCVTF modes, modelled as UNSPEC_VRECPS.
3383 (define_insn "neon_vrecps<mode>"
3384   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3385         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3386                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3387                       UNSPEC_VRECPS))]
3388   "TARGET_NEON"
3389   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3390   [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps for the VH modes (UNSPEC_VRECPS); requires TARGET_NEON_FP16INST.
3393 (define_insn "neon_vrecps<mode>"
3394   [(set
3395     (match_operand:VH 0 "s_register_operand" "=w")
3396     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3397                 (match_operand:VH 2 "s_register_operand" "w")]
3398      UNSPEC_VRECPS))]
3399   "TARGET_NEON_FP16INST"
3400   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3401   [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts for the VCVTF modes, modelled as UNSPEC_VRSQRTS.
3404 (define_insn "neon_vrsqrts<mode>"
3405   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3406         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3407                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3408                       UNSPEC_VRSQRTS))]
3409   "TARGET_NEON"
3410   "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3411   [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts for the VH modes (UNSPEC_VRSQRTS); requires TARGET_NEON_FP16INST.
3414 (define_insn "neon_vrsqrts<mode>"
3415   [(set
3416     (match_operand:VH 0 "s_register_operand" "=w")
3417     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3418                  (match_operand:VH 2 "s_register_operand" "w")]
3419      UNSPEC_VRSQRTS))]
3420  "TARGET_NEON_FP16INST"
3421  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3422  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Thin expander for the VDQW modes: delegates to the abs<mode>2 pattern.
3425 (define_expand "neon_vabs<mode>"
3426   [(match_operand:VDQW 0 "s_register_operand" "")
3427    (match_operand:VDQW 1 "s_register_operand" "")]
3428   "TARGET_NEON"
3430   emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3431   DONE;
;; vqabs for the VDQIW modes, modelled as UNSPEC_VQABS on a single input.
3434 (define_insn "neon_vqabs<mode>"
3435   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3436         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3437                       UNSPEC_VQABS))]
3438   "TARGET_NEON"
3439   "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3440   [(set_attr "type" "neon_qabs<q>")]
;; bswap for the VDQHSD modes, output as vrev<V_sz_elem>.8.
3443 (define_insn "neon_bswap<mode>"
3444   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3445         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3446   "TARGET_NEON"
3447   "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3448   [(set_attr "type" "neon_rev<q>")]
;; Thin expander for the VDQW modes: delegates to the neg<mode>2 pattern.
3451 (define_expand "neon_vneg<mode>"
3452   [(match_operand:VDQW 0 "s_register_operand" "")
3453    (match_operand:VDQW 1 "s_register_operand" "")]
3454   "TARGET_NEON"
3456   emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3457   DONE;
3460 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the VCVTI accumulator, tied to the destination by "0".
;; Operands 2 and 3 are the <VSI2QI> inputs of the DOTPROD unspec.
;; Requires TARGET_DOTPROD.
3461 (define_insn "neon_<sup>dot<vsi2qi>"
3462   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3463         (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3464                     (unspec:VCVTI [(match_operand:<VSI2QI> 2
3465                                                         "register_operand" "w")
3466                                    (match_operand:<VSI2QI> 3
3467                                                         "register_operand" "w")]
3468                 DOTPROD)))]
3469   "TARGET_DOTPROD"
3470   "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3471   [(set_attr "type" "neon_dot")]
3474 ;; These instructions map to the __builtins for the Dot Product
3475 ;; indexed operations.
;; Lane form: operand 3 is a V8QI register with constraint "t" and operand 4
;; an immediate lane index.  Before printing, the index is remapped with
;; NEON_ENDIAN_LANE_N for V8QImode and stored back into operands[4].
3476 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3477   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3478         (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3479                     (unspec:VCVTI [(match_operand:<VSI2QI> 2
3480                                                         "register_operand" "w")
3481                                    (match_operand:V8QI 3 "register_operand" "t")
3482                                    (match_operand:SI 4 "immediate_operand" "i")]
3483                 DOTPROD)))]
3484   "TARGET_DOTPROD"
3485   {
3486     operands[4]
3487       = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3488     return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3489   }
3490   [(set_attr "type" "neon_dot")]
3493 ;; These expands map to the Dot Product optab the vectorizer checks for.
3494 ;; The auto-vectorizer expects a dot product builtin that also does an
3495 ;; accumulation into the provided register.
3496 ;; Given the following pattern
3498 ;; for (i=0; i<len; i++) {
3499 ;;     c = a[i] * b[i];
3500 ;;     r += c;
3501 ;; }
3502 ;; return r;
3504 ;; This can be auto-vectorized to
3505 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3507 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
3508 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3509 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3510 ;; ...
3512 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Implementation: the dot product of operands 1 and 2 is accumulated in
;; place into operand 3 via neon_<sup>dot<vsi2qi>, and operand 3 is then
;; copied to operand 0.
3513 (define_expand "<sup>dot_prod<vsi2qi>"
3514   [(set (match_operand:VCVTI 0 "register_operand")
3515         (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3516                                                         "register_operand")
3517                                    (match_operand:<VSI2QI> 2
3518                                                         "register_operand")]
3519                      DOTPROD)
3520                     (match_operand:VCVTI 3 "register_operand")))]
3521   "TARGET_DOTPROD"
3523   emit_insn (
3524     gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3525                                  operands[2]));
3526   emit_insn (gen_rtx_SET (operands[0], operands[3]));
3527   DONE;
;; copysign expander for the VCVTF modes.  It builds a vector whose every
;; element is the bit pattern 0x80000000, copies operand 2 into operand 0,
;; and then issues neon_vbsl<mode> with that mask as the selector and
;; operand 0 / operand 1 as the two data inputs.
;; The mask element is created with gen_int_mode so that the CONST_INT is
;; sign-extended as required for SImode; GEN_INT (0x80000000) would produce
;; a non-canonical constant when HOST_WIDE_INT is wider than 32 bits.
3530 (define_expand "neon_copysignf<mode>"
3531   [(match_operand:VCVTF 0 "register_operand")
3532    (match_operand:VCVTF 1 "register_operand")
3533    (match_operand:VCVTF 2 "register_operand")]
3534   "TARGET_NEON"
3535   "{
3536      rtx v_bitmask_cast;
3537      rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3538      rtx c = gen_int_mode (0x80000000, SImode);
3540      emit_move_insn (v_bitmask,
3541                      gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3542      emit_move_insn (operands[0], operands[2]);
3543      v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3544                                            <VCVTF:V_cmp_result>mode, 0);
3545      emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3546                                      operands[1]));
3548      DONE;
3549   }"
;; vqneg for the VDQIW modes, modelled as UNSPEC_VQNEG on a single input.
3552 (define_insn "neon_vqneg<mode>"
3553   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3554         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3555                       UNSPEC_VQNEG))]
3556   "TARGET_NEON"
3557   "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3558   [(set_attr "type" "neon_qneg<q>")]
;; vcls for the VDQIW modes, modelled as UNSPEC_VCLS on a single input.
3561 (define_insn "neon_vcls<mode>"
3562   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3563         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3564                       UNSPEC_VCLS))]
3565   "TARGET_NEON"
3566   "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3567   [(set_attr "type" "neon_cls<q>")]
;; clz on the VDQIW modes, expressed with the clz RTL code and output as vclz.
3570 (define_insn "clz<mode>2"
3571   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3572         (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3573   "TARGET_NEON"
3574   "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3575   [(set_attr "type" "neon_cnt<q>")]
;; Thin expander for the VDQIW modes: delegates to the clz<mode>2 pattern.
3578 (define_expand "neon_vclz<mode>"
3579   [(match_operand:VDQIW 0 "s_register_operand" "")
3580    (match_operand:VDQIW 1 "s_register_operand" "")]
3581   "TARGET_NEON"
3583   emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3584   DONE;
;; popcount on the VE modes, expressed with the popcount RTL code and output
;; as vcnt.
3587 (define_insn "popcount<mode>2"
3588   [(set (match_operand:VE 0 "s_register_operand" "=w")
3589         (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3590   "TARGET_NEON"
3591   "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3592   [(set_attr "type" "neon_cnt<q>")]
;; Thin expander for the VE modes: delegates to the popcount<mode>2 pattern.
3595 (define_expand "neon_vcnt<mode>"
3596   [(match_operand:VE 0 "s_register_operand" "=w")
3597    (match_operand:VE 1 "s_register_operand" "w")]
3598   "TARGET_NEON"
3600   emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3601   DONE;
;; vrecpe.f16 for the VH modes (UNSPEC_VRECPE); requires
;; TARGET_NEON_FP16INST.
3604 (define_insn "neon_vrecpe<mode>"
3605   [(set (match_operand:VH 0 "s_register_operand" "=w")
3606         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3607                    UNSPEC_VRECPE))]
3608   "TARGET_NEON_FP16INST"
3609   "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3610   [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrecpe for the V32 modes (UNSPEC_VRECPE); the element suffix comes from
;; <V_u_elem>.
3613 (define_insn "neon_vrecpe<mode>"
3614   [(set (match_operand:V32 0 "s_register_operand" "=w")
3615         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3616                     UNSPEC_VRECPE))]
3617   "TARGET_NEON"
3618   "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3619   [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrsqrte for the V32 modes (UNSPEC_VRSQRTE); the element suffix comes from
;; <V_u_elem>.
3622 (define_insn "neon_vrsqrte<mode>"
3623   [(set (match_operand:V32 0 "s_register_operand" "=w")
3624         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3625                     UNSPEC_VRSQRTE))]
3626   "TARGET_NEON"
3627   "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3628   [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Thin expander for the VDQIW modes: delegates to the one_cmpl<mode>2
;; pattern.
3631 (define_expand "neon_vmvn<mode>"
3632   [(match_operand:VDQIW 0 "s_register_operand" "")
3633    (match_operand:VDQIW 1 "s_register_operand" "")]
3634   "TARGET_NEON"
3636   emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3637   DONE;
;; Sign-extending extraction of one element of a VD-mode register into an SI
;; core register ("=r").  When BYTES_BIG_ENDIAN, the lane number is mirrored
;; (nunits - 1 - elt) before the vmov.s is printed.
3640 (define_insn "neon_vget_lane<mode>_sext_internal"
3641   [(set (match_operand:SI 0 "s_register_operand" "=r")
3642         (sign_extend:SI
3643           (vec_select:<V_elem>
3644             (match_operand:VD 1 "s_register_operand" "w")
3645             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3646   "TARGET_NEON"
3648   if (BYTES_BIG_ENDIAN)
3649     {
3650       int elt = INTVAL (operands[2]);
3651       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3652       operands[2] = GEN_INT (elt);
3653     }
3654   return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3656   [(set_attr "type" "neon_to_gp")]
;; Zero-extending extraction of one element of a VD-mode register into an SI
;; core register ("=r").  When BYTES_BIG_ENDIAN, the lane number is mirrored
;; (nunits - 1 - elt) before the vmov.u is printed.
3659 (define_insn "neon_vget_lane<mode>_zext_internal"
3660   [(set (match_operand:SI 0 "s_register_operand" "=r")
3661         (zero_extend:SI
3662           (vec_select:<V_elem>
3663             (match_operand:VD 1 "s_register_operand" "w")
3664             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3665   "TARGET_NEON"
3667   if (BYTES_BIG_ENDIAN)
3668     {
3669       int elt = INTVAL (operands[2]);
3670       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3671       operands[2] = GEN_INT (elt);
3672     }
3673   return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3675   [(set_attr "type" "neon_to_gp")]
;; Sign-extending lane extraction for the VQ2 modes.  The output code builds
;; its own operand array.  The source becomes a <V_HALF>mode register at
;; regno + 2 * (elt / halfelts), and the lane index becomes elt % halfelts,
;; mirrored within the half when BYTES_BIG_ENDIAN.  The instruction is
;; printed via output_asm_insn and an empty template is returned.
3678 (define_insn "neon_vget_lane<mode>_sext_internal"
3679   [(set (match_operand:SI 0 "s_register_operand" "=r")
3680         (sign_extend:SI
3681           (vec_select:<V_elem>
3682             (match_operand:VQ2 1 "s_register_operand" "w")
3683             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3684   "TARGET_NEON"
3686   rtx ops[3];
3687   int regno = REGNO (operands[1]);
3688   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3689   unsigned int elt = INTVAL (operands[2]);
3690   unsigned int elt_adj = elt % halfelts;
3692   if (BYTES_BIG_ENDIAN)
3693     elt_adj = halfelts - 1 - elt_adj;
3695   ops[0] = operands[0];
3696   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3697   ops[2] = GEN_INT (elt_adj);
3698   output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3700   return "";
3702   [(set_attr "type" "neon_to_gp_q")]
;; Zero-extending lane extraction for the VQ2 modes.  The output code builds
;; its own operand array.  The source becomes a <V_HALF>mode register at
;; regno + 2 * (elt / halfelts), and the lane index becomes elt % halfelts,
;; mirrored within the half when BYTES_BIG_ENDIAN.  The instruction is
;; printed via output_asm_insn and an empty template is returned.
3705 (define_insn "neon_vget_lane<mode>_zext_internal"
3706   [(set (match_operand:SI 0 "s_register_operand" "=r")
3707         (zero_extend:SI
3708           (vec_select:<V_elem>
3709             (match_operand:VQ2 1 "s_register_operand" "w")
3710             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3711   "TARGET_NEON"
3713   rtx ops[3];
3714   int regno = REGNO (operands[1]);
3715   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3716   unsigned int elt = INTVAL (operands[2]);
3717   unsigned int elt_adj = elt % halfelts;
3719   if (BYTES_BIG_ENDIAN)
3720     elt_adj = halfelts - 1 - elt_adj;
3722   ops[0] = operands[0];
3723   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3724   ops[2] = GEN_INT (elt_adj);
3725   output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3727   return "";
3729   [(set_attr "type" "neon_to_gp_q")]
;; Lane-extract expander for the VDQW modes.  On big-endian the lane number
;; is first XORed with (elements per 64 bits - 1).  Modes with 32-bit
;; elements then use vec_extract<mode><V_elem_l>; all other element sizes use
;; the sign-extending neon_vget_lane<mode>_sext_internal pattern.
3732 (define_expand "neon_vget_lane<mode>"
3733   [(match_operand:<V_ext> 0 "s_register_operand" "")
3734    (match_operand:VDQW 1 "s_register_operand" "")
3735    (match_operand:SI 2 "immediate_operand" "")]
3736   "TARGET_NEON"
3738   if (BYTES_BIG_ENDIAN)
3739     {
3740       /* The intrinsics are defined in terms of a model where the
3741          element ordering in memory is vldm order, whereas the generic
3742          RTL is defined in terms of a model where the element ordering
3743          in memory is array order.  Convert the lane number to conform
3744          to this model.  */
3745       unsigned int elt = INTVAL (operands[2]);
3746       unsigned int reg_nelts
3747         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3748       elt ^= reg_nelts - 1;
3749       operands[2] = GEN_INT (elt);
3750     }
3752   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3753     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3754                                                 operands[2]));
3755   else
3756     emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3757                                                        operands[1],
3758                                                        operands[2]));
3759   DONE;
;; Zero-extending counterpart of neon_vget_lane<mode>, for the VDQIW modes.
;; It applies the same big-endian lane remapping and the same 32-bit-element
;; shortcut through vec_extract<mode><V_elem_l>.  Other element sizes use
;; neon_vget_lane<mode>_zext_internal.
3762 (define_expand "neon_vget_laneu<mode>"
3763   [(match_operand:<V_ext> 0 "s_register_operand" "")
3764    (match_operand:VDQIW 1 "s_register_operand" "")
3765    (match_operand:SI 2 "immediate_operand" "")]
3766   "TARGET_NEON"
3768   if (BYTES_BIG_ENDIAN)
3769     {
3770       /* The intrinsics are defined in terms of a model where the
3771          element ordering in memory is vldm order, whereas the generic
3772          RTL is defined in terms of a model where the element ordering
3773          in memory is array order.  Convert the lane number to conform
3774          to this model.  */
3775       unsigned int elt = INTVAL (operands[2]);
3776       unsigned int reg_nelts
3777         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3778       elt ^= reg_nelts - 1;
3779       operands[2] = GEN_INT (elt);
3780     }
3782   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3783     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3784                                                 operands[2]));
3785   else
3786     emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3787                                                        operands[1],
3788                                                        operands[2]));
3789   DONE;
;; DImode lane extract: operand 1 is moved to operand 0 as-is.  The lane
;; operand (2) is accepted but not used by the expansion.
3792 (define_expand "neon_vget_lanedi"
3793   [(match_operand:DI 0 "s_register_operand" "=r")
3794    (match_operand:DI 1 "s_register_operand" "w")
3795    (match_operand:SI 2 "immediate_operand" "")]
3796   "TARGET_NEON"
3798   emit_move_insn (operands[0], operands[1]);
3799   DONE;
;; V2DI lane extract.  On big-endian the lane number is flipped (XOR with 1).
;; The lane is then asserted to be 0 or 1, and the DImode low part (lane 0)
;; or high part (lane 1) of operand 1 is moved to operand 0.
3802 (define_expand "neon_vget_lanev2di"
3803   [(match_operand:DI 0 "s_register_operand" "")
3804    (match_operand:V2DI 1 "s_register_operand" "")
3805    (match_operand:SI 2 "immediate_operand" "")]
3806   "TARGET_NEON"
3808   int lane;
3810 if (BYTES_BIG_ENDIAN)
3811     {
3812       /* The intrinsics are defined in terms of a model where the
3813          element ordering in memory is vldm order, whereas the generic
3814          RTL is defined in terms of a model where the element ordering
3815          in memory is array order.  Convert the lane number to conform
3816          to this model.  */
3817       unsigned int elt = INTVAL (operands[2]);
3818       unsigned int reg_nelts = 2;
3819       elt ^= reg_nelts - 1;
3820       operands[2] = GEN_INT (elt);
3821     }
3823   lane = INTVAL (operands[2]);
3824   gcc_assert ((lane ==0) || (lane == 1));
3825   emit_move_insn (operands[0], lane == 0
3826                                 ? gen_lowpart (DImode, operands[1])
3827                                 : gen_highpart (DImode, operands[1]));
3828   DONE;
;; Lane-insert expander for the VDQ modes.  Operand 1 is the new element,
;; operand 2 the input vector and operand 3 the lane.  On big-endian the lane
;; is XORed with (elements per 64 bits - 1).  The lane is handed to
;; vec_set<mode>_internal as the one-hot mask 1 << elt.
3831 (define_expand "neon_vset_lane<mode>"
3832   [(match_operand:VDQ 0 "s_register_operand" "=w")
3833    (match_operand:<V_elem> 1 "s_register_operand" "r")
3834    (match_operand:VDQ 2 "s_register_operand" "0")
3835    (match_operand:SI 3 "immediate_operand" "i")]
3836   "TARGET_NEON"
3838   unsigned int elt = INTVAL (operands[3]);
3840   if (BYTES_BIG_ENDIAN)
3841     {
3842       unsigned int reg_nelts
3843         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3844       elt ^= reg_nelts - 1;
3845     }
3847   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3848                                          GEN_INT (1 << elt), operands[2]));
3849   DONE;
3852 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; DImode lane insert: operand 1 is moved to operand 0.  Operands 2 and 3 are
;; accepted but not used by the expansion.
3854 (define_expand "neon_vset_lanedi"
3855   [(match_operand:DI 0 "s_register_operand" "=w")
3856    (match_operand:DI 1 "s_register_operand" "r")
3857    (match_operand:DI 2 "s_register_operand" "0")
3858    (match_operand:SI 3 "immediate_operand" "i")]
3859   "TARGET_NEON"
3861   emit_move_insn (operands[0], operands[1]);
3862   DONE;
;; vcreate for the VD_RE modes: takes the DImode operand 1 as its <MODE>mode
;; low part (gen_lowpart) and moves that to operand 0.
3865 (define_expand "neon_vcreate<mode>"
3866   [(match_operand:VD_RE 0 "s_register_operand" "")
3867    (match_operand:DI 1 "general_operand" "")]
3868   "TARGET_NEON"
3870   rtx src = gen_lowpart (<MODE>mode, operands[1]);
3871   emit_move_insn (operands[0], src);
3872   DONE;
;; vdup of a core-register ("r") scalar into every element of a VX-mode
;; vector.
3875 (define_insn "neon_vdup_n<mode>"
3876   [(set (match_operand:VX 0 "s_register_operand" "=w")
3877         (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3878   "TARGET_NEON"
3879   "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3880   [(set_attr "type" "neon_from_gp<q>")]
;; vdup.16 of an HF value held in a core register ("r") into a V4HF vector;
;; the destination is printed with %P.
3883 (define_insn "neon_vdup_nv4hf"
3884   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3885         (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3886   "TARGET_NEON"
3887   "vdup.16\t%P0, %1"
3888   [(set_attr "type" "neon_from_gp")]
;; vdup.16 of an HF value held in a core register ("r") into a V8HF vector;
;; the destination is printed with %q.
3891 (define_insn "neon_vdup_nv8hf"
3892   [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3893         (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3894   "TARGET_NEON"
3895   "vdup.16\t%q0, %1"
3896   [(set_attr "type" "neon_from_gp_q")]
;; vdup for the V32 modes with two alternatives.  The scalar comes either
;; from a core register ("r", printed as %1) or from a "t"-constraint
;; register (printed as %y1).  The "type" attribute differs per alternative.
3899 (define_insn "neon_vdup_n<mode>"
3900   [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3901         (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3902   "TARGET_NEON"
3903   "@
3904   vdup.<V_sz_elem>\t%<V_reg>0, %1
3905   vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3906   [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode "duplicate": there is a single element, so this is just a move of
;; operand 1 into operand 0.
3909 (define_expand "neon_vdup_ndi"
3910   [(match_operand:DI 0 "s_register_operand" "=w")
3911    (match_operand:DI 1 "s_register_operand" "r")]
3912   "TARGET_NEON"
3914   emit_move_insn (operands[0], operands[1]);
3915   DONE;
;; Duplicate a DImode value into both halves of a V2DI register using two
;; vmov instructions (length 8).  Alternative 0 takes the source from core
;; registers (%Q1/%R1) and alternative 1 from a "w" register (%P1).  The two
;; destination halves are printed with %e0 and %f0.
3919 (define_insn "neon_vdup_nv2di"
3920   [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3921         (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3922   "TARGET_NEON"
3923   "@
3924   vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3925   vmov\t%e0, %P1\;vmov\t%f0, %P1"
3926   [(set_attr "length" "8")
3927    (set_attr "type" "multiple")]
;; Duplicate one element of a <V_double_vector_mode> register across a VDQW
;; vector.  On big-endian the lane number is mirrored within the source mode
;; before printing.  <Is_d_reg> chooses whether the destination is printed
;; with %P or %q.
3930 (define_insn "neon_vdup_lane<mode>_internal"
3931   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3932         (vec_duplicate:VDQW 
3933           (vec_select:<V_elem>
3934             (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3935             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3936   "TARGET_NEON"
3938   if (BYTES_BIG_ENDIAN)
3939     {
3940       int elt = INTVAL (operands[2]);
3941       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3942       operands[2] = GEN_INT (elt);
3943     }
3944   if (<Is_d_reg>)
3945     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3946   else
3947     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3949   [(set_attr "type" "neon_dup<q>")]
;; VH-mode counterpart of the lane-duplicate insn.  It uses the same
;; big-endian lane mirroring and %P/%q selection, and is enabled only when
;; both TARGET_NEON and TARGET_FP16 hold.
3952 (define_insn "neon_vdup_lane<mode>_internal"
3953  [(set (match_operand:VH 0 "s_register_operand" "=w")
3954    (vec_duplicate:VH
3955     (vec_select:<V_elem>
3956      (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3957      (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3958  "TARGET_NEON && TARGET_FP16"
3960   if (BYTES_BIG_ENDIAN)
3961     {
3962       int elt = INTVAL (operands[2]);
3963       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3964       operands[2] = GEN_INT (elt);
3965     }
3966   if (<Is_d_reg>)
3967     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3968   else
3969     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3971   [(set_attr "type" "neon_dup<q>")]
;; Lane-duplicate expander for the VDQW modes.  On big-endian the lane number
;; is XORed with (elements per 64 bits - 1) before being passed on to
;; neon_vdup_lane<mode>_internal.
3974 (define_expand "neon_vdup_lane<mode>"
3975   [(match_operand:VDQW 0 "s_register_operand" "=w")
3976    (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3977    (match_operand:SI 2 "immediate_operand" "i")]
3978   "TARGET_NEON"
3980   if (BYTES_BIG_ENDIAN)
3981     {
3982       unsigned int elt = INTVAL (operands[2]);
3983       unsigned int reg_nelts
3984         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3985       elt ^= reg_nelts - 1;
3986       operands[2] = GEN_INT (elt);
3987     }
3988     emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3989                                                   operands[2]));
3990     DONE;
;; Lane-duplicate expander for the VH modes (TARGET_NEON && TARGET_FP16).
;; It applies the same big-endian lane remapping as the VDQW expander, then
;; delegates to neon_vdup_lane<mode>_internal.
3993 (define_expand "neon_vdup_lane<mode>"
3994   [(match_operand:VH 0 "s_register_operand")
3995    (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3996    (match_operand:SI 2 "immediate_operand")]
3997   "TARGET_NEON && TARGET_FP16"
3999   if (BYTES_BIG_ENDIAN)
4000     {
4001       unsigned int elt = INTVAL (operands[2]);
4002       unsigned int reg_nelts
4003         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4004       elt ^= reg_nelts - 1;
4005       operands[2] = GEN_INT (elt);
4006     }
4007   emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4008                                                 operands[2]));
4009   DONE;
4012 ; Scalar index is ignored, since only zero is valid here.
;; Expands to a plain move of operand 1 into operand 0; operand 2 is unused.
4013 (define_expand "neon_vdup_lanedi"
4014   [(match_operand:DI 0 "s_register_operand" "=w")
4015    (match_operand:DI 1 "s_register_operand" "w")
4016    (match_operand:SI 2 "immediate_operand" "i")]
4017   "TARGET_NEON"
4019   emit_move_insn (operands[0], operands[1]);
4020   DONE;
4023 ; Likewise for v2di, as the DImode second operand has only a single element.
;; Expander neon_vdup_lanev2di: V2DI destination (operand 0) and DImode
;; source (operand 1).  The expansion emits neon_vdup_nv2di on operands 0
;; and 1; the lane operand (operand 2) is accepted but never read.
4024 (define_expand "neon_vdup_lanev2di"
4025   [(match_operand:V2DI 0 "s_register_operand" "=w")
4026    (match_operand:DI 1 "s_register_operand" "w")
4027    (match_operand:SI 2 "immediate_operand" "i")]
4028   "TARGET_NEON"
4030   emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4031   DONE;
4034 ; Disabled before reload because we don't want combine doing something silly,
4035 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap for the VDQX modes, expressed as two sets: operand 0
;; receives operand 1 and operand 1 receives operand 0.  Both operands are
;; read-write ("+w").  The pattern only matches once reload_completed is
;; set and is output as a single vswp instruction.
4036 (define_insn "*neon_vswp<mode>"
4037   [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4038         (match_operand:VDQX 1 "s_register_operand" "+w"))
4039    (set (match_dup 1) (match_dup 0))]
4040   "TARGET_NEON && reload_completed"
4041   "vswp\t%<V_reg>0, %<V_reg>1"
4042   [(set_attr "type" "neon_permute<q>")]
4045 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4046 ;; dest vector.
4047 ;; FIXME: A different implementation of this builtin could make it much
4048 ;; more likely that we wouldn't actually need to output anything (we could make
4049 ;; it so that the reg allocator puts things in the right places magically
4050 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; neon_vcombine<mode>: vec_concat of two VDX-mode registers (operands 1
;; and 2) into a <V_DOUBLE>-mode register (operand 0).
;; The output template is "#", so the insn is never printed directly; once
;; reload_completed holds it is split by calling neon_split_vcombine on the
;; operand array.  The insn is typed "multiple".
4052 (define_insn_and_split "neon_vcombine<mode>"
4053   [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4054         (vec_concat:<V_DOUBLE>
4055           (match_operand:VDX 1 "s_register_operand" "w")
4056           (match_operand:VDX 2 "s_register_operand" "w")))]
4057   "TARGET_NEON"
4058   "#"
4059   "&& reload_completed"
4060   [(const_int 0)]
4062   neon_split_vcombine (operands);
4063   DONE;
4065 [(set_attr "type" "multiple")]
;; Expander neon_vget_high<mode> for the VQX modes.  Operand 0 (<V_HALF>
;; mode) is set by a move from the <V_HALF>-mode subreg of operand 1 taken
;; at byte offset GET_MODE_SIZE (<V_HALF>mode).
4068 (define_expand "neon_vget_high<mode>"
4069   [(match_operand:<V_HALF> 0 "s_register_operand")
4070    (match_operand:VQX 1 "s_register_operand")]
4071   "TARGET_NEON"
4073   emit_move_insn (operands[0],
4074                   simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4075                                        GET_MODE_SIZE (<V_HALF>mode)));
4076   DONE;
;; Expander neon_vget_low<mode> for the VQX modes.  Operand 0 (<V_HALF>
;; mode) is set by a move from the <V_HALF>-mode subreg of operand 1 taken
;; at byte offset 0.
4079 (define_expand "neon_vget_low<mode>"
4080   [(match_operand:<V_HALF> 0 "s_register_operand")
4081    (match_operand:VQX 1 "s_register_operand")]
4082   "TARGET_NEON"
4084   emit_move_insn (operands[0],
4085                   simplify_gen_subreg (<V_HALF>mode, operands[1],
4086                                        <MODE>mode, 0));
4087   DONE;
;; RTL "float" conversion of a VCVTI-mode register (operand 1) to
;; <V_CVTTO> (operand 0), output as vcvt.f32.s32.  The pattern is disabled
;; when flag_rounding_math is set.
4090 (define_insn "float<mode><V_cvtto>2"
4091   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4092         (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4093   "TARGET_NEON && !flag_rounding_math"
4094   "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4095   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; RTL "unsigned_float" conversion of a VCVTI-mode register (operand 1) to
;; <V_CVTTO> (operand 0), output as vcvt.f32.u32.  The pattern is disabled
;; when flag_rounding_math is set.
4098 (define_insn "floatuns<mode><V_cvtto>2"
4099   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4100         (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] 
4101   "TARGET_NEON && !flag_rounding_math"
4102   "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4103   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; RTL "fix" conversion of a VCVTF-mode register (operand 1) to <V_CVTTO>
;; (operand 0), output as vcvt.s32.f32.  Unlike the int-to-float patterns,
;; the condition here does not test flag_rounding_math.
4106 (define_insn "fix_trunc<mode><V_cvtto>2"
4107   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4108         (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4109   "TARGET_NEON"
4110   "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4111   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; RTL "unsigned_fix" conversion of a VCVTF-mode register (operand 1) to
;; <V_CVTTO> (operand 0), output as vcvt.u32.f32.
4114 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4115   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4116         (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4117   "TARGET_NEON"
4118   "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4119   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup><mode> for VCVTF inputs: an unspec from the VCVT_US
;; iterator converting operand 1 to <V_CVTTO>.  The mnemonic is
;; vcvt.<sup>32.f32, i.e. an f32 source and a 32-bit result whose type
;; letter comes from <sup>.  Typed as an fp-to-int conversion.
4122 (define_insn "neon_vcvt<sup><mode>"
4123   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4124         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4125                           VCVT_US))]
4126   "TARGET_NEON"
4127   "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4128   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup><mode> for VCVTI inputs: an unspec from the VCVT_US
;; iterator converting operand 1 to <V_CVTTO>.  The mnemonic is
;; vcvt.f32.<sup>32, i.e. an f32 result from a 32-bit source whose type
;; letter comes from <sup>.  Typed as an int-to-fp conversion.
4131 (define_insn "neon_vcvt<sup><mode>"
4132   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4133         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4134                           VCVT_US))]
4135   "TARGET_NEON"
4136   "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4137   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion V4HF (operand 1) -> V4SF (operand 0) via UNSPEC_VCVT,
;; output as vcvt.f32.f16.  Requires TARGET_NEON and TARGET_FP16.
4140 (define_insn "neon_vcvtv4sfv4hf"
4141   [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4142         (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4143                           UNSPEC_VCVT))]
4144   "TARGET_NEON && TARGET_FP16"
4145   "vcvt.f32.f16\t%q0, %P1"
4146   [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion V4SF (operand 1) -> V4HF (operand 0) via
;; UNSPEC_VCVT, output as vcvt.f16.f32.  Requires TARGET_NEON and
;; TARGET_FP16.
4149 (define_insn "neon_vcvtv4hfv4sf"
4150   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4151         (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4152                           UNSPEC_VCVT))]
4153   "TARGET_NEON && TARGET_FP16"
4154   "vcvt.f16.f32\t%P0, %q1"
4155   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; neon_vcvt<sup><mode> for VCVTHI inputs: VCVT_US unspec converting
;; operand 1 to <VH_CVTTO>, output as vcvt.f16.<sup>16.  Enabled only under
;; TARGET_NEON_FP16INST.  Typed as an int-to-fp conversion.
4158 (define_insn "neon_vcvt<sup><mode>"
4159  [(set
4160    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4161    (unspec:<VH_CVTTO>
4162     [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4163     VCVT_US))]
4164  "TARGET_NEON_FP16INST"
4165  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4166   [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; neon_vcvt<sup><mode> for VH inputs: VCVT_US unspec converting operand 1
;; to <VH_CVTTO>, output as vcvt.<sup>16.f16.  Enabled only under
;; TARGET_NEON_FP16INST.  Typed as an fp-to-int conversion.
4169 (define_insn "neon_vcvt<sup><mode>"
4170  [(set
4171    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4172    (unspec:<VH_CVTTO>
4173     [(match_operand:VH 1 "s_register_operand" "w")]
4174     VCVT_US))]
4175  "TARGET_NEON_FP16INST"
4176  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4177   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode> for VCVTF inputs: VCVT_US_N unspec taking the
;; vector (operand 1) and an immediate (operand 2), output as
;; vcvt.<sup>32.f32 with the immediate as a third assembler operand.
;; Before printing, arm_const_bounds is called on operand 2 with bounds
;; 1 and 33 (presumably a half-open range check, i.e. 1..32 -- confirm
;; against arm_const_bounds).
4180 (define_insn "neon_vcvt<sup>_n<mode>"
4181   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4182         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4183                            (match_operand:SI 2 "immediate_operand" "i")]
4184                           VCVT_US_N))]
4185   "TARGET_NEON"
4187   arm_const_bounds (operands[2], 1, 33);
4188   return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4190   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode> for VH inputs: VCVT_US_N unspec taking the vector
;; (operand 1) and an immediate (operand 2), output as vcvt.<sup>16.f16
;; with the immediate as a third assembler operand.  Enabled only under
;; TARGET_NEON_FP16INST.
;; NOTE(review): arm_const_bounds is called with a lower bound of 0 here,
;; whereas the 32-bit neon_vcvt<sup>_n<mode> patterns pass 1 -- confirm
;; that 0 is an acceptable immediate for the f16 form.
4193 (define_insn "neon_vcvt<sup>_n<mode>"
4194  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4195    (unspec:<VH_CVTTO>
4196     [(match_operand:VH 1 "s_register_operand" "w")
4197      (match_operand:SI 2 "immediate_operand" "i")]
4198     VCVT_US_N))]
4199   "TARGET_NEON_FP16INST"
4201   arm_const_bounds (operands[2], 0, 17);
4202   return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4204  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode> for VCVTI inputs: VCVT_US_N unspec taking the
;; vector (operand 1) and an immediate (operand 2), output as
;; vcvt.f32.<sup>32 with the immediate as a third assembler operand.
;; arm_const_bounds is called on operand 2 with bounds 1 and 33
;; (presumably a half-open range check -- confirm against
;; arm_const_bounds).
4207 (define_insn "neon_vcvt<sup>_n<mode>"
4208   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4209         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4210                            (match_operand:SI 2 "immediate_operand" "i")]
4211                           VCVT_US_N))]
4212   "TARGET_NEON"
4214   arm_const_bounds (operands[2], 1, 33);
4215   return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4217   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode> for VCVTHI inputs: VCVT_US_N unspec taking the
;; vector (operand 1) and an immediate (operand 2), output as
;; vcvt.f16.<sup>16 with the immediate as a third assembler operand.
;; Enabled only under TARGET_NEON_FP16INST.
;; NOTE(review): arm_const_bounds is called with a lower bound of 0 here,
;; whereas the 32-bit neon_vcvt<sup>_n<mode> patterns pass 1 -- confirm
;; that 0 is an acceptable immediate for the f16 form.
4220 (define_insn "neon_vcvt<sup>_n<mode>"
4221  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4222    (unspec:<VH_CVTTO>
4223     [(match_operand:VCVTHI 1 "s_register_operand" "w")
4224      (match_operand:SI 2 "immediate_operand" "i")]
4225     VCVT_US_N))]
4226  "TARGET_NEON_FP16INST"
4228   arm_const_bounds (operands[2], 0, 17);
4229   return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4231  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; neon_vcvt<vcvth_op><sup><mode>: VCVT_HF_US unspec converting a VH-mode
;; register (operand 1) to <VH_CVTTO>.  The mnemonic is
;; vcvt<vcvth_op>.<sup>16.f16, so <vcvth_op> selects the instruction
;; suffix.  Enabled only under TARGET_NEON_FP16INST.
4234 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4235  [(set
4236    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4237    (unspec:<VH_CVTTO>
4238     [(match_operand:VH 1 "s_register_operand" "w")]
4239     VCVT_HF_US))]
4240  "TARGET_NEON_FP16INST"
4241  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4242   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vmovn<mode>: UNSPEC_VMOVN of a VN-mode register (operand 1)
;; producing a <V_narrow>-mode result (operand 0); output as
;; vmovn.<V_if_elem>.
4245 (define_insn "neon_vmovn<mode>"
4246   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4247         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4248                            UNSPEC_VMOVN))]
4249   "TARGET_NEON"
4250   "vmovn.<V_if_elem>\t%P0, %q1"
4251   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; neon_vqmovn<sup><mode>: unspec from the VQMOVN iterator on a VN-mode
;; register (operand 1) producing a <V_narrow>-mode result (operand 0);
;; output as vqmovn.<sup><V_sz_elem>.
4254 (define_insn "neon_vqmovn<sup><mode>"
4255   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4256         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4257                            VQMOVN))]
4258   "TARGET_NEON"
4259   "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4260   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vqmovun<mode>: UNSPEC_VQMOVUN of a VN-mode register (operand 1)
;; producing a <V_narrow>-mode result (operand 0); output as
;; vqmovun.<V_s_elem>.
4263 (define_insn "neon_vqmovun<mode>"
4264   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4265         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4266                            UNSPEC_VQMOVUN))]
4267   "TARGET_NEON"
4268   "vqmovun.<V_s_elem>\t%P0, %q1"
4269   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vmovl<sup><mode>: unspec from the VMOVL iterator on a VW-mode
;; register (operand 1) producing a <V_widen>-mode result (operand 0);
;; output as vmovl.<sup><V_sz_elem>.
4272 (define_insn "neon_vmovl<sup><mode>"
4273   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4274         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4275                           VMOVL))]
4276   "TARGET_NEON"
4277   "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4278   [(set_attr "type" "neon_shift_imm_long")]
;; neon_vmul_lane<mode> for the VMD modes: UNSPEC_VMUL_LANE of a vector
;; (operand 1), a scalar-holding vector (operand 2, restricted by
;; <scalar_mul_constraint>) and an immediate lane number (operand 3).
;; Output as vmul.<V_if_elem> with operand 2 indexed by the lane.
;; The scheduling type is the fp variant when <Is_float_mode> tests true,
;; otherwise the integer variant.
4281 (define_insn "neon_vmul_lane<mode>"
4282   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4283         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4284                      (match_operand:VMD 2 "s_register_operand"
4285                                         "<scalar_mul_constraint>")
4286                      (match_operand:SI 3 "immediate_operand" "i")]
4287                     UNSPEC_VMUL_LANE))]
4288   "TARGET_NEON"
4290   return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4292   [(set (attr "type")
4293      (if_then_else (match_test "<Is_float_mode>")
4294                    (const_string "neon_fp_mul_s_scalar<q>")
4295                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode> for the VMQ modes: UNSPEC_VMUL_LANE of a vector
;; (operand 1), a <V_HALF>-mode scalar-holding vector (operand 2,
;; restricted by <scalar_mul_constraint>) and an immediate lane number
;; (operand 3).  Output as vmul.<V_if_elem> with %q destination/source and
;; operand 2 indexed by the lane.
;; The scheduling type is the fp variant when <Is_float_mode> tests true,
;; otherwise the integer variant.
4298 (define_insn "neon_vmul_lane<mode>"
4299   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4300         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4301                      (match_operand:<V_HALF> 2 "s_register_operand"
4302                                              "<scalar_mul_constraint>")
4303                      (match_operand:SI 3 "immediate_operand" "i")]
4304                     UNSPEC_VMUL_LANE))]
4305   "TARGET_NEON"
4307   return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4309   [(set (attr "type")
4310      (if_then_else (match_test "<Is_float_mode>")
4311                    (const_string "neon_fp_mul_s_scalar<q>")
4312                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode> for the VH modes: UNSPEC_VMUL_LANE of a vector
;; (operand 1), a V4HF scalar-holding vector (operand 2) and an immediate
;; lane number (operand 3).  Output as vmul.f16 with operand 2 indexed by
;; the lane.  Enabled only under TARGET_NEON_FP16INST.
4315 (define_insn "neon_vmul_lane<mode>"
4316   [(set (match_operand:VH 0 "s_register_operand" "=w")
4317         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4318                     (match_operand:V4HF 2 "s_register_operand"
4319                      "<scalar_mul_constraint>")
4320                      (match_operand:SI 3 "immediate_operand" "i")]
4321                      UNSPEC_VMUL_LANE))]
4322   "TARGET_NEON_FP16INST"
4323   "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4324   [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; neon_vmull<sup>_lane<mode>: unspec from the VMULL_LANE iterator of two
;; VMDI-mode registers (operands 1 and 2) and an immediate lane number
;; (operand 3), producing a <V_widen>-mode result.  Output as
;; vmull.<sup><V_sz_elem> with operand 2 indexed by the lane.
4327 (define_insn "neon_vmull<sup>_lane<mode>"
4328   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4329         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4330                            (match_operand:VMDI 2 "s_register_operand"
4331                                                "<scalar_mul_constraint>")
4332                            (match_operand:SI 3 "immediate_operand" "i")]
4333                           VMULL_LANE))]
4334   "TARGET_NEON"
4336   return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4338   [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; neon_vqdmull_lane<mode>: UNSPEC_VQDMULL_LANE of two VMDI-mode registers
;; (operands 1 and 2) and an immediate lane number (operand 3), producing a
;; <V_widen>-mode result.  Output as vqdmull.<V_s_elem> with operand 2
;; indexed by the lane.
4341 (define_insn "neon_vqdmull_lane<mode>"
4342   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4343         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4344                            (match_operand:VMDI 2 "s_register_operand"
4345                                                "<scalar_mul_constraint>")
4346                            (match_operand:SI 3 "immediate_operand" "i")]
4347                           UNSPEC_VQDMULL_LANE))]
4348   "TARGET_NEON"
4350   return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4352   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; neon_vq<r>dmulh_lane<mode> for the VMQI modes: unspec from the
;; VQDMULH_LANE iterator of a vector (operand 1), a <V_HALF>-mode
;; scalar-holding vector (operand 2) and an immediate lane number
;; (operand 3).  Output as vq<r>dmulh.<V_s_elem> with %q
;; destination/source and operand 2 indexed by the lane.
4355 (define_insn "neon_vq<r>dmulh_lane<mode>"
4356   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4357         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4358                       (match_operand:<V_HALF> 2 "s_register_operand"
4359                                               "<scalar_mul_constraint>")
4360                       (match_operand:SI 3 "immediate_operand" "i")]
4361                       VQDMULH_LANE))]
4362   "TARGET_NEON"
4364   return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4366   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; neon_vq<r>dmulh_lane<mode> for the VMDI modes: unspec from the
;; VQDMULH_LANE iterator of two VMDI-mode registers (operands 1 and 2) and
;; an immediate lane number (operand 3).  Output as vq<r>dmulh.<V_s_elem>
;; with %P operands throughout and operand 2 indexed by the lane.
;; NOTE(review): the type attribute ends in "_scalar_q", the same string
;; used by the VMQI variant, although every register operand here is
;; printed with %P -- confirm this scheduling type is intended.
4369 (define_insn "neon_vq<r>dmulh_lane<mode>"
4370   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4371         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4372                       (match_operand:VMDI 2 "s_register_operand"
4373                                           "<scalar_mul_constraint>")
4374                       (match_operand:SI 3 "immediate_operand" "i")]
4375                       VQDMULH_LANE))]
4376   "TARGET_NEON"
4378   return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4380   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4383 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant.  Operand 1 is tied to the output register by the "0"
;; constraint and therefore does not appear in the assembler template;
;; operand 2 is the vector multiplicand, operand 3 the <V_HALF>-mode
;; scalar-holding vector and operand 4 the immediate lane number.
;; Enabled only under TARGET_NEON_RDMA.
4384 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4385   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4386         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4387                       (match_operand:VMQI 2 "s_register_operand" "w")
4388                       (match_operand:<V_HALF> 3 "s_register_operand"
4389                                           "<scalar_mul_constraint>")
4390                       (match_operand:SI 4 "immediate_operand" "i")]
4391                      VQRDMLH_AS))]
4392   "TARGET_NEON_RDMA"
4394   return
4395    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4397   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI variant.  Operand 1 is tied to the output register by the "0"
;; constraint and therefore does not appear in the assembler template;
;; operand 2 is the vector multiplicand, operand 3 the scalar-holding
;; vector and operand 4 the immediate lane number.  All register operands
;; are printed with %P.  Enabled only under TARGET_NEON_RDMA.
4400 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4401   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4402         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4403                       (match_operand:VMDI 2 "s_register_operand" "w")
4404                       (match_operand:VMDI 3 "s_register_operand"
4405                                           "<scalar_mul_constraint>")
4406                       (match_operand:SI 4 "immediate_operand" "i")]
4407                      VQRDMLH_AS))]
4408   "TARGET_NEON_RDMA"
4410   return
4411    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4413   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; neon_vmla_lane<mode> for the VMD modes: UNSPEC_VMLA_LANE.  Operand 1 is
;; tied to the output register ("0") and so is absent from the template;
;; operand 2 is the vector multiplicand, operand 3 the scalar-holding
;; vector and operand 4 the immediate lane number.  Output as
;; vmla.<V_if_elem>.  The scheduling type is the fp variant when
;; <Is_float_mode> tests true, otherwise the integer variant.
4416 (define_insn "neon_vmla_lane<mode>"
4417   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4418         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4419                      (match_operand:VMD 2 "s_register_operand" "w")
4420                      (match_operand:VMD 3 "s_register_operand"
4421                                         "<scalar_mul_constraint>")
4422                      (match_operand:SI 4 "immediate_operand" "i")]
4423                      UNSPEC_VMLA_LANE))]
4424   "TARGET_NEON"
4426   return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4428   [(set (attr "type")
4429      (if_then_else (match_test "<Is_float_mode>")
4430                    (const_string "neon_fp_mla_s_scalar<q>")
4431                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmla_lane<mode> for the VMQ modes: UNSPEC_VMLA_LANE.  Operand 1 is
;; tied to the output register ("0") and so is absent from the template;
;; operand 2 is the vector multiplicand, operand 3 the <V_HALF>-mode
;; scalar-holding vector and operand 4 the immediate lane number.  Output
;; as vmla.<V_if_elem> with %q destination/multiplicand.  The scheduling
;; type is the fp variant when <Is_float_mode> tests true, otherwise the
;; integer variant.
4434 (define_insn "neon_vmla_lane<mode>"
4435   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4436         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4437                      (match_operand:VMQ 2 "s_register_operand" "w")
4438                      (match_operand:<V_HALF> 3 "s_register_operand"
4439                                              "<scalar_mul_constraint>")
4440                      (match_operand:SI 4 "immediate_operand" "i")]
4441                      UNSPEC_VMLA_LANE))]
4442   "TARGET_NEON"
4444   return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4446   [(set (attr "type")
4447      (if_then_else (match_test "<Is_float_mode>")
4448                    (const_string "neon_fp_mla_s_scalar<q>")
4449                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmlal<sup>_lane<mode>: unspec from the VMLAL_LANE iterator.
;; Operand 1 (<V_widen> mode) is tied to the output register ("0");
;; operands 2 and 3 are VMDI-mode registers and operand 4 the immediate
;; lane number.  Output as vmlal.<sup><V_sz_elem> with operand 3 indexed by
;; the lane.
4452 (define_insn "neon_vmlal<sup>_lane<mode>"
4453   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4454         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4455                            (match_operand:VMDI 2 "s_register_operand" "w")
4456                            (match_operand:VMDI 3 "s_register_operand"
4457                                                "<scalar_mul_constraint>")
4458                            (match_operand:SI 4 "immediate_operand" "i")]
4459                           VMLAL_LANE))]
4460   "TARGET_NEON"
4462   return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4464   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; neon_vqdmlal_lane<mode>: UNSPEC_VQDMLAL_LANE.  Operand 1 (<V_widen>
;; mode) is tied to the output register ("0"); operands 2 and 3 are
;; VMDI-mode registers and operand 4 the immediate lane number.  Output as
;; vqdmlal.<V_s_elem> with operand 3 indexed by the lane.
4467 (define_insn "neon_vqdmlal_lane<mode>"
4468   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4469         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4470                            (match_operand:VMDI 2 "s_register_operand" "w")
4471                            (match_operand:VMDI 3 "s_register_operand"
4472                                                "<scalar_mul_constraint>")
4473                            (match_operand:SI 4 "immediate_operand" "i")]
4474                           UNSPEC_VQDMLAL_LANE))]
4475   "TARGET_NEON"
4477   return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4479   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; neon_vmls_lane<mode> for the VMD modes: UNSPEC_VMLS_LANE.  Operand 1 is
;; tied to the output register ("0") and so is absent from the template;
;; operand 2 is the vector multiplicand, operand 3 the scalar-holding
;; vector and operand 4 the immediate lane number.  Output as
;; vmls.<V_if_elem>.  The scheduling type is the fp variant when
;; <Is_float_mode> tests true, otherwise the integer variant.
4482 (define_insn "neon_vmls_lane<mode>"
4483   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4484         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4485                      (match_operand:VMD 2 "s_register_operand" "w")
4486                      (match_operand:VMD 3 "s_register_operand"
4487                                         "<scalar_mul_constraint>")
4488                      (match_operand:SI 4 "immediate_operand" "i")]
4489                     UNSPEC_VMLS_LANE))]
4490   "TARGET_NEON"
4492   return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4494   [(set (attr "type")
4495      (if_then_else (match_test "<Is_float_mode>")
4496                    (const_string "neon_fp_mla_s_scalar<q>")
4497                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmls_lane<mode> for the VMQ modes: UNSPEC_VMLS_LANE.  Operand 1 is
;; tied to the output register ("0") and so is absent from the template;
;; operand 2 is the vector multiplicand, operand 3 the <V_HALF>-mode
;; scalar-holding vector and operand 4 the immediate lane number.  Output
;; as vmls.<V_if_elem> with %q destination/multiplicand.  The scheduling
;; type is the fp variant when <Is_float_mode> tests true, otherwise the
;; integer variant.
4500 (define_insn "neon_vmls_lane<mode>"
4501   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4502         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4503                      (match_operand:VMQ 2 "s_register_operand" "w")
4504                      (match_operand:<V_HALF> 3 "s_register_operand"
4505                                              "<scalar_mul_constraint>")
4506                      (match_operand:SI 4 "immediate_operand" "i")]
4507                     UNSPEC_VMLS_LANE))]
4508   "TARGET_NEON"
4510   return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4512   [(set (attr "type")
4513      (if_then_else (match_test "<Is_float_mode>")
4514                    (const_string "neon_fp_mla_s_scalar<q>")
4515                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmlsl<sup>_lane<mode>: unspec from the VMLSL_LANE iterator.
;; Operand 1 (<V_widen> mode) is tied to the output register ("0");
;; operands 2 and 3 are VMDI-mode registers and operand 4 the immediate
;; lane number.  Output as vmlsl.<sup><V_sz_elem> with operand 3 indexed by
;; the lane.
4518 (define_insn "neon_vmlsl<sup>_lane<mode>"
4519   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4520         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4521                            (match_operand:VMDI 2 "s_register_operand" "w")
4522                            (match_operand:VMDI 3 "s_register_operand"
4523                                                "<scalar_mul_constraint>")
4524                            (match_operand:SI 4 "immediate_operand" "i")]
4525                           VMLSL_LANE))]
4526   "TARGET_NEON"
4528   return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4530   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; neon_vqdmlsl_lane<mode>: UNSPEC_VQDMLSL_LANE.  Operand 1 (<V_widen>
;; mode) is tied to the output register ("0"); operands 2 and 3 are
;; VMDI-mode registers and operand 4 the immediate lane number.  Output as
;; vqdmlsl.<V_s_elem> with operand 3 indexed by the lane.
4533 (define_insn "neon_vqdmlsl_lane<mode>"
4534   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4535         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4536                            (match_operand:VMDI 2 "s_register_operand" "w")
4537                            (match_operand:VMDI 3 "s_register_operand"
4538                                                "<scalar_mul_constraint>")
4539                            (match_operand:SI 4 "immediate_operand" "i")]
4540                           UNSPEC_VQDMLSL_LANE))]
4541   "TARGET_NEON"
4543   return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4545   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4548 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4549 ; core register into a temp register, then use a scalar taken from that. This
4550 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4551 ; or extracted from another vector. In the latter case it's currently better
4552 ; to use the "_lane" variant, and the former case can probably be implemented
4553 ; using vld1_lane, but that hasn't been done yet.
;; Expander neon_vmul_n<mode> for the VMD modes: multiply vector operand 1
;; by the <V_elem>-mode scalar in operand 2.
;; A fresh <MODE>-mode pseudo TMP is created; neon_vset_lane<mode> is
;; called with TMP as both its first and third argument, operand 2 as the
;; second and const0_rtx as the last (presumably destination, scalar,
;; source vector and lane index -- confirm against neon_vset_lane).  The
;; multiply is then done by neon_vmul_lane<mode> with lane index 0.
4555 (define_expand "neon_vmul_n<mode>"
4556   [(match_operand:VMD 0 "s_register_operand" "")
4557    (match_operand:VMD 1 "s_register_operand" "")
4558    (match_operand:<V_elem> 2 "s_register_operand" "")]
4559   "TARGET_NEON"
4561   rtx tmp = gen_reg_rtx (<MODE>mode);
4562   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4563   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4564                                        const0_rtx));
4565   DONE;
;; Expander neon_vmul_n<mode> for the VMQ modes: multiply vector operand 1
;; by the <V_elem>-mode scalar in operand 2.
;; The temporary is a <V_HALF>-mode pseudo; operand 2 is placed into it
;; with neon_vset_lane<V_half> (last argument const0_rtx), and the
;; multiply is done by neon_vmul_lane<mode> with lane index 0.
4568 (define_expand "neon_vmul_n<mode>"
4569   [(match_operand:VMQ 0 "s_register_operand" "")
4570    (match_operand:VMQ 1 "s_register_operand" "")
4571    (match_operand:<V_elem> 2 "s_register_operand" "")]
4572   "TARGET_NEON"
4574   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4575   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4576   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4577                                        const0_rtx));
4578   DONE;
;; Expander neon_vmul_n<mode> for the VH modes; enabled only under
;; TARGET_NEON_FP16INST.
;; The temporary is always a V4HF pseudo, whichever VH mode is being
;; expanded; operand 2 is placed into it with neon_vset_lanev4hf (last
;; argument const0_rtx), and the multiply is done by neon_vmul_lane<mode>
;; with lane index 0.
4581 (define_expand "neon_vmul_n<mode>"
4582   [(match_operand:VH 0 "s_register_operand")
4583    (match_operand:VH 1 "s_register_operand")
4584    (match_operand:<V_elem> 2 "s_register_operand")]
4585   "TARGET_NEON_FP16INST"
4587   rtx tmp = gen_reg_rtx (V4HFmode);
4588   emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4589   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4590                                        const0_rtx));
4591   DONE;
;; Expander neon_vmulls_n<mode>: VMDI-mode vector (operand 1) times
;; <V_elem>-mode scalar (operand 2), giving a <V_widen>-mode result.
;; Operand 2 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); the work is then
;; delegated to neon_vmulls_lane<mode> with lane index 0.
4594 (define_expand "neon_vmulls_n<mode>"
4595   [(match_operand:<V_widen> 0 "s_register_operand" "")
4596    (match_operand:VMDI 1 "s_register_operand" "")
4597    (match_operand:<V_elem> 2 "s_register_operand" "")]
4598   "TARGET_NEON"
4600   rtx tmp = gen_reg_rtx (<MODE>mode);
4601   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4602   emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4603                                          const0_rtx));
4604   DONE;
;; Expander neon_vmullu_n<mode>: VMDI-mode vector (operand 1) times
;; <V_elem>-mode scalar (operand 2), giving a <V_widen>-mode result.
;; Operand 2 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); the work is then
;; delegated to neon_vmullu_lane<mode> with lane index 0.
4607 (define_expand "neon_vmullu_n<mode>"
4608   [(match_operand:<V_widen> 0 "s_register_operand" "")
4609    (match_operand:VMDI 1 "s_register_operand" "")
4610    (match_operand:<V_elem> 2 "s_register_operand" "")]
4611   "TARGET_NEON"
4613   rtx tmp = gen_reg_rtx (<MODE>mode);
4614   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4615   emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4616                                          const0_rtx));
4617   DONE;
;; Expander neon_vqdmull_n<mode>: VMDI-mode vector (operand 1) and
;; <V_elem>-mode scalar (operand 2), giving a <V_widen>-mode result.
;; Operand 2 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); the work is then
;; delegated to neon_vqdmull_lane<mode> with lane index 0.
4620 (define_expand "neon_vqdmull_n<mode>"
4621   [(match_operand:<V_widen> 0 "s_register_operand" "")
4622    (match_operand:VMDI 1 "s_register_operand" "")
4623    (match_operand:<V_elem> 2 "s_register_operand" "")]
4624   "TARGET_NEON"
4626   rtx tmp = gen_reg_rtx (<MODE>mode);
4627   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4628   emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4629                                           const0_rtx));
4630   DONE;
;; Expander neon_vqdmulh_n<mode> for the VMDI modes: vector operand 1 and
;; <V_elem>-mode scalar operand 2.
;; Operand 2 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); the work is then
;; delegated to neon_vqdmulh_lane<mode> with lane index 0.
4633 (define_expand "neon_vqdmulh_n<mode>"
4634   [(match_operand:VMDI 0 "s_register_operand" "")
4635    (match_operand:VMDI 1 "s_register_operand" "")
4636    (match_operand:<V_elem> 2 "s_register_operand" "")]
4637   "TARGET_NEON"
4639   rtx tmp = gen_reg_rtx (<MODE>mode);
4640   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4641   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4642                                           const0_rtx));
4643   DONE;
;; Expander neon_vqrdmulh_n<mode> for the VMDI modes: vector operand 1 and
;; <V_elem>-mode scalar operand 2.
;; Operand 2 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); the work is then
;; delegated to neon_vqrdmulh_lane<mode> with lane index 0.
4646 (define_expand "neon_vqrdmulh_n<mode>"
4647   [(match_operand:VMDI 0 "s_register_operand" "")
4648    (match_operand:VMDI 1 "s_register_operand" "")
4649    (match_operand:<V_elem> 2 "s_register_operand" "")]
4650   "TARGET_NEON"
4652   rtx tmp = gen_reg_rtx (<MODE>mode);
4653   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4654   emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4655                                           const0_rtx));
4656   DONE;
;; Expander neon_vqdmulh_n<mode> for the VMQI modes: vector operand 1 and
;; <V_elem>-mode scalar operand 2.
;; The temporary is a <V_HALF>-mode pseudo filled through
;; neon_vset_lane<V_half> (last argument const0_rtx); the work is then
;; delegated to neon_vqdmulh_lane<mode> with lane index 0.
4659 (define_expand "neon_vqdmulh_n<mode>"
4660   [(match_operand:VMQI 0 "s_register_operand" "")
4661    (match_operand:VMQI 1 "s_register_operand" "")
4662    (match_operand:<V_elem> 2 "s_register_operand" "")]
4663   "TARGET_NEON"
4665   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4666   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4667   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4668                                           const0_rtx));
4669   DONE;
;; Expander neon_vqrdmulh_n<mode> for the VMQI modes: vector operand 1 and
;; <V_elem>-mode scalar operand 2.
;; The temporary is a <V_HALF>-mode pseudo filled through
;; neon_vset_lane<V_half> (last argument const0_rtx); the work is then
;; delegated to neon_vqrdmulh_lane<mode> with lane index 0.
4672 (define_expand "neon_vqrdmulh_n<mode>"
4673   [(match_operand:VMQI 0 "s_register_operand" "")
4674    (match_operand:VMQI 1 "s_register_operand" "")
4675    (match_operand:<V_elem> 2 "s_register_operand" "")]
4676   "TARGET_NEON"
4678   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4679   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4680   emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4681                                            const0_rtx));
4682   DONE;
;; Expander neon_vmla_n<mode> for the VMD modes.  Operands 1 and 2 are
;; vectors and operand 3 is the <V_elem>-mode scalar.
;; Operand 3 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); operands 0-2 plus that
;; temporary are then passed to neon_vmla_lane<mode> with lane index 0.
4685 (define_expand "neon_vmla_n<mode>"
4686   [(match_operand:VMD 0 "s_register_operand" "")
4687    (match_operand:VMD 1 "s_register_operand" "")
4688    (match_operand:VMD 2 "s_register_operand" "")
4689    (match_operand:<V_elem> 3 "s_register_operand" "")]
4690   "TARGET_NEON"
4692   rtx tmp = gen_reg_rtx (<MODE>mode);
4693   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4694   emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4695                                        tmp, const0_rtx));
4696   DONE;
;; Expander neon_vmla_n<mode> for the VMQ modes.  Operands 1 and 2 are
;; vectors and operand 3 is the <V_elem>-mode scalar.
;; The temporary is a <V_HALF>-mode pseudo filled through
;; neon_vset_lane<V_half> (last argument const0_rtx); operands 0-2 plus
;; that temporary are then passed to neon_vmla_lane<mode> with lane
;; index 0.
4699 (define_expand "neon_vmla_n<mode>"
4700   [(match_operand:VMQ 0 "s_register_operand" "")
4701    (match_operand:VMQ 1 "s_register_operand" "")
4702    (match_operand:VMQ 2 "s_register_operand" "")
4703    (match_operand:<V_elem> 3 "s_register_operand" "")]
4704   "TARGET_NEON"
4706   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4707   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4708   emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4709                                        tmp, const0_rtx));
4710   DONE;
;; Expander neon_vmlals_n<mode>.  Operands 0 and 1 are <V_widen>-mode,
;; operand 2 is a VMDI-mode vector and operand 3 the <V_elem>-mode scalar.
;; Operand 3 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); operands 0-2 plus that
;; temporary are then passed to neon_vmlals_lane<mode> with lane index 0.
4713 (define_expand "neon_vmlals_n<mode>"
4714   [(match_operand:<V_widen> 0 "s_register_operand" "")
4715    (match_operand:<V_widen> 1 "s_register_operand" "")
4716    (match_operand:VMDI 2 "s_register_operand" "")
4717    (match_operand:<V_elem> 3 "s_register_operand" "")]
4718   "TARGET_NEON"
4720   rtx tmp = gen_reg_rtx (<MODE>mode);
4721   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4722   emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4723                                          tmp, const0_rtx));
4724   DONE;
;; Expander neon_vmlalu_n<mode>.  Operands 0 and 1 are <V_widen>-mode,
;; operand 2 is a VMDI-mode vector and operand 3 the <V_elem>-mode scalar.
;; Operand 3 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); operands 0-2 plus that
;; temporary are then passed to neon_vmlalu_lane<mode> with lane index 0.
4727 (define_expand "neon_vmlalu_n<mode>"
4728   [(match_operand:<V_widen> 0 "s_register_operand" "")
4729    (match_operand:<V_widen> 1 "s_register_operand" "")
4730    (match_operand:VMDI 2 "s_register_operand" "")
4731    (match_operand:<V_elem> 3 "s_register_operand" "")]
4732   "TARGET_NEON"
4734   rtx tmp = gen_reg_rtx (<MODE>mode);
4735   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4736   emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4737                                          tmp, const0_rtx));
4738   DONE;
;; Expander neon_vqdmlal_n<mode>.  Operands 0 and 1 are <V_widen>-mode,
;; operand 2 is a VMDI-mode vector and operand 3 the <V_elem>-mode scalar.
;; Operand 3 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); operands 0-2 plus that
;; temporary are then passed to neon_vqdmlal_lane<mode> with lane index 0.
4741 (define_expand "neon_vqdmlal_n<mode>"
4742   [(match_operand:<V_widen> 0 "s_register_operand" "")
4743    (match_operand:<V_widen> 1 "s_register_operand" "")
4744    (match_operand:VMDI 2 "s_register_operand" "")
4745    (match_operand:<V_elem> 3 "s_register_operand" "")]
4746   "TARGET_NEON"
4748   rtx tmp = gen_reg_rtx (<MODE>mode);
4749   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4750   emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4751                                           tmp, const0_rtx));
4752   DONE;
;; Expander neon_vmls_n<mode> for the VMD modes.  Operands 1 and 2 are
;; vectors and operand 3 is the <V_elem>-mode scalar.
;; Operand 3 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); operands 0-2 plus that
;; temporary are then passed to neon_vmls_lane<mode> with lane index 0.
4755 (define_expand "neon_vmls_n<mode>"
4756   [(match_operand:VMD 0 "s_register_operand" "")
4757    (match_operand:VMD 1 "s_register_operand" "")
4758    (match_operand:VMD 2 "s_register_operand" "")
4759    (match_operand:<V_elem> 3 "s_register_operand" "")]
4760   "TARGET_NEON"
4762   rtx tmp = gen_reg_rtx (<MODE>mode);
4763   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4764   emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4765                                        tmp, const0_rtx));
4766   DONE;
;; Expander neon_vmls_n<mode> for the VMQ modes.  Operands 1 and 2 are
;; vectors and operand 3 is the <V_elem>-mode scalar.
;; The temporary is a <V_HALF>-mode pseudo filled through
;; neon_vset_lane<V_half> (last argument const0_rtx); operands 0-2 plus
;; that temporary are then passed to neon_vmls_lane<mode> with lane
;; index 0.
4769 (define_expand "neon_vmls_n<mode>"
4770   [(match_operand:VMQ 0 "s_register_operand" "")
4771    (match_operand:VMQ 1 "s_register_operand" "")
4772    (match_operand:VMQ 2 "s_register_operand" "")
4773    (match_operand:<V_elem> 3 "s_register_operand" "")]
4774   "TARGET_NEON"
4776   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4777   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4778   emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4779                                        tmp, const0_rtx));
4780   DONE;
;; Expander neon_vmlsls_n<mode>.  Operands 0 and 1 are <V_widen>-mode,
;; operand 2 is a VMDI-mode vector and operand 3 the <V_elem>-mode scalar.
;; Operand 3 is placed into a fresh <MODE>-mode pseudo with
;; neon_vset_lane<mode> (last argument const0_rtx); operands 0-2 plus that
;; temporary are then passed to neon_vmlsls_lane<mode> with lane index 0.
4783 (define_expand "neon_vmlsls_n<mode>"
4784   [(match_operand:<V_widen> 0 "s_register_operand" "")
4785    (match_operand:<V_widen> 1 "s_register_operand" "")
4786    (match_operand:VMDI 2 "s_register_operand" "")
4787    (match_operand:<V_elem> 3 "s_register_operand" "")]
4788   "TARGET_NEON"
4790   rtx tmp = gen_reg_rtx (<MODE>mode);
4791   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4792   emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4793                                         tmp, const0_rtx));
4794   DONE;
;; Scalar ("_n") expander for vmlslu; identical in shape to neon_vmlsls_n
;; but delegating to neon_vmlslu_lane.  The <V_elem> scalar in operand 3 is
;; inserted into lane 0 of a fresh <MODE> temporary first.
4797 (define_expand "neon_vmlslu_n<mode>"
4798   [(match_operand:<V_widen> 0 "s_register_operand" "")
4799    (match_operand:<V_widen> 1 "s_register_operand" "")
4800    (match_operand:VMDI 2 "s_register_operand" "")
4801    (match_operand:<V_elem> 3 "s_register_operand" "")]
4802   "TARGET_NEON"
4804   rtx tmp = gen_reg_rtx (<MODE>mode);
4805   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4806   emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4807                                         tmp, const0_rtx));
4808   DONE;
;; Scalar ("_n") expander for vqdmlsl.  As with the other "_n" expanders in
;; this group, the <V_elem> scalar (operand 3) is inserted into lane 0 of a
;; fresh <MODE> temporary and neon_vqdmlsl_lane is emitted with lane index 0.
4811 (define_expand "neon_vqdmlsl_n<mode>"
4812   [(match_operand:<V_widen> 0 "s_register_operand" "")
4813    (match_operand:<V_widen> 1 "s_register_operand" "")
4814    (match_operand:VMDI 2 "s_register_operand" "")
4815    (match_operand:<V_elem> 3 "s_register_operand" "")]
4816   "TARGET_NEON"
4818   rtx tmp = gen_reg_rtx (<MODE>mode);
4819   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4820   emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4821                                           tmp, const0_rtx));
4822   DONE;
;; vext on VDQX modes, modelled as UNSPEC_VEXT of two vector registers and an
;; immediate.  Before returning the assembler template the output code
;; passes operand 3 to arm_const_bounds with limits 0 and
;; GET_MODE_NUNITS (<MODE>mode).
4825 (define_insn "neon_vext<mode>"
4826   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4827         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4828                       (match_operand:VDQX 2 "s_register_operand" "w")
4829                       (match_operand:SI 3 "immediate_operand" "i")]
4830                      UNSPEC_VEXT))]
4831   "TARGET_NEON"
4833   arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4834   return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4836   [(set_attr "type" "neon_ext<q>")]
;; vrev64 on VDQ modes: a one-input UNSPEC_VREV64 printed as
;; "vrev64.<element size>" with the destination and source registers.
4839 (define_insn "neon_vrev64<mode>"
4840   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4841         (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4842                     UNSPEC_VREV64))]
4843   "TARGET_NEON"
4844   "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4845   [(set_attr "type" "neon_rev<q>")]
;; vrev32 on the VX mode iterator: a one-input UNSPEC_VREV32 printed as
;; "vrev32.<element size>" with the destination and source registers.
4848 (define_insn "neon_vrev32<mode>"
4849   [(set (match_operand:VX 0 "s_register_operand" "=w")
4850         (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4851                    UNSPEC_VREV32))]
4852   "TARGET_NEON"
4853   "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4854   [(set_attr "type" "neon_rev<q>")]
;; vrev16 on the VE mode iterator: a one-input UNSPEC_VREV16 printed as
;; "vrev16.<element size>" with the destination and source registers.
4857 (define_insn "neon_vrev16<mode>"
4858   [(set (match_operand:VE 0 "s_register_operand" "=w")
4859         (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4860                    UNSPEC_VREV16))]
4861   "TARGET_NEON"
4862   "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4863   [(set_attr "type" "neon_rev<q>")]
4866 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4867 ; allocation. For an intrinsic of form:
4868 ;   rD = vbsl_* (rS, rN, rM)
4869 ; We can use any of:
4870 ;   vbsl rS, rN, rM  (if D = S)
4871 ;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
4872 ;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
;
; In the pattern below operand 1 plays the role of rS, operand 2 of rN and
; operand 3 of rM.  The three constraint alternatives tie the destination
; to operand 1, operand 3 and operand 2 respectively, and the output
; templates are listed in the same order (vbsl, vbit, vbif).
4874 (define_insn "neon_vbsl<mode>_internal"
4875   [(set (match_operand:VDQX 0 "s_register_operand"               "=w,w,w")
4876         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4877                       (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4878                       (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4879                      UNSPEC_VBSL))]
4880   "TARGET_NEON"
4881   "@
4882   vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4883   vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4884   vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4885   [(set_attr "type" "neon_bsl<q>")]
;; Expander for vbsl.  Operand 1 arrives in <V_cmp_result> mode while the
;; other operands are in <MODE>; the preparation code re-views operand 1 in
;; <MODE> with gen_lowpart so that all inputs of the UNSPEC_VBSL share one
;; mode.  No DONE appears in the visible preparation code, so presumably the
;; RTL template above is what gets emitted -- confirm against the full file.
4888 (define_expand "neon_vbsl<mode>"
4889   [(set (match_operand:VDQX 0 "s_register_operand" "")
4890         (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4891                       (match_operand:VDQX 2 "s_register_operand" "")
4892                       (match_operand:VDQX 3 "s_register_operand" "")]
4893                      UNSPEC_VBSL))]
4894   "TARGET_NEON"
4896   /* We can't alias operands together if they have different modes.  */
4897   operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4900 ;; vshl, vrshl
;; Register-by-register form: both inputs are VDQIX vector registers and the
;; unspec code comes from the VSHL iterator.  The mnemonic and the
;; signedness prefix are supplied by <shift_op> and <sup>.
4901 (define_insn "neon_v<shift_op><sup><mode>"
4902   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4903         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4904                        (match_operand:VDQIX 2 "s_register_operand" "w")]
4905                       VSHL))]
4906   "TARGET_NEON"
4907   "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4908   [(set_attr "type" "neon_shift_imm<q>")]
4911 ;; vqshl, vqrshl
;; Same operand shape and output template as the vshl/vrshl pattern, but
;; iterating over VQSHL and carrying the neon_sat_shift_imm<q> type.
4912 (define_insn "neon_v<shift_op><sup><mode>"
4913   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4914         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4915                        (match_operand:VDQIX 2 "s_register_operand" "w")]
4916                       VQSHL))]
4917   "TARGET_NEON"
4918   "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4919   [(set_attr "type" "neon_sat_shift_imm<q>")]
4922 ;; vshr_n, vrshr_n
;; Shift by immediate (operand 2).  The output code passes the immediate to
;; arm_const_bounds with limits 1 and neon_element_bits (<MODE>mode) + 1
;; before returning the template.
4923 (define_insn "neon_v<shift_op><sup>_n<mode>"
4924   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4925         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4926                        (match_operand:SI 2 "immediate_operand" "i")]
4927                       VSHR_N))]
4928   "TARGET_NEON"
4930   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4931   return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4933   [(set_attr "type" "neon_shift_imm<q>")]
4936 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: the source is a VN vector and the result
;; has mode <V_narrow>.  The immediate is passed to arm_const_bounds with
;; limits 1 and neon_element_bits (<MODE>mode) / 2 + 1, i.e. the upper limit
;; is derived from half the source element width.
4937 (define_insn "neon_v<shift_op>_n<mode>"
4938   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4939         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4940                             (match_operand:SI 2 "immediate_operand" "i")]
4941                            VSHRN_N))]
4942   "TARGET_NEON"
4944   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4945   return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4947   [(set_attr "type" "neon_shift_imm_narrow_q")]
4950 ;; vqshrn_n, vqrshrn_n
;; Same operand shape and immediate limits as vshrn_n/vrshrn_n, iterating
;; over VQSHRN_N; the template adds the <sup> signedness prefix and uses
;; <V_sz_elem> for the element size.
4951 (define_insn "neon_v<shift_op><sup>_n<mode>"
4952   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4953         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4954                             (match_operand:SI 2 "immediate_operand" "i")]
4955                            VQSHRN_N))]
4956   "TARGET_NEON"
4958   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4959   return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4961   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4964 ;; vqshrun_n, vqrshrun_n
;; Same operand shape and immediate limits as the other narrowing shifts,
;; iterating over VQSHRUN_N; the element suffix comes from <V_s_elem>.
4965 (define_insn "neon_v<shift_op>_n<mode>"
4966   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4967         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4968                             (match_operand:SI 2 "immediate_operand" "i")]
4969                            VQSHRUN_N))]
4970   "TARGET_NEON"
4972   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4973   return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4975   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate (UNSPEC_VSHL_N).  The immediate in operand 2 is passed
;; to arm_const_bounds with limits 0 and neon_element_bits (<MODE>mode)
;; before the template is returned.
4978 (define_insn "neon_vshl_n<mode>"
4979   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4980         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4981                        (match_operand:SI 2 "immediate_operand" "i")]
4982                       UNSPEC_VSHL_N))]
4983   "TARGET_NEON"
4985   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4986   return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4988   [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate, iterating over VQSHL_N.  Uses the same immediate
;; limits as neon_vshl_n (0 and the element width) and prints the <sup>
;; signedness prefix ahead of the element size.
4991 (define_insn "neon_vqshl_<sup>_n<mode>"
4992   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4993         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4994                        (match_operand:SI 2 "immediate_operand" "i")]
4995                       VQSHL_N))]
4996   "TARGET_NEON"
4998   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4999   return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
5001   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate (UNSPEC_VQSHLU_N).  Immediate limits are 0 and the
;; element width, as for neon_vshl_n; the element suffix is <V_s_elem>.
5004 (define_insn "neon_vqshlu_n<mode>"
5005   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5006         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5007                        (match_operand:SI 2 "immediate_operand" "i")]
5008                       UNSPEC_VQSHLU_N))]
5009   "TARGET_NEON"
5011   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
5012   return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
5014   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate: the source is a VW vector and the
;; result has mode <V_widen>.
;; NOTE(review): the comment in the body says "0 < imm", but the lower limit
;; passed to arm_const_bounds is 0, the same value the non-widening left
;; shifts pass -- confirm whether an immediate of 0 is meant to be accepted.
5017 (define_insn "neon_vshll<sup>_n<mode>"
5018   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
5019         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
5020                            (match_operand:SI 2 "immediate_operand" "i")]
5021                           VSHLL_N))]
5022   "TARGET_NEON"
5024   /* The boundaries are: 0 < imm <= size.  */
5025   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
5026   return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
5028   [(set_attr "type" "neon_shift_imm_long")]
5031 ;; vsra_n, vrsra_n
;; Operand 1 is tied to the destination (constraint "0"), so the template
;; prints only operand 0, operand 2 and the immediate in operand 3.  The
;; immediate is passed to arm_const_bounds with limits 1 and element
;; width + 1.
5032 (define_insn "neon_v<shift_op><sup>_n<mode>"
5033   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5034         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5035                        (match_operand:VDQIX 2 "s_register_operand" "w")
5036                        (match_operand:SI 3 "immediate_operand" "i")]
5037                       VSRA_N))]
5038   "TARGET_NEON"
5040   arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5041   return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5043   [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate (UNSPEC_VSRI).  Operand 1 is tied to the destination;
;; operand 2 is the other vector input and operand 3 the immediate, which is
;; passed to arm_const_bounds with limits 1 and element width + 1.
5046 (define_insn "neon_vsri_n<mode>"
5047   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5048         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5049                        (match_operand:VDQIX 2 "s_register_operand" "w")
5050                        (match_operand:SI 3 "immediate_operand" "i")]
5051                       UNSPEC_VSRI))]
5052   "TARGET_NEON"
5054   arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5055   return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5057   [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate (UNSPEC_VSLI).  Same operand layout as neon_vsri_n, but
;; the immediate limits passed to arm_const_bounds are 0 and the element
;; width.
5060 (define_insn "neon_vsli_n<mode>"
5061   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5062         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5063                        (match_operand:VDQIX 2 "s_register_operand" "w")
5064                        (match_operand:SI 3 "immediate_operand" "i")]
5065                       UNSPEC_VSLI))]
5066   "TARGET_NEON"
5068   arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5069   return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5071   [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table: operand 1 is the V8QI table (printed
;; inside braces) and operand 2 the V8QI index vector.
5074 (define_insn "neon_vtbl1v8qi"
5075   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5076         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5077                       (match_operand:V8QI 2 "s_register_operand" "w")]
5078                      UNSPEC_VTBL))]
5079   "TARGET_NEON"
5080   "vtbl.8\t%P0, {%P1}, %P2"
5081   [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in a single TI-mode operand.  The
;; output code rebuilds the register list by hand: it takes REGNO of the
;; table operand and forms V8QI registers at that number and at +2, then
;; prints the instruction itself with output_asm_insn and returns an empty
;; template.
5084 (define_insn "neon_vtbl2v8qi"
5085   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5086         (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5087                       (match_operand:V8QI 2 "s_register_operand" "w")]
5088                      UNSPEC_VTBL))]
5089   "TARGET_NEON"
5091   rtx ops[4];
5092   int tabbase = REGNO (operands[1]);
5094   ops[0] = operands[0];
5095   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5096   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5097   ops[3] = operands[2];
5098   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5100   return "";
5102   [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in a single EI-mode operand.  The
;; register list is formed from V8QI registers at REGNO of the table and at
;; +2 and +4; the instruction is printed with output_asm_insn and an empty
;; template is returned.
5105 (define_insn "neon_vtbl3v8qi"
5106   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5107         (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5108                       (match_operand:V8QI 2 "s_register_operand" "w")]
5109                      UNSPEC_VTBL))]
5110   "TARGET_NEON"
5112   rtx ops[5];
5113   int tabbase = REGNO (operands[1]);
5115   ops[0] = operands[0];
5116   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5117   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5118   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5119   ops[4] = operands[2];
5120   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5122   return "";
5124   [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in a single OI-mode operand.  The
;; register list is formed from V8QI registers at REGNO of the table and at
;; +2, +4 and +6; the instruction is printed with output_asm_insn and an
;; empty template is returned.
5127 (define_insn "neon_vtbl4v8qi"
5128   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5129         (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5130                       (match_operand:V8QI 2 "s_register_operand" "w")]
5131                      UNSPEC_VTBL))]
5132   "TARGET_NEON"
5134   rtx ops[6];
5135   int tabbase = REGNO (operands[1]);
5137   ops[0] = operands[0];
5138   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5139   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5140   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5141   ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5142   ops[5] = operands[2];
5143   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5145   return "";
5147   [(set_attr "type" "neon_tbl4")]
5150 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; neon_vtbl1v16qi: a V16QI table lookup that has no assembler template of
;; its own ("#") and is split once reload has completed.  The V16QI table
;; (operand 1) is re-viewed as TImode, and two neon_vtbl2v8qi insns are
;; emitted: one for the low V8QI halves of the result and of the index
;; vector, one for the high halves.  Both use the whole table.
5151 (define_insn_and_split "neon_vtbl1v16qi"
5152   [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5153         (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5154                        (match_operand:V16QI 2 "s_register_operand" "w")]
5155                       UNSPEC_VTBL))]
5156   "TARGET_NEON"
5157   "#"
5158   "&& reload_completed"
5159   [(const_int 0)]
5161   rtx op0, op1, op2, part0, part2;
5162   unsigned ofs;
5164   op0 = operands[0];
5165   op1 = gen_lowpart (TImode, operands[1]);
5166   op2 = operands[2];
5168   ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5169   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5170   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5171   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5173   ofs = subreg_highpart_offset (V8QImode, V16QImode);
5174   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5175   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5176   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5177   DONE;
5179   [(set_attr "type" "multiple")]
;; V16QI table lookup whose table is an OI-mode operand.  Like
;; neon_vtbl1v16qi it has no assembler template of its own ("#") and is
;; split once reload has completed into two V8QI lookups, one for the low
;; halves of the result and index vector and one for the high halves.
;; The table is OImode, so the lookups are generated with the pattern that
;; declares an OI table operand (neon_vtbl4v8qi); neon_vtbl2v8qi declares
;; its table operand as TImode.
5182 (define_insn_and_split "neon_vtbl2v16qi"
5183   [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5184         (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5185                        (match_operand:V16QI 2 "s_register_operand" "w")]
5186                       UNSPEC_VTBL))]
5187   "TARGET_NEON"
5188   "#"
5189   "&& reload_completed"
5190   [(const_int 0)]
5192   rtx op0, op1, op2, part0, part2;
5193   unsigned ofs;
5195   op0 = operands[0];
5196   op1 = operands[1];
5197   op2 = operands[2];
  /* Low halves of the result and of the index vector.  */
5199   ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5200   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5201   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5202   emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  /* High halves, looked up in the same OImode table.  */
5204   ofs = subreg_highpart_offset (V8QImode, V16QImode);
5205   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5206   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5207   emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
5208   DONE;
5210   [(set_attr "type" "multiple")]
5213 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5214 ;; handle quad-word input modes, producing octa-word output modes.  But
5215 ;; that requires us to add support for octa-word vector modes in moves.
5216 ;; That seems overkill for this one use in vec_perm.
;; The pattern joins two V16QI registers into one OI-mode value
;; (UNSPEC_VCONCAT).  It has no assembler template of its own ("#"); after
;; reload the split hands the operands to neon_split_vcombine.
5217 (define_insn_and_split "neon_vcombinev16qi"
5218   [(set (match_operand:OI 0 "s_register_operand" "=w")
5219         (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5220                     (match_operand:V16QI 2 "s_register_operand" "w")]
5221                    UNSPEC_VCONCAT))]
5222   "TARGET_NEON"
5223   "#"
5224   "&& reload_completed"
5225   [(const_int 0)]
5227   neon_split_vcombine (operands);
5228   DONE;
5230 [(set_attr "type" "multiple")]
;; vtbx with a one-register table.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 is the V8QI table and operand 3 the V8QI
;; index vector; only operands 0, 2 and 3 appear in the template.
5233 (define_insn "neon_vtbx1v8qi"
5234   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5235         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5236                       (match_operand:V8QI 2 "s_register_operand" "w")
5237                       (match_operand:V8QI 3 "s_register_operand" "w")]
5238                      UNSPEC_VTBX))]
5239   "TARGET_NEON"
5240   "vtbx.8\t%P0, {%P2}, %P3"
5241   [(set_attr "type" "neon_tbl1")]
;; vtbx with a two-register table held in a TI-mode operand (operand 2).
;; Operand 1 is tied to the destination.  The register list is formed from
;; V8QI registers at REGNO of the table and at +2, and the instruction is
;; printed with output_asm_insn; the returned template is empty.
5244 (define_insn "neon_vtbx2v8qi"
5245   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5246         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5247                       (match_operand:TI 2 "s_register_operand" "w")
5248                       (match_operand:V8QI 3 "s_register_operand" "w")]
5249                      UNSPEC_VTBX))]
5250   "TARGET_NEON"
5252   rtx ops[4];
5253   int tabbase = REGNO (operands[2]);
5255   ops[0] = operands[0];
5256   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5257   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5258   ops[3] = operands[3];
5259   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5261   return "";
5263   [(set_attr "type" "neon_tbl2")]
;; vtbx with a three-register table held in an EI-mode operand (operand 2).
;; Operand 1 is tied to the destination.  The register list is formed from
;; V8QI registers at REGNO of the table and at +2 and +4.
5266 (define_insn "neon_vtbx3v8qi"
5267   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5268         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5269                       (match_operand:EI 2 "s_register_operand" "w")
5270                       (match_operand:V8QI 3 "s_register_operand" "w")]
5271                      UNSPEC_VTBX))]
5272   "TARGET_NEON"
5274   rtx ops[5];
5275   int tabbase = REGNO (operands[2]);
5277   ops[0] = operands[0];
5278   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5279   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5280   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5281   ops[4] = operands[3];
5282   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5284   return "";
5286   [(set_attr "type" "neon_tbl3")]
;; vtbx with a four-register table held in an OI-mode operand (operand 2).
;; Operand 1 is tied to the destination.  The register list is formed from
;; V8QI registers at REGNO of the table and at +2, +4 and +6.
5289 (define_insn "neon_vtbx4v8qi"
5290   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5291         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5292                       (match_operand:OI 2 "s_register_operand" "w")
5293                       (match_operand:V8QI 3 "s_register_operand" "w")]
5294                      UNSPEC_VTBX))]
5295   "TARGET_NEON"
5297   rtx ops[6];
5298   int tabbase = REGNO (operands[2]);
5300   ops[0] = operands[0];
5301   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5302   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5303   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5304   ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5305   ops[5] = operands[3];
5306   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5308   return "";
5310   [(set_attr "type" "neon_tbl4")]
;; Expander that builds the two-result vtrn parallel: operand 0 receives
;; UNSPEC_VTRN1 of operands 1 and 2, and operand 3 receives UNSPEC_VTRN2 of
;; the same two inputs.  The preparation code is empty.
5313 (define_expand "neon_vtrn<mode>_internal"
5314   [(parallel
5315     [(set (match_operand:VDQWH 0 "s_register_operand")
5316           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5317                          (match_operand:VDQWH 2 "s_register_operand")]
5318            UNSPEC_VTRN1))
5319      (set (match_operand:VDQWH 3 "s_register_operand")
5320           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5321   "TARGET_NEON"
5322   ""
5325 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 and the two inputs are
;; operands 1 and 3; input 1 is tied to output 0 and input 3 to output 2
;; through the "0" and "2" constraints, so the template names only
;; operands 0 and 2.
5326 (define_insn "*neon_vtrn<mode>_insn"
5327   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5328         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5329                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5330          UNSPEC_VTRN1))
5331    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5332         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5333          UNSPEC_VTRN2))]
5334   "TARGET_NEON"
5335   "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5336   [(set_attr "type" "neon_permute<q>")]
;; Expander that builds the two-result vzip parallel: operand 0 receives
;; UNSPEC_VZIP1 of operands 1 and 2, and operand 3 receives UNSPEC_VZIP2 of
;; the same two inputs.  The preparation code is empty.
5339 (define_expand "neon_vzip<mode>_internal"
5340   [(parallel
5341     [(set (match_operand:VDQWH 0 "s_register_operand")
5342           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5343                          (match_operand:VDQWH 2 "s_register_operand")]
5344            UNSPEC_VZIP1))
5345     (set (match_operand:VDQWH 3 "s_register_operand")
5346          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5347   "TARGET_NEON"
5348   ""
5351 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2; inputs 1 and 3 are tied to them through the
;; "0" and "2" constraints, so the template names only operands 0 and 2.
5352 (define_insn "*neon_vzip<mode>_insn"
5353   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5354         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5355                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5356          UNSPEC_VZIP1))
5357    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5358         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5359          UNSPEC_VZIP2))]
5360   "TARGET_NEON"
5361   "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5362   [(set_attr "type" "neon_zip<q>")]
;; Expander that builds the two-result vuzp parallel: operand 0 receives
;; UNSPEC_VUZP1 of operands 1 and 2, and operand 3 receives UNSPEC_VUZP2 of
;; the same two inputs.  The preparation code is empty.
5365 (define_expand "neon_vuzp<mode>_internal"
5366   [(parallel
5367     [(set (match_operand:VDQWH 0 "s_register_operand")
5368           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5369                         (match_operand:VDQWH 2 "s_register_operand")]
5370            UNSPEC_VUZP1))
5371      (set (match_operand:VDQWH 3 "s_register_operand" "")
5372           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5373   "TARGET_NEON"
5374   ""
5377 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2; inputs 1 and 3 are tied to them through the
;; "0" and "2" constraints, so the template names only operands 0 and 2.
5378 (define_insn "*neon_vuzp<mode>_insn"
5379   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5380         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5381                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5382          UNSPEC_VUZP1))
5383    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5384         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5385          UNSPEC_VUZP2))]
5386   "TARGET_NEON"
5387   "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5388   [(set_attr "type" "neon_zip<q>")]
;; Expander with no preparation code: it emits an UNSPEC_VLD1 load of a VDQX
;; value from a neon_struct_operand memory into a register, which is the
;; same RTL shape that neon_vld1<mode> matches.
5391 (define_expand "vec_load_lanes<mode><mode>"
5392   [(set (match_operand:VDQX 0 "s_register_operand")
5393         (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5394                      UNSPEC_VLD1))]
5395   "TARGET_NEON")
;; vld1 of a whole VDQX vector: loads operand 0 from the "Um"-constrained
;; memory operand 1 (UNSPEC_VLD1).
5397 (define_insn "neon_vld1<mode>"
5398   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5399         (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5400                     UNSPEC_VLD1))]
5401   "TARGET_NEON"
5402   "vld1.<V_sz_elem>\t%h0, %A1"
5403   [(set_attr "type" "neon_load1_1reg<q>")]
5406 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5407 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
5408 ;; lane order here.
;; VDX form: loads one <V_elem> from memory (operand 1) into the lane given
;; by operand 3 of the vector in operand 2, which is tied to the result.
;; The lane is converted with NEON_ENDIAN_LANE_N and written back into
;; operands[3].  When the mode has a single element (max == 1) the
;; whole-register template without a lane index is used instead.
5409 (define_insn "neon_vld1_lane<mode>"
5410   [(set (match_operand:VDX 0 "s_register_operand" "=w")
5411         (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5412                      (match_operand:VDX 2 "s_register_operand" "0")
5413                      (match_operand:SI 3 "immediate_operand" "i")]
5414                     UNSPEC_VLD1_LANE))]
5415   "TARGET_NEON"
5417   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5418   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5419   operands[3] = GEN_INT (lane);
5420   if (max == 1)
5421     return "vld1.<V_sz_elem>\t%P0, %A1";
5422   else
5423     return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5425   [(set_attr "type" "neon_load1_one_lane<q>")]
5428 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5429 ;; here on big endian targets.
;; VQX form.  The output code narrows the access to one <V_HALF>mode
;; register: if the converted lane lies in the upper half of the vector
;; (lane >= max / 2), the lane is reduced by max / 2 and the register number
;; is advanced by 2.  operands[0] is then replaced by that half register.
;; When the full mode has two elements (max == 2) the template without a
;; lane index is used.
5430 (define_insn "neon_vld1_lane<mode>"
5431   [(set (match_operand:VQX 0 "s_register_operand" "=w")
5432         (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5433                      (match_operand:VQX 2 "s_register_operand" "0")
5434                      (match_operand:SI 3 "immediate_operand" "i")]
5435                     UNSPEC_VLD1_LANE))]
5436   "TARGET_NEON"
5438   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5439   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5440   operands[3] = GEN_INT (lane);
5441   int regno = REGNO (operands[0]);
5442   if (lane >= max / 2)
5443     {
5444       lane -= max / 2;
5445       regno += 2;
5446       operands[3] = GEN_INT (lane);
5447     }
5448   operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5449   if (max == 2)
5450     return "vld1.<V_sz_elem>\t%P0, %A1";
5451   else
5452     return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5454   [(set_attr "type" "neon_load1_one_lane<q>")]
;; vld1 "dup" for VD_LANE modes: a vec_duplicate of one <V_elem> read from
;; memory, printed with the "[]" all-lanes syntax.
5457 (define_insn "neon_vld1_dup<mode>"
5458   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5459         (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5460   "TARGET_NEON"
5461   "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5462   [(set_attr "type" "neon_load1_all_lanes<q>")]
5465 ;; Special case for DImode.  Treat it exactly like a simple load.
;; The expander has empty preparation code and emits a plain UNSPEC_VLD1
;; load of a DI value.
5466 (define_expand "neon_vld1_dupdi"
5467   [(set (match_operand:DI 0 "s_register_operand" "")
5468         (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
5469                    UNSPEC_VLD1))]
5470   "TARGET_NEON"
5471   ""
;; vld1 "dup" for VQ2 modes: a vec_duplicate of one <V_elem> read from
;; memory.  The template lists two registers (%e0 and %f0), each with the
;; "[]" all-lanes syntax.
5474 (define_insn "neon_vld1_dup<mode>"
5475   [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5476         (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5477   "TARGET_NEON"
5479   return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5481   [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate-load.  It has no assembler template of its own ("#") and
;; is split after reload into two steps: a neon_vld1_dupdi load into the low
;; DImode part of the destination, followed by a move of that part into the
;; high DImode part.  The "length" attribute is 8.
5484 (define_insn_and_split "neon_vld1_dupv2di"
5485    [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5486     (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5487    "TARGET_NEON"
5488    "#"
5489    "&& reload_completed"
5490    [(const_int 0)]
5491    {
5492     rtx tmprtx = gen_lowpart (DImode, operands[0]);
5493     emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5494     emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5495     DONE;
5496     }
5497   [(set_attr "length" "8")
5498    (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander with no preparation code: it emits an UNSPEC_VST1 store of a
;; VDQX register to a neon_struct_operand memory, which is the same RTL
;; shape that neon_vst1<mode> matches.
5501 (define_expand "vec_store_lanes<mode><mode>"
5502   [(set (match_operand:VDQX 0 "neon_struct_operand")
5503         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5504                      UNSPEC_VST1))]
5505   "TARGET_NEON")
;; vst1 of a whole VDQX vector: stores register operand 1 to the
;; "Um"-constrained memory operand 0 (UNSPEC_VST1).
5507 (define_insn "neon_vst1<mode>"
5508   [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5509         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5510                      UNSPEC_VST1))]
5511   "TARGET_NEON"
5512   "vst1.<V_sz_elem>\t%h1, %A0"
5513   [(set_attr "type" "neon_store1_1reg<q>")])
5515 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5516 ;; here on big endian targets.
;; VDX form: stores one <V_elem> lane of register operand 1 to memory
;; operand 0.  The lane in operand 2 is converted with NEON_ENDIAN_LANE_N
;; and written back to operands[2]; when the mode has a single element
;; (max == 1) the template without a lane index is used.
5517 (define_insn "neon_vst1_lane<mode>"
5518   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5519         (unspec:<V_elem>
5520           [(match_operand:VDX 1 "s_register_operand" "w")
5521            (match_operand:SI 2 "immediate_operand" "i")]
5522           UNSPEC_VST1_LANE))]
5523   "TARGET_NEON"
5525   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5526   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5527   operands[2] = GEN_INT (lane);
5528   if (max == 1)
5529     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5530   else
5531     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5533   [(set_attr "type" "neon_store1_one_lane<q>")]
5536 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5537 ;; here on big endian targets.
;; VQX form.  The output code narrows the access to one <V_HALF>mode
;; register: if the converted lane lies in the upper half of the vector
;; (lane >= max / 2), the lane is reduced by max / 2 and the register number
;; is advanced by 2.  operands[2] and operands[1] are then replaced by the
;; adjusted lane and the half register.  When the full mode has two
;; elements (max == 2) the template without a lane index is used.
5538 (define_insn "neon_vst1_lane<mode>"
5539   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5540         (unspec:<V_elem>
5541           [(match_operand:VQX 1 "s_register_operand" "w")
5542            (match_operand:SI 2 "immediate_operand" "i")]
5543           UNSPEC_VST1_LANE))]
5544   "TARGET_NEON"
5546   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5547   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5548   int regno = REGNO (operands[1]);
5549   if (lane >= max / 2)
5550     {
5551       lane -= max / 2;
5552       regno += 2;
5553     }
5554   operands[2] = GEN_INT (lane);
5555   operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5556   if (max == 2)
5557     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5558   else
5559     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5561   [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander with no preparation code: it emits an UNSPEC_VLD2 load of a TI
;; value.  The inner VDX unspec of (const_int 0) tagged
;; UNSPEC_VSTRUCTDUMMY carries no data operand; it makes the pattern depend
;; on the VDX mode iterator.
5564 (define_expand "vec_load_lanesti<mode>"
5565   [(set (match_operand:TI 0 "s_register_operand")
5566         (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5567                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5568                    UNSPEC_VLD2))]
5569   "TARGET_NEON")
;; vld2 into a TI-mode register group, for VDX element modes.  When the
;; element size is 64 the output code returns a vld1.64 template instead of
;; vld2, and the "type" attribute makes the matching choice between
;; neon_load1_2reg<q> and neon_load2_2reg<q>.
5571 (define_insn "neon_vld2<mode>"
5572   [(set (match_operand:TI 0 "s_register_operand" "=w")
5573         (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5574                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5575                    UNSPEC_VLD2))]
5576   "TARGET_NEON"
5578   if (<V_sz_elem> == 64)
5579     return "vld1.64\t%h0, %A1";
5580   else
5581     return "vld2.<V_sz_elem>\t%h0, %A1";
5583   [(set (attr "type")
5584       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5585                     (const_string "neon_load1_2reg<q>")
5586                     (const_string "neon_load2_2reg<q>")))]
;; Expander with no preparation code: it emits an UNSPEC_VLD2 load of an OI
;; value, with a VQ2 UNSPEC_VSTRUCTDUMMY placeholder that makes the pattern
;; depend on the VQ2 mode iterator.
5589 (define_expand "vec_load_lanesoi<mode>"
5590   [(set (match_operand:OI 0 "s_register_operand")
5591         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5592                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5593                    UNSPEC_VLD2))]
5594   "TARGET_NEON")
;; vld2 into an OI-mode register group, for VQ2 element modes.  Unlike the
;; TI form there is no special case on the element size; the template is
;; always vld2.
5596 (define_insn "neon_vld2<mode>"
5597   [(set (match_operand:OI 0 "s_register_operand" "=w")
5598         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5599                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5600                    UNSPEC_VLD2))]
5601   "TARGET_NEON"
5602   "vld2.<V_sz_elem>\t%h0, %A1"
5603   [(set_attr "type" "neon_load2_2reg_q")])
5605 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5606 ;; here on big endian targets.
;; TI form (VD_LANE element modes).  Operand 2 is tied to the result.  The
;; output code converts the lane with NEON_ENDIAN_LANE_N, forms two DImode
;; registers at REGNO of the result and at +2, and prints the instruction
;; with output_asm_insn using the same lane index on both registers.
5607 (define_insn "neon_vld2_lane<mode>"
5608   [(set (match_operand:TI 0 "s_register_operand" "=w")
5609         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5610                     (match_operand:TI 2 "s_register_operand" "0")
5611                     (match_operand:SI 3 "immediate_operand" "i")
5612                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5613                    UNSPEC_VLD2_LANE))]
5614   "TARGET_NEON"
5616   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5617   int regno = REGNO (operands[0]);
5618   rtx ops[4];
5619   ops[0] = gen_rtx_REG (DImode, regno);
5620   ops[1] = gen_rtx_REG (DImode, regno + 2);
5621   ops[2] = operands[1];
5622   ops[3] = GEN_INT (lane);
5623   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5624   return "";
5626   [(set_attr "type" "neon_load2_one_lane<q>")]
;; VLD2 to a single lane, OImode (VQ_HS) form.
;; Same operand layout as the TImode form: memory, tied previous value,
;; immediate lane.  After the NEON_ENDIAN_LANE_N mapping, a lane in the
;; upper half of the mode (lane >= GET_MODE_NUNITS / 2) is rebased by
;; subtracting max / 2 and the base register number is advanced by 2.  The
;; two DImode registers printed are then regno and regno + 4.  The
;; instruction is emitted with output_asm_insn; the returned template is
;; empty.
5629 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5630 ;; here on big endian targets.
5631 (define_insn "neon_vld2_lane<mode>"
5632   [(set (match_operand:OI 0 "s_register_operand" "=w")
5633         (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5634                     (match_operand:OI 2 "s_register_operand" "0")
5635                     (match_operand:SI 3 "immediate_operand" "i")
5636                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5637                    UNSPEC_VLD2_LANE))]
5638   "TARGET_NEON"
5640   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5641   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5642   int regno = REGNO (operands[0]);
5643   rtx ops[4];
5644   if (lane >= max / 2)
5645     {
5646       lane -= max / 2;
5647       regno += 2;
5648     }
5649   ops[0] = gen_rtx_REG (DImode, regno);
5650   ops[1] = gen_rtx_REG (DImode, regno + 4);
5651   ops[2] = operands[1];
5652   ops[3] = GEN_INT (lane);
5653   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5654   return "";
5656   [(set_attr "type" "neon_load2_one_lane<q>")]
;; VLD2 "dup" form for the VDX modes (UNSPEC_VLD2_DUP), producing a TImode
;; register value from a <V_two_elem> memory operand.
;; When the mode has more than one unit the all-lanes syntax
;; "{%e0[], %f0[]}" of vld2 is used; for single-unit modes a plain vld1 of
;; the register list is emitted instead.  The type attribute makes the
;; matching choice on <V_mode_nunits>.
5659 (define_insn "neon_vld2_dup<mode>"
5660   [(set (match_operand:TI 0 "s_register_operand" "=w")
5661         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5662                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5663                    UNSPEC_VLD2_DUP))]
5664   "TARGET_NEON"
5666   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5667     return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5668   else
5669     return "vld1.<V_sz_elem>\t%h0, %A1";
5671   [(set (attr "type")
5672       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5673                     (const_string "neon_load2_all_lanes<q>")
5674                     (const_string "neon_load1_1reg<q>")))]
;; Store-lanes expander for TImode structures, iterated over the VDX modes.
;; The structure memory operand 0 is set to an UNSPEC_VST2 of register
;; operand 1.  No preparation code; enabled under TARGET_NEON.
5677 (define_expand "vec_store_lanesti<mode>"
5678   [(set (match_operand:TI 0 "neon_struct_operand")
5679         (unspec:TI [(match_operand:TI 1 "s_register_operand")
5680                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5681                    UNSPEC_VST2))]
5682   "TARGET_NEON")
;; VST2 for the VDX modes: stores a TImode register value to a structure
;; memory operand (constraint "=Um").
;; For 64-bit elements a vst1.64 of the register list is emitted; for all
;; other element sizes a whole-structure vst2.<V_sz_elem> is emitted.
;; The "type" attribute follows the same split.  The vst2 arm is classified
;; as a two-register structure store (neon_store2_2reg<q>), mirroring the
;; neon_load2_2reg<q> classification of the corresponding load; it is not
;; a single-lane store.
5684 (define_insn "neon_vst2<mode>"
5685   [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5686         (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5687                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5688                    UNSPEC_VST2))]
5689   "TARGET_NEON"
5691   if (<V_sz_elem> == 64)
5692     return "vst1.64\t%h1, %A0";
5693   else
5694     return "vst2.<V_sz_elem>\t%h1, %A0";
5696   [(set (attr "type")
5697       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5698                     (const_string "neon_store1_2reg<q>")
5699                     (const_string "neon_store2_2reg<q>")))]
;; Store-lanes expander for OImode structures, iterated over the VQ2 modes.
;; The structure memory operand 0 is set to an UNSPEC_VST2 of register
;; operand 1.  No preparation code; enabled under TARGET_NEON.
5702 (define_expand "vec_store_lanesoi<mode>"
5703   [(set (match_operand:OI 0 "neon_struct_operand")
5704         (unspec:OI [(match_operand:OI 1 "s_register_operand")
5705                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5706                    UNSPEC_VST2))]
5707   "TARGET_NEON")
;; VST2 for the VQ2 modes: stores an OImode register value to a structure
;; memory operand with a single vst2.<V_sz_elem>, printing the source with
;; the %h modifier and the address with %A.  Typed as
;; "neon_store2_4reg<q>".
5709 (define_insn "neon_vst2<mode>"
5710   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5711         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5712                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5713                    UNSPEC_VST2))]
5714   "TARGET_NEON"
5715   "vst2.<V_sz_elem>\t%h1, %A0"
5716   [(set_attr "type" "neon_store2_4reg<q>")]
;; VST2 of a single lane, TImode (VD_LANE) form.
;; Operand 0 is the <V_two_elem> destination memory, operand 1 the TImode
;; source register and operand 2 the immediate lane number.  The lane is
;; mapped through NEON_ENDIAN_LANE_N, the DImode registers
;; REGNO (operands[1]) and REGNO (operands[1]) + 2 are printed, and the
;; instruction is emitted via output_asm_insn (the returned template is
;; empty).
5719 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5720 ;; here on big endian targets.
5721 (define_insn "neon_vst2_lane<mode>"
5722   [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5723         (unspec:<V_two_elem>
5724           [(match_operand:TI 1 "s_register_operand" "w")
5725            (match_operand:SI 2 "immediate_operand" "i")
5726            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5727           UNSPEC_VST2_LANE))]
5728   "TARGET_NEON"
5730   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5731   int regno = REGNO (operands[1]);
5732   rtx ops[4];
5733   ops[0] = operands[0];
5734   ops[1] = gen_rtx_REG (DImode, regno);
5735   ops[2] = gen_rtx_REG (DImode, regno + 2);
5736   ops[3] = GEN_INT (lane);
5737   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5738   return "";
5740   [(set_attr "type" "neon_store2_one_lane<q>")]
;; VST2 of a single lane, OImode (VQ_HS) form.
;; After the NEON_ENDIAN_LANE_N mapping, a lane in the upper half of the
;; mode (lane >= GET_MODE_NUNITS / 2) is rebased by max / 2 and the base
;; register number is advanced by 2.  The two DImode registers printed are
;; regno and regno + 4.  The instruction is emitted via output_asm_insn;
;; the returned template is empty.
5743 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5744 ;; here on big endian targets.
5745 (define_insn "neon_vst2_lane<mode>"
5746   [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5747         (unspec:<V_two_elem>
5748            [(match_operand:OI 1 "s_register_operand" "w")
5749             (match_operand:SI 2 "immediate_operand" "i")
5750             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5751            UNSPEC_VST2_LANE))]
5752   "TARGET_NEON"
5754   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5755   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5756   int regno = REGNO (operands[1]);
5757   rtx ops[4];
5758   if (lane >= max / 2)
5759     {
5760       lane -= max / 2;
5761       regno += 2;
5762     }
5763   ops[0] = operands[0];
5764   ops[1] = gen_rtx_REG (DImode, regno);
5765   ops[2] = gen_rtx_REG (DImode, regno + 4);
5766   ops[3] = GEN_INT (lane);
5767   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5768   return "";
5770   [(set_attr "type" "neon_store2_one_lane<q>")]
;; Load-lanes expander for EImode structures, iterated over the VDX modes.
;; Register operand 0 is set to an UNSPEC_VLD3 of the structure memory
;; operand 1.  No preparation code; enabled under TARGET_NEON.
5773 (define_expand "vec_load_lanesei<mode>"
5774   [(set (match_operand:EI 0 "s_register_operand")
5775         (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5776                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5777                    UNSPEC_VLD3))]
5778   "TARGET_NEON")
;; VLD3 for the VDX modes: loads an EImode register value from a structure
;; memory operand.  For 64-bit elements a vld1.64 of the register list is
;; emitted; otherwise a vld3.<V_sz_elem>.  The "type" attribute makes the
;; same split (neon_load1_3reg<q> versus neon_load3_3reg<q>).
5780 (define_insn "neon_vld3<mode>"
5781   [(set (match_operand:EI 0 "s_register_operand" "=w")
5782         (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5783                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5784                    UNSPEC_VLD3))]
5785   "TARGET_NEON"
5787   if (<V_sz_elem> == 64)
5788     return "vld1.64\t%h0, %A1";
5789   else
5790     return "vld3.<V_sz_elem>\t%h0, %A1";
5792   [(set (attr "type")
5793       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5794                     (const_string "neon_load1_3reg<q>")
5795                     (const_string "neon_load3_3reg<q>")))]
;; Load-lanes expander for CImode structures, iterated over the VQ2 modes.
;; It forwards its two operands unchanged to gen_neon_vld3<mode> and then
;; finishes with DONE, so the pattern's own RTL template is not emitted.
5798 (define_expand "vec_load_lanesci<mode>"
5799   [(match_operand:CI 0 "s_register_operand")
5800    (match_operand:CI 1 "neon_struct_operand")
5801    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5802   "TARGET_NEON"
5804   emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5805   DONE;
;; VLD3 expander for the VQ2 modes (CImode register, CImode memory).
;; The load is split into two insns, each reading an EImode-sized piece of
;; memory: neon_vld3qa on the memory at offset 0, then neon_vld3qb on the
;; memory advanced by GET_MODE_SIZE (EImode).  The second insn also takes
;; operands[0] as an input, alongside writing it.
5808 (define_expand "neon_vld3<mode>"
5809   [(match_operand:CI 0 "s_register_operand")
5810    (match_operand:CI 1 "neon_struct_operand")
5811    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5812   "TARGET_NEON"
5814   rtx mem;
5816   mem = adjust_address (operands[1], EImode, 0);
5817   emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5818   mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5819   emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
5820   DONE;
;; First half of the split VLD3 for the VQ2 modes (UNSPEC_VLD3A).
;; Reads an EImode memory operand and emits one vld3.<V_sz_elem> whose
;; register list is the DImode registers at REGNO (operands[0]) + 0, + 4
;; and + 8.  Emitted via output_asm_insn; the returned template is empty.
5823 (define_insn "neon_vld3qa<mode>"
5824   [(set (match_operand:CI 0 "s_register_operand" "=w")
5825         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5826                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5827                    UNSPEC_VLD3A))]
5828   "TARGET_NEON"
5830   int regno = REGNO (operands[0]);
5831   rtx ops[4];
5832   ops[0] = gen_rtx_REG (DImode, regno);
5833   ops[1] = gen_rtx_REG (DImode, regno + 4);
5834   ops[2] = gen_rtx_REG (DImode, regno + 8);
5835   ops[3] = operands[1];
5836   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5837   return "";
5839   [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the split VLD3 for the VQ2 modes (UNSPEC_VLD3B).
;; Operand 2 is the previous CImode value, tied to the output by
;; constraint "0".  The vld3.<V_sz_elem> emitted here targets the DImode
;; registers at REGNO (operands[0]) + 2, + 6 and + 10, i.e. the registers
;; interleaved with those written by neon_vld3qa.
5842 (define_insn "neon_vld3qb<mode>"
5843   [(set (match_operand:CI 0 "s_register_operand" "=w")
5844         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5845                     (match_operand:CI 2 "s_register_operand" "0")
5846                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5847                    UNSPEC_VLD3B))]
5848   "TARGET_NEON"
5850   int regno = REGNO (operands[0]);
5851   rtx ops[4];
5852   ops[0] = gen_rtx_REG (DImode, regno + 2);
5853   ops[1] = gen_rtx_REG (DImode, regno + 6);
5854   ops[2] = gen_rtx_REG (DImode, regno + 10);
5855   ops[3] = operands[1];
5856   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5857   return "";
5859   [(set_attr "type" "neon_load3_3reg<q>")]
;; VLD3 to a single lane, EImode (VD_LANE) form.
;; Operand 1 is the <V_three_elem> memory, operand 2 the previous value
;; (tied to the output) and operand 3 the immediate lane.  The lane is
;; mapped through NEON_ENDIAN_LANE_N; the registers printed are the DImode
;; registers at regno, regno + 2 and regno + 4.
;; Note that the address is printed with plain %3 here, without the %A
;; modifier that the vld2/vld4 lane patterns use.
;; NOTE(review): presumably deliberate for the single-lane vld3 syntax --
;; confirm before changing.
5862 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5863 ;; here on big endian targets.
5864 (define_insn "neon_vld3_lane<mode>"
5865   [(set (match_operand:EI 0 "s_register_operand" "=w")
5866         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5867                     (match_operand:EI 2 "s_register_operand" "0")
5868                     (match_operand:SI 3 "immediate_operand" "i")
5869                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5870                    UNSPEC_VLD3_LANE))]
5871   "TARGET_NEON"
5873   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5874   int regno = REGNO (operands[0]);
5875   rtx ops[5];
5876   ops[0] = gen_rtx_REG (DImode, regno);
5877   ops[1] = gen_rtx_REG (DImode, regno + 2);
5878   ops[2] = gen_rtx_REG (DImode, regno + 4);
5879   ops[3] = operands[1];
5880   ops[4] = GEN_INT (lane);
5881   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5882                    ops);
5883   return "";
5885   [(set_attr "type" "neon_load3_one_lane<q>")]
;; VLD3 to a single lane, CImode (VQ_HS) form.
;; After the NEON_ENDIAN_LANE_N mapping, a lane in the upper half of the
;; mode is rebased by max / 2 and the base register number is advanced by
;; 2.  The DImode registers printed are regno, regno + 4 and regno + 8.
;; As in the EImode form, the address is printed with plain %3 rather
;; than %A3.
5888 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5889 ;; here on big endian targets.
5890 (define_insn "neon_vld3_lane<mode>"
5891   [(set (match_operand:CI 0 "s_register_operand" "=w")
5892         (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5893                     (match_operand:CI 2 "s_register_operand" "0")
5894                     (match_operand:SI 3 "immediate_operand" "i")
5895                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5896                    UNSPEC_VLD3_LANE))]
5897   "TARGET_NEON"
5899   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5900   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5901   int regno = REGNO (operands[0]);
5902   rtx ops[5];
5903   if (lane >= max / 2)
5904     {
5905       lane -= max / 2;
5906       regno += 2;
5907     }
5908   ops[0] = gen_rtx_REG (DImode, regno);
5909   ops[1] = gen_rtx_REG (DImode, regno + 4);
5910   ops[2] = gen_rtx_REG (DImode, regno + 8);
5911   ops[3] = operands[1];
5912   ops[4] = GEN_INT (lane);
5913   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5914                    ops);
5915   return "";
5917   [(set_attr "type" "neon_load3_one_lane<q>")]
;; VLD3 "dup" form for the VDX modes (UNSPEC_VLD3_DUP), producing an EImode
;; register value from a <V_three_elem> memory operand.
;; For modes with more than one unit, a vld3 with the all-lanes "[]"
;; syntax is emitted for the DImode registers at regno, regno + 2 and
;; regno + 4, with the address printed via plain %3.  Single-unit modes
;; fall back to a vld1 of the register list.  The "type" attribute makes
;; the matching choice on <V_mode_nunits>.
5920 (define_insn "neon_vld3_dup<mode>"
5921   [(set (match_operand:EI 0 "s_register_operand" "=w")
5922         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5923                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5924                    UNSPEC_VLD3_DUP))]
5925   "TARGET_NEON"
5927   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5928     {
5929       int regno = REGNO (operands[0]);
5930       rtx ops[4];
5931       ops[0] = gen_rtx_REG (DImode, regno);
5932       ops[1] = gen_rtx_REG (DImode, regno + 2);
5933       ops[2] = gen_rtx_REG (DImode, regno + 4);
5934       ops[3] = operands[1];
5935       output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5936       return "";
5937     }
5938   else
5939     return "vld1.<V_sz_elem>\t%h0, %A1";
5941   [(set (attr "type")
5942       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5943                     (const_string "neon_load3_all_lanes<q>")
5944                     (const_string "neon_load1_1reg<q>")))])
;; Store-lanes expander for EImode structures, iterated over the VDX modes.
;; The structure memory operand 0 is set to an UNSPEC_VST3 of register
;; operand 1.  No preparation code; enabled under TARGET_NEON.
5946 (define_expand "vec_store_lanesei<mode>"
5947   [(set (match_operand:EI 0 "neon_struct_operand")
5948         (unspec:EI [(match_operand:EI 1 "s_register_operand")
5949                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5950                    UNSPEC_VST3))]
5951   "TARGET_NEON")
;; VST3 for the VDX modes: stores an EImode register value to a structure
;; memory operand (constraint "=Um").
;; For 64-bit elements a vst1.64 of the register list is emitted; for all
;; other element sizes a whole-structure vst3.<V_sz_elem> is emitted.
;; The "type" attribute follows the same split.  The vst3 arm is classified
;; as a three-register structure store (neon_store3_3reg<q>), mirroring
;; the neon_load3_3reg<q> classification of the corresponding load; it is
;; not a single-lane store.
5953 (define_insn "neon_vst3<mode>"
5954   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5955         (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5956                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5957                    UNSPEC_VST3))]
5958   "TARGET_NEON"
5960   if (<V_sz_elem> == 64)
5961     return "vst1.64\t%h1, %A0";
5962   else
5963     return "vst3.<V_sz_elem>\t%h1, %A0";
5965   [(set (attr "type")
5966       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5967                     (const_string "neon_store1_3reg<q>")
5968                     (const_string "neon_store3_3reg<q>")))])
;; Store-lanes expander for CImode structures, iterated over the VQ2 modes.
;; It forwards its two operands unchanged to gen_neon_vst3<mode> and then
;; finishes with DONE.
5970 (define_expand "vec_store_lanesci<mode>"
5971   [(match_operand:CI 0 "neon_struct_operand")
5972    (match_operand:CI 1 "s_register_operand")
5973    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5974   "TARGET_NEON"
5976   emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
5977   DONE;
;; VST3 expander for the VQ2 modes (CImode memory, CImode register).
;; The store is split into two insns, each writing an EImode-sized piece
;; of memory: neon_vst3qa at offset 0, then neon_vst3qb at the address
;; advanced by GET_MODE_SIZE (EImode).  Both take the full CImode source
;; register.
5980 (define_expand "neon_vst3<mode>"
5981   [(match_operand:CI 0 "neon_struct_operand")
5982    (match_operand:CI 1 "s_register_operand")
5983    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5984   "TARGET_NEON"
5986   rtx mem;
5988   mem = adjust_address (operands[0], EImode, 0);
5989   emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5990   mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5991   emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
5992   DONE;
;; First half of the split VST3 for the VQ2 modes (UNSPEC_VST3A).
;; Writes an EImode memory operand with one vst3.<V_sz_elem> whose
;; register list is the DImode registers at REGNO (operands[1]) + 0, + 4
;; and + 8.  Emitted via output_asm_insn; the returned template is empty.
5995 (define_insn "neon_vst3qa<mode>"
5996   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5997         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5998                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5999                    UNSPEC_VST3A))]
6000   "TARGET_NEON"
6002   int regno = REGNO (operands[1]);
6003   rtx ops[4];
6004   ops[0] = operands[0];
6005   ops[1] = gen_rtx_REG (DImode, regno);
6006   ops[2] = gen_rtx_REG (DImode, regno + 4);
6007   ops[3] = gen_rtx_REG (DImode, regno + 8);
6008   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6009   return "";
6011   [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the split VST3 for the VQ2 modes (UNSPEC_VST3B).
;; Writes an EImode memory operand with one vst3.<V_sz_elem> whose
;; register list is the DImode registers at REGNO (operands[1]) + 2, + 6
;; and + 10, i.e. the registers interleaved with those stored by
;; neon_vst3qa.
6014 (define_insn "neon_vst3qb<mode>"
6015   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6016         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6017                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6018                    UNSPEC_VST3B))]
6019   "TARGET_NEON"
6021   int regno = REGNO (operands[1]);
6022   rtx ops[4];
6023   ops[0] = operands[0];
6024   ops[1] = gen_rtx_REG (DImode, regno + 2);
6025   ops[2] = gen_rtx_REG (DImode, regno + 6);
6026   ops[3] = gen_rtx_REG (DImode, regno + 10);
6027   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6028   return "";
6030   [(set_attr "type" "neon_store3_3reg<q>")]
;; VST3 of a single lane, EImode (VD_LANE) form.
;; Operand 0 is the <V_three_elem> destination memory, operand 1 the
;; EImode source register and operand 2 the immediate lane.  The lane is
;; mapped through NEON_ENDIAN_LANE_N; the registers printed are the DImode
;; registers at regno, regno + 2 and regno + 4.
;; The address is printed with plain %0 here, without the %A modifier
;; used by the vst2/vst4 lane patterns.
6033 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6034 ;; here on big endian targets.
6035 (define_insn "neon_vst3_lane<mode>"
6036   [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6037         (unspec:<V_three_elem>
6038            [(match_operand:EI 1 "s_register_operand" "w")
6039             (match_operand:SI 2 "immediate_operand" "i")
6040             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6041            UNSPEC_VST3_LANE))]
6042   "TARGET_NEON"
6044   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6045   int regno = REGNO (operands[1]);
6046   rtx ops[5];
6047   ops[0] = operands[0];
6048   ops[1] = gen_rtx_REG (DImode, regno);
6049   ops[2] = gen_rtx_REG (DImode, regno + 2);
6050   ops[3] = gen_rtx_REG (DImode, regno + 4);
6051   ops[4] = GEN_INT (lane);
6052   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6053                    ops);
6054   return "";
6056   [(set_attr "type" "neon_store3_one_lane<q>")]
;; VST3 of a single lane, CImode (VQ_HS) form.
;; After the NEON_ENDIAN_LANE_N mapping, a lane in the upper half of the
;; mode is rebased by max / 2 and the base register number is advanced by
;; 2.  The DImode registers printed are regno, regno + 4 and regno + 8.
;; As in the EImode form, the address is printed with plain %0.
6059 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6060 ;; here on big endian targets.
6061 (define_insn "neon_vst3_lane<mode>"
6062   [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6063         (unspec:<V_three_elem>
6064            [(match_operand:CI 1 "s_register_operand" "w")
6065             (match_operand:SI 2 "immediate_operand" "i")
6066             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6067            UNSPEC_VST3_LANE))]
6068   "TARGET_NEON"
6070   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6071   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6072   int regno = REGNO (operands[1]);
6073   rtx ops[5];
6074   if (lane >= max / 2)
6075     {
6076       lane -= max / 2;
6077       regno += 2;
6078     }
6079   ops[0] = operands[0];
6080   ops[1] = gen_rtx_REG (DImode, regno);
6081   ops[2] = gen_rtx_REG (DImode, regno + 4);
6082   ops[3] = gen_rtx_REG (DImode, regno + 8);
6083   ops[4] = GEN_INT (lane);
6084   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6085                    ops);
6086   return "";
6088   [(set_attr "type" "neon_store3_one_lane<q>")]
;; Load-lanes expander for OImode structures, iterated over the VDX modes.
;; Register operand 0 is set to an UNSPEC_VLD4 of the structure memory
;; operand 1.  No preparation code; enabled under TARGET_NEON.
6091 (define_expand "vec_load_lanesoi<mode>"
6092   [(set (match_operand:OI 0 "s_register_operand")
6093         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6094                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6095                    UNSPEC_VLD4))]
6096   "TARGET_NEON")
;; VLD4 for the VDX modes: loads an OImode register value from a structure
;; memory operand.  For 64-bit elements a vld1.64 of the register list is
;; emitted; otherwise a vld4.<V_sz_elem>.  The "type" attribute makes the
;; same split (neon_load1_4reg<q> versus neon_load4_4reg<q>).
6098 (define_insn "neon_vld4<mode>"
6099   [(set (match_operand:OI 0 "s_register_operand" "=w")
6100         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6101                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6102                    UNSPEC_VLD4))]
6103   "TARGET_NEON"
6105   if (<V_sz_elem> == 64)
6106     return "vld1.64\t%h0, %A1";
6107   else
6108     return "vld4.<V_sz_elem>\t%h0, %A1";
6110   [(set (attr "type")
6111       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6112                     (const_string "neon_load1_4reg<q>")
6113                     (const_string "neon_load4_4reg<q>")))]
;; Load-lanes expander for XImode structures, iterated over the VQ2 modes.
;; It forwards its two operands unchanged to gen_neon_vld4<mode> and then
;; finishes with DONE.
6116 (define_expand "vec_load_lanesxi<mode>"
6117   [(match_operand:XI 0 "s_register_operand")
6118    (match_operand:XI 1 "neon_struct_operand")
6119    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6120   "TARGET_NEON"
6122   emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
6123   DONE;
;; VLD4 expander for the VQ2 modes (XImode register, XImode memory).
;; The load is split into two insns, each reading an OImode-sized piece of
;; memory: neon_vld4qa on the memory at offset 0, then neon_vld4qb on the
;; memory advanced by GET_MODE_SIZE (OImode).  The second insn also takes
;; operands[0] as an input, alongside writing it.
6126 (define_expand "neon_vld4<mode>"
6127   [(match_operand:XI 0 "s_register_operand")
6128    (match_operand:XI 1 "neon_struct_operand")
6129    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6130   "TARGET_NEON"
6132   rtx mem;
6134   mem = adjust_address (operands[1], OImode, 0);
6135   emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6136   mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6137   emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
6138   DONE;
;; First half of the split VLD4 for the VQ2 modes (UNSPEC_VLD4A).
;; Reads an OImode memory operand and emits one vld4.<V_sz_elem> whose
;; register list is the DImode registers at REGNO (operands[0]) + 0, + 4,
;; + 8 and + 12.  Emitted via output_asm_insn; the returned template is
;; empty.
6141 (define_insn "neon_vld4qa<mode>"
6142   [(set (match_operand:XI 0 "s_register_operand" "=w")
6143         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6144                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6145                    UNSPEC_VLD4A))]
6146   "TARGET_NEON"
6148   int regno = REGNO (operands[0]);
6149   rtx ops[5];
6150   ops[0] = gen_rtx_REG (DImode, regno);
6151   ops[1] = gen_rtx_REG (DImode, regno + 4);
6152   ops[2] = gen_rtx_REG (DImode, regno + 8);
6153   ops[3] = gen_rtx_REG (DImode, regno + 12);
6154   ops[4] = operands[1];
6155   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6156   return "";
6158   [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the split VLD4 for the VQ2 modes (UNSPEC_VLD4B).
;; Operand 2 is the previous XImode value, tied to the output by
;; constraint "0".  The vld4.<V_sz_elem> emitted here targets the DImode
;; registers at REGNO (operands[0]) + 2, + 6, + 10 and + 14, i.e. the
;; registers interleaved with those written by neon_vld4qa.
6161 (define_insn "neon_vld4qb<mode>"
6162   [(set (match_operand:XI 0 "s_register_operand" "=w")
6163         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6164                     (match_operand:XI 2 "s_register_operand" "0")
6165                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6166                    UNSPEC_VLD4B))]
6167   "TARGET_NEON"
6169   int regno = REGNO (operands[0]);
6170   rtx ops[5];
6171   ops[0] = gen_rtx_REG (DImode, regno + 2);
6172   ops[1] = gen_rtx_REG (DImode, regno + 6);
6173   ops[2] = gen_rtx_REG (DImode, regno + 10);
6174   ops[3] = gen_rtx_REG (DImode, regno + 14);
6175   ops[4] = operands[1];
6176   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6177   return "";
6179   [(set_attr "type" "neon_load4_4reg<q>")]
;; VLD4 to a single lane, OImode (VD_LANE) form.
;; Operand 1 is the <V_four_elem> memory, operand 2 the previous value
;; (tied to the output) and operand 3 the immediate lane.  The lane is
;; mapped through NEON_ENDIAN_LANE_N; the registers printed are the DImode
;; registers at regno, regno + 2, regno + 4 and regno + 6.  Emitted via
;; output_asm_insn; the returned template is empty.
6182 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6183 ;; here on big endian targets.
6184 (define_insn "neon_vld4_lane<mode>"
6185   [(set (match_operand:OI 0 "s_register_operand" "=w")
6186         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6187                     (match_operand:OI 2 "s_register_operand" "0")
6188                     (match_operand:SI 3 "immediate_operand" "i")
6189                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6190                    UNSPEC_VLD4_LANE))]
6191   "TARGET_NEON"
6193   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6194   int regno = REGNO (operands[0]);
6195   rtx ops[6];
6196   ops[0] = gen_rtx_REG (DImode, regno);
6197   ops[1] = gen_rtx_REG (DImode, regno + 2);
6198   ops[2] = gen_rtx_REG (DImode, regno + 4);
6199   ops[3] = gen_rtx_REG (DImode, regno + 6);
6200   ops[4] = operands[1];
6201   ops[5] = GEN_INT (lane);
6202   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6203                    ops);
6204   return "";
6206   [(set_attr "type" "neon_load4_one_lane<q>")]
;; VLD4 to a single lane, XImode (VQ_HS) form.
;; After the NEON_ENDIAN_LANE_N mapping, a lane in the upper half of the
;; mode is rebased by max / 2 and the base register number is advanced by
;; 2.  The DImode registers printed are regno, regno + 4, regno + 8 and
;; regno + 12.  Emitted via output_asm_insn; the returned template is
;; empty.
6209 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6210 ;; here on big endian targets.
6211 (define_insn "neon_vld4_lane<mode>"
6212   [(set (match_operand:XI 0 "s_register_operand" "=w")
6213         (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6214                     (match_operand:XI 2 "s_register_operand" "0")
6215                     (match_operand:SI 3 "immediate_operand" "i")
6216                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6217                    UNSPEC_VLD4_LANE))]
6218   "TARGET_NEON"
6220   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6221   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6222   int regno = REGNO (operands[0]);
6223   rtx ops[6];
6224   if (lane >= max / 2)
6225     {
6226       lane -= max / 2;
6227       regno += 2;
6228     }
6229   ops[0] = gen_rtx_REG (DImode, regno);
6230   ops[1] = gen_rtx_REG (DImode, regno + 4);
6231   ops[2] = gen_rtx_REG (DImode, regno + 8);
6232   ops[3] = gen_rtx_REG (DImode, regno + 12);
6233   ops[4] = operands[1];
6234   ops[5] = GEN_INT (lane);
6235   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6236                    ops);
6237   return "";
6239   [(set_attr "type" "neon_load4_one_lane<q>")]
;; VLD4 "dup" form for the VDX modes (UNSPEC_VLD4_DUP), producing an OImode
;; register value from a <V_four_elem> memory operand.
;; For modes with more than one unit, a vld4 with the all-lanes "[]"
;; syntax is emitted for the DImode registers at regno, regno + 2,
;; regno + 4 and regno + 6.  Single-unit modes fall back to a vld1 of the
;; register list.  The "type" attribute makes the matching choice on
;; <V_mode_nunits>.
6242 (define_insn "neon_vld4_dup<mode>"
6243   [(set (match_operand:OI 0 "s_register_operand" "=w")
6244         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6245                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6246                    UNSPEC_VLD4_DUP))]
6247   "TARGET_NEON"
6249   if (GET_MODE_NUNITS (<MODE>mode) > 1)
6250     {
6251       int regno = REGNO (operands[0]);
6252       rtx ops[5];
6253       ops[0] = gen_rtx_REG (DImode, regno);
6254       ops[1] = gen_rtx_REG (DImode, regno + 2);
6255       ops[2] = gen_rtx_REG (DImode, regno + 4);
6256       ops[3] = gen_rtx_REG (DImode, regno + 6);
6257       ops[4] = operands[1];
6258       output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6259                        ops);
6260       return "";
6261     }
6262   else
6263     return "vld1.<V_sz_elem>\t%h0, %A1";
6265   [(set (attr "type")
6266       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6267                     (const_string "neon_load4_all_lanes<q>")
6268                     (const_string "neon_load1_1reg<q>")))]
;; Store-lanes expander for OImode structures, iterated over the VDX modes.
;; The structure memory operand 0 is set to an UNSPEC_VST4 of register
;; operand 1.  No preparation code; enabled under TARGET_NEON.
6271 (define_expand "vec_store_lanesoi<mode>"
6272   [(set (match_operand:OI 0 "neon_struct_operand")
6273         (unspec:OI [(match_operand:OI 1 "s_register_operand")
6274                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6275                    UNSPEC_VST4))]
6276   "TARGET_NEON")
;; VST4 for the VDX modes: stores an OImode register value to a structure
;; memory operand.  For 64-bit elements a vst1.64 of the register list is
;; emitted; otherwise a vst4.<V_sz_elem>.  The "type" attribute makes the
;; same split (neon_store1_4reg<q> versus neon_store4_4reg<q>).
6278 (define_insn "neon_vst4<mode>"
6279   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6280         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6281                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6282                    UNSPEC_VST4))]
6283   "TARGET_NEON"
6285   if (<V_sz_elem> == 64)
6286     return "vst1.64\t%h1, %A0";
6287   else
6288     return "vst4.<V_sz_elem>\t%h1, %A0";
6290   [(set (attr "type")
6291       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6292                     (const_string "neon_store1_4reg<q>")
6293                     (const_string "neon_store4_4reg<q>")))]
;; Store-lanes expander for XImode structures, iterated over the VQ2 modes.
;; It forwards its two operands unchanged to gen_neon_vst4<mode> and then
;; finishes with DONE.
6296 (define_expand "vec_store_lanesxi<mode>"
6297   [(match_operand:XI 0 "neon_struct_operand")
6298    (match_operand:XI 1 "s_register_operand")
6299    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6300   "TARGET_NEON"
6302   emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
6303   DONE;
;; VST4 expander for the VQ2 modes (XImode memory, XImode register).
;; The store is split into two insns, each writing an OImode-sized piece
;; of memory: neon_vst4qa at offset 0, then neon_vst4qb at the address
;; advanced by GET_MODE_SIZE (OImode).  Both take the full XImode source
;; register.
6306 (define_expand "neon_vst4<mode>"
6307   [(match_operand:XI 0 "neon_struct_operand")
6308    (match_operand:XI 1 "s_register_operand")
6309    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6310   "TARGET_NEON"
6312   rtx mem;
6314   mem = adjust_address (operands[0], OImode, 0);
6315   emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6316   mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6317   emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
6318   DONE;
;; First half of the split VST4 for the VQ2 modes (UNSPEC_VST4A).
;; Writes an OImode memory operand with one vst4.<V_sz_elem> whose
;; register list is the DImode registers at REGNO (operands[1]) + 0, + 4,
;; + 8 and + 12.  Emitted via output_asm_insn; the returned template is
;; empty.
6321 (define_insn "neon_vst4qa<mode>"
6322   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6323         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6324                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6325                    UNSPEC_VST4A))]
6326   "TARGET_NEON"
6328   int regno = REGNO (operands[1]);
6329   rtx ops[5];
6330   ops[0] = operands[0];
6331   ops[1] = gen_rtx_REG (DImode, regno);
6332   ops[2] = gen_rtx_REG (DImode, regno + 4);
6333   ops[3] = gen_rtx_REG (DImode, regno + 8);
6334   ops[4] = gen_rtx_REG (DImode, regno + 12);
6335   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6336   return "";
6338   [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of the split VST4 for the VQ2 modes (UNSPEC_VST4B).
;; Writes an OImode memory operand with one vst4.<V_sz_elem> whose
;; register list is the DImode registers at REGNO (operands[1]) + 2, + 6,
;; + 10 and + 14, i.e. the registers interleaved with those stored by
;; neon_vst4qa.
6341 (define_insn "neon_vst4qb<mode>"
6342   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6343         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6344                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6345                    UNSPEC_VST4B))]
6346   "TARGET_NEON"
6348   int regno = REGNO (operands[1]);
6349   rtx ops[5];
6350   ops[0] = operands[0];
6351   ops[1] = gen_rtx_REG (DImode, regno + 2);
6352   ops[2] = gen_rtx_REG (DImode, regno + 6);
6353   ops[3] = gen_rtx_REG (DImode, regno + 10);
6354   ops[4] = gen_rtx_REG (DImode, regno + 14);
6355   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6356   return "";
6358   [(set_attr "type" "neon_store4_4reg<q>")]
;; VST4 of a single lane, OImode (VD_LANE) form.
;; Operand 0 is the <V_four_elem> destination memory, operand 1 the OImode
;; source register and operand 2 the immediate lane.  The lane is mapped
;; through NEON_ENDIAN_LANE_N; the registers printed are the DImode
;; registers at regno, regno + 2, regno + 4 and regno + 6.  Emitted via
;; output_asm_insn; the returned template is empty.
6361 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6362 ;; here on big endian targets.
6363 (define_insn "neon_vst4_lane<mode>"
6364   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6365         (unspec:<V_four_elem>
6366            [(match_operand:OI 1 "s_register_operand" "w")
6367             (match_operand:SI 2 "immediate_operand" "i")
6368             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6369            UNSPEC_VST4_LANE))]
6370   "TARGET_NEON"
6372   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6373   int regno = REGNO (operands[1]);
6374   rtx ops[6];
6375   ops[0] = operands[0];
6376   ops[1] = gen_rtx_REG (DImode, regno);
6377   ops[2] = gen_rtx_REG (DImode, regno + 2);
6378   ops[3] = gen_rtx_REG (DImode, regno + 4);
6379   ops[4] = gen_rtx_REG (DImode, regno + 6);
6380   ops[5] = GEN_INT (lane);
6381   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6382                    ops);
6383   return "";
6385   [(set_attr "type" "neon_store4_one_lane<q>")]
;; VST4 of a single lane, XImode (VQ_HS) form.
;; Operand 0 is the <V_four_elem> destination memory, operand 1 the XImode
;; source register and operand 2 the immediate lane.  After the
;; NEON_ENDIAN_LANE_N mapping, a lane in the upper half of the mode is
;; rebased by max / 2 and the base register number is advanced by 2.  The
;; DImode registers printed are regno, regno + 4, regno + 8 and
;; regno + 12.  Emitted via output_asm_insn; the returned template is
;; empty.
;; The insn stores one lane only, so it is typed neon_store4_one_lane<q>,
;; matching the D-register form of this pattern and the vst2/vst3 lane
;; patterns.
6388 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6389 ;; here on big endian targets.
6390 (define_insn "neon_vst4_lane<mode>"
6391   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6392         (unspec:<V_four_elem>
6393            [(match_operand:XI 1 "s_register_operand" "w")
6394             (match_operand:SI 2 "immediate_operand" "i")
6395             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6396            UNSPEC_VST4_LANE))]
6397   "TARGET_NEON"
6399   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6400   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6401   int regno = REGNO (operands[1]);
6402   rtx ops[6];
6403   if (lane >= max / 2)
6404     {
6405       lane -= max / 2;
6406       regno += 2;
6407     }
6408   ops[0] = operands[0];
6409   ops[1] = gen_rtx_REG (DImode, regno);
6410   ops[2] = gen_rtx_REG (DImode, regno + 4);
6411   ops[3] = gen_rtx_REG (DImode, regno + 8);
6412   ops[4] = gen_rtx_REG (DImode, regno + 12);
6413   ops[5] = GEN_INT (lane);
6414   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6415                    ops);
6416   return "";
6418   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Widening unpack of the low half of a VU-mode vector.
;; The pattern is the SE extension (<US> selects the variant) of a
;; vec_select of operand 1 whose selector, operand 2, must satisfy
;; vect_par_constant_low.  It emits vmovl.<US><V_sz_elem> with the source
;; printed via %e1, and is only enabled when !BYTES_BIG_ENDIAN.
6421 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6422   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6423         (SE:<V_unpack> (vec_select:<V_HALF>
6424                           (match_operand:VU 1 "register_operand" "w")
6425                           (match_operand:VU 2 "vect_par_constant_low" ""))))]
6426   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6427   "vmovl.<US><V_sz_elem> %q0, %e1"
6428   [(set_attr "type" "neon_shift_imm_long")]
;; Widening unpack of the high half of a VU-mode vector.
;; Same shape as the _lo_ pattern, but the selector (operand 2) must
;; satisfy vect_par_constant_high and the source is printed via %f1.
;; Only enabled when !BYTES_BIG_ENDIAN.
6431 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6432   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6433         (SE:<V_unpack> (vec_select:<V_HALF>
6434                           (match_operand:VU 1 "register_operand" "w")
6435                           (match_operand:VU 2 "vect_par_constant_high" ""))))]
6436   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6437   "vmovl.<US><V_sz_elem> %q0, %f1"
6438   [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the high-half widening unpack.
;; It builds a PARALLEL selector holding the <V_mode_nunits>/2 indices
;; nunits/2 .. nunits-1 and hands it, together with operands 0 and 1, to
;; gen_neon_vec_unpack<US>_hi_<mode>.  Only enabled when
;; !BYTES_BIG_ENDIAN.
6441 (define_expand "vec_unpack<US>_hi_<mode>"
6442   [(match_operand:<V_unpack> 0 "register_operand" "")
6443    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6444  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6445   {
6446    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6447    rtx t1;
6448    int i;
6449    for (i = 0; i < (<V_mode_nunits>/2); i++)
6450      RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6451   
6452    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6453    emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], 
6454                                                  operands[1], 
6455                                                  t1));
6456    DONE;
6457   }
;; Expander for the low-half widening unpack.
;; It builds a PARALLEL selector holding the <V_mode_nunits>/2 indices
;; 0 .. nunits/2-1 and hands it, together with operands 0 and 1, to
;; gen_neon_vec_unpack<US>_lo_<mode>.  Only enabled when
;; !BYTES_BIG_ENDIAN.
6460 (define_expand "vec_unpack<US>_lo_<mode>"
6461   [(match_operand:<V_unpack> 0 "register_operand" "")
6462    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6463  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6464   {
6465    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6466    rtx t1;
6467    int i;
6468    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6469      RTVEC_ELT (v, i) = GEN_INT (i);
6470    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6471    emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], 
6472                                                  operands[1], 
6473                                                  t1));
6474    DONE;
6475   }
;; Widening multiply of matching halves of two VU vectors.  Operand 2 (a
;; vect_par_constant_low selector) is applied to operand 1 and, through
;; match_dup 2, to operand 3; each selected <V_HALF> is extended by SE to
;; <V_unpack> and the two are multiplied.
;; Emits one vmull reading both inputs through the %e modifier.
;; Disabled when BYTES_BIG_ENDIAN.
6478 (define_insn "neon_vec_<US>mult_lo_<mode>"
6479  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6480        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6481                            (match_operand:VU 1 "register_operand" "w") 
6482                            (match_operand:VU 2 "vect_par_constant_low" "")))
6483                         (SE:<V_unpack> (vec_select:<V_HALF>
6484                            (match_operand:VU 3 "register_operand" "w") 
6485                            (match_dup 2)))))]
6486   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6487   "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6488   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the low-half widening multiply of two VU vectors.  It builds
;; a PARALLEL of the constants 0 .. <V_mode_nunits>/2 - 1 and emits
;; neon_vec_<US>mult_lo_<mode>.  Note the argument order: the selector goes
;; between the two multiplicands because it is operand 2 of that insn, so
;; this expander's operands[2] becomes the insn's operand 3.
;; Disabled when BYTES_BIG_ENDIAN.
6491 (define_expand "vec_widen_<US>mult_lo_<mode>"
6492   [(match_operand:<V_unpack> 0 "register_operand" "")
6493    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6494    (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6495  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6497    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6498    rtx t1;
6499    int i;
6500    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6501      RTVEC_ELT (v, i) = GEN_INT (i);
6502    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6504    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6505                                                operands[1],
6506                                                t1,
6507                                                operands[2]));
6508    DONE;
;; Counterpart of neon_vec_<US>mult_lo_<mode>.  Operand 2 must satisfy
;; vect_par_constant_high and is applied to operand 1 and, through
;; match_dup 2, to operand 3; the selected halves are extended by SE to
;; <V_unpack> and multiplied.
;; Emits one vmull reading both inputs through the %f modifier.
;; Disabled when BYTES_BIG_ENDIAN.
6512 (define_insn "neon_vec_<US>mult_hi_<mode>"
6513  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6514       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6515                             (match_operand:VU 1 "register_operand" "w") 
6516                             (match_operand:VU 2 "vect_par_constant_high" "")))
6517                        (SE:<V_unpack> (vec_select:<V_HALF>
6518                             (match_operand:VU 3 "register_operand" "w") 
6519                             (match_dup 2)))))]
6520   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6521   "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6522   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the high-half widening multiply of two VU vectors.  It builds
;; a PARALLEL of the constants <V_mode_nunits>/2 + i
;; (i = 0 .. <V_mode_nunits>/2 - 1) and emits neon_vec_<US>mult_hi_<mode>.
;; The selector goes between the two multiplicands because it is operand 2
;; of that insn, so this expander's operands[2] becomes the insn's operand 3.
;; Disabled when BYTES_BIG_ENDIAN.
6525 (define_expand "vec_widen_<US>mult_hi_<mode>"
6526   [(match_operand:<V_unpack> 0 "register_operand" "")
6527    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6528    (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6529  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6531    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6532    rtx t1;
6533    int i;
6534    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6535      RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6536    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6538    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6539                                                operands[1],
6540                                                t1,
6541                                                operands[2]));
6542    DONE;
;; Widening shift left of a VW vector: operand 1 is shifted left by operand 2
;; (which must satisfy const_neon_scalar_shift_amount_operand) and the result
;; is extended by SE to <V_widen>.  Emits one vshll.
;; Unlike the neighbouring half-selecting patterns, the condition here is
;; plain TARGET_NEON with no endianness restriction.
6547 (define_insn "neon_vec_<US>shiftl_<mode>"
6548  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6549        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6550        (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6551   "TARGET_NEON"
6553   return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6555   [(set_attr "type" "neon_shift_imm_long")]
;; Expander for a widening shift left of the low half of a VU vector.
;; It takes a <V_HALF>mode subreg of operand 1 at byte offset 0 and hands it,
;; with the immediate shift count in operand 2, to
;; neon_vec_<US>shiftl_<V_half>.
;; Disabled when BYTES_BIG_ENDIAN.
6558 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6559   [(match_operand:<V_unpack> 0 "register_operand" "")
6560    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6561    (match_operand:SI 2 "immediate_operand" "i")]
6562  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6564   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6565                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6566                 operands[2]));
6567    DONE;
;; Expander for a widening shift left of the high half of a VU vector.
;; It takes a <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. just past the first half, and hands it,
;; with the immediate shift count in operand 2, to
;; neon_vec_<US>shiftl_<V_half>.
;; Disabled when BYTES_BIG_ENDIAN.
6571 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6572   [(match_operand:<V_unpack> 0 "register_operand" "")
6573    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6574    (match_operand:SI 2 "immediate_operand" "i")]
6575  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6577   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6578                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6579                                      GET_MODE_SIZE (<V_HALF>mode)),
6580                 operands[2]));
6581    DONE;
6585 ;; Vectorize for non-neon-quad case
;; Extends a whole VDI vector (operand 1) by SE into a <V_widen> result;
;; there is no vec_select, so no lane selector is involved.
;; Emits one vmovl with operand 0 printed via %q and operand 1 via %P.
6586 (define_insn "neon_unpack<US>_<mode>"
6587  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6588        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6589  "TARGET_NEON"
6590  "vmovl.<US><V_sz_elem> %q0, %P1"
6591   [(set_attr "type" "neon_move")]
;; Low-half unpack for VDI modes, done in two steps: widen the whole input
;; into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>, then copy the
;; low part of that pseudo into operand 0 with neon_vget_low<V_widen_l>.
6594 (define_expand "vec_unpack<US>_lo_<mode>"
6595  [(match_operand:<V_double_width> 0 "register_operand" "")
6596   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6597  "TARGET_NEON"
6599   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6600   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6601   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6603   DONE;
;; High-half unpack for VDI modes, done in two steps: widen the whole input
;; into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>, then copy the
;; high part of that pseudo into operand 0 with neon_vget_high<V_widen_l>.
6607 (define_expand "vec_unpack<US>_hi_<mode>"
6608  [(match_operand:<V_double_width> 0 "register_operand" "")
6609   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6610  "TARGET_NEON"
6612   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6613   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6614   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6616   DONE;
;; Widening multiply of two whole VDI vectors: operands 1 and 2 are each
;; extended by SE to <V_widen> and multiplied.  Emits one vmull with
;; operand 0 printed via %q and the inputs via %P.
6620 (define_insn "neon_vec_<US>mult_<mode>"
6621  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6622        (mult:<V_widen> (SE:<V_widen> 
6623                            (match_operand:VDI 1 "register_operand" "w"))
6624                        (SE:<V_widen> 
6625                            (match_operand:VDI 2 "register_operand" "w"))))]
6626   "TARGET_NEON"
6627   "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6628   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; High-half widening multiply for VDI modes: compute the full widened
;; product into a fresh <V_widen> pseudo with neon_vec_<US>mult_<mode>, then
;; copy its high part into operand 0 with neon_vget_high<V_widen_l>.
6631 (define_expand "vec_widen_<US>mult_hi_<mode>"
6632   [(match_operand:<V_double_width> 0 "register_operand" "")
6633    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6634    (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6635  "TARGET_NEON"
6637    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6638    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6639    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6640                                             
6641    DONE;
;; Low-half widening multiply for VDI modes: compute the full widened
;; product into a fresh <V_widen> pseudo with neon_vec_<US>mult_<mode>, then
;; copy its low part into operand 0 with neon_vget_low<V_widen_l>.
6646 (define_expand "vec_widen_<US>mult_lo_<mode>"
6647   [(match_operand:<V_double_width> 0 "register_operand" "")
6648    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6649    (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6650  "TARGET_NEON"
6652    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6653    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6654    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6655                                             
6656    DONE;
;; High-half widening shift left for VDI modes: perform the full widening
;; shift (operand 1 by immediate operand 2) into a fresh <V_widen> pseudo
;; with neon_vec_<US>shiftl_<mode>, then copy its high part into operand 0
;; with neon_vget_high<V_widen_l>.
6661 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6662  [(match_operand:<V_double_width> 0 "register_operand" "")
6663    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6664    (match_operand:SI 2 "immediate_operand" "i")]
6665  "TARGET_NEON"
6667    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6668    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6669    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6671    DONE;
;; Low-half widening shift left for VDI modes: perform the full widening
;; shift (operand 1 by immediate operand 2) into a fresh <V_widen> pseudo
;; with neon_vec_<US>shiftl_<mode>, then copy its low part into operand 0
;; with neon_vget_low<V_widen_l>.
6675 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6676   [(match_operand:<V_double_width> 0 "register_operand" "")
6677    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6678    (match_operand:SI 2 "immediate_operand" "i")]
6679  "TARGET_NEON"
6681    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6682    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6683    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6685    DONE;
6689 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6690 ; because the ordering of vector elements in Q registers is different from what
6691 ; the semantics of the instructions require.
;; Packs two VN vectors into one <V_narrow_pack> vector: operands 1 and 2
;; are each truncated to <V_narrow> and concatenated (operand 1 first).
;; Emitted as two vmovn instructions, the first writing %e0 and the second
;; writing %f0.  The destination is earlyclobber ("=&w") because the first
;; vmovn writes part of operand 0 before the second one reads operand 2.
;; The "length" of 8 presumably covers the two instructions.
;; Disabled when BYTES_BIG_ENDIAN.
6693 (define_insn "vec_pack_trunc_<mode>"
6694  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6695        (vec_concat:<V_narrow_pack> 
6696                 (truncate:<V_narrow> 
6697                         (match_operand:VN 1 "register_operand" "w"))
6698                 (truncate:<V_narrow>
6699                         (match_operand:VN 2 "register_operand" "w"))))]
6700  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6701  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6702  [(set_attr "type" "multiple")
6703   (set_attr "length" "8")]
6706 ;; For the non-quad case.
;; Truncates a single VN vector (operand 1) to <V_narrow>.  Emits one vmovn
;; with operand 0 printed via %P and operand 1 via %q.
;; Disabled when BYTES_BIG_ENDIAN.
6707 (define_insn "neon_vec_pack_trunc_<mode>"
6708  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6709        (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6710  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6711  "vmovn.i<V_sz_elem>\t%P0, %q1"
6712  [(set_attr "type" "neon_move_narrow_q")]
;; Pack-and-truncate for VSHFT modes.  The two inputs are first assembled
;; into one <V_DOUBLE> pseudo (operand 1 via move_lo_quad_<V_double>,
;; operand 2 via move_hi_quad_<V_double>), and that pseudo is then narrowed
;; into operand 0 with neon_vec_pack_trunc_<V_double>.
;; Disabled when BYTES_BIG_ENDIAN.
6715 (define_expand "vec_pack_trunc_<mode>"
6716  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6717   (match_operand:VSHFT 1 "register_operand" "")
6718   (match_operand:VSHFT 2 "register_operand")]
6719  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6721   rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6722   
6723   emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1])); 
6724   emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2])); 
6725   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6726   DONE;
;; Matches abs (operand 1 - operand 2) on VF vectors written with a plain
;; RTL minus, and emits a single vabd.  Only enabled when
;; flag_unsafe_math_optimizations is set.
6729 (define_insn "neon_vabd<mode>_2"
6730  [(set (match_operand:VF 0 "s_register_operand" "=w")
6731        (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6732                          (match_operand:VF 2 "s_register_operand" "w"))))]
6733  "TARGET_NEON && flag_unsafe_math_optimizations"
6734  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6735  [(set_attr "type" "neon_fp_abd_s<q>")]
;; Same as neon_vabd<mode>_2 except that the subtraction is matched in its
;; UNSPEC_VSUB form instead of a plain minus.  Emits a single vabd and is
;; only enabled when flag_unsafe_math_optimizations is set.
6738 (define_insn "neon_vabd<mode>_3"
6739  [(set (match_operand:VF 0 "s_register_operand" "=w")
6740        (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6741                             (match_operand:VF 2 "s_register_operand" "w")]
6742                 UNSPEC_VSUB)))]
6743  "TARGET_NEON && flag_unsafe_math_optimizations"
6744  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6745  [(set_attr "type" "neon_fp_abd_s<q>")]
6748 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; Post-reload split of sign_extend SI -> DI, applied only when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  operands[2] is the
;; destination hard register re-expressed in V2SImode: the source is
;; duplicated into it, then the DI value is arithmetic-shifted right by 32.
6750 (define_split
6751   [(set (match_operand:DI 0 "s_register_operand" "")
6752         (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6753   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6754   [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6755    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6756   {
6757     operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6758   })
;; Post-reload split of sign_extend HI -> DI, applied only when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  operands[2] is the
;; destination hard register re-expressed in V4HImode: the source is
;; duplicated into it, then the DI value is arithmetic-shifted right by 48.
6760 (define_split
6761   [(set (match_operand:DI 0 "s_register_operand" "")
6762         (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6763   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6764   [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6765    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6766   {
6767     operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6768   })
;; Post-reload split of sign_extend QI -> DI, applied only when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  operands[2] is the
;; destination hard register re-expressed in V8QImode: the source is
;; duplicated into it, then the DI value is arithmetic-shifted right by 56.
6770 (define_split
6771   [(set (match_operand:DI 0 "s_register_operand" "")
6772         (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6773   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6774   [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6775    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6776   {
6777     operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6778   })
;; Post-reload split of zero_extend SI -> DI, applied only when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  operands[2] is the
;; destination hard register re-expressed in V2SImode: the source is
;; duplicated into it, then the DI value is logical-shifted right by 32.
6780 (define_split
6781   [(set (match_operand:DI 0 "s_register_operand" "")
6782         (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6783   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6784   [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6785    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6786   {
6787     operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6788   })
;; Post-reload split of zero_extend HI -> DI, applied only when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  operands[2] is the
;; destination hard register re-expressed in V4HImode: the source is
;; duplicated into it, then the DI value is logical-shifted right by 48.
6790 (define_split
6791   [(set (match_operand:DI 0 "s_register_operand" "")
6792         (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6793   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6794   [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6795    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6796   {
6797     operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6798   })
;; Post-reload split of zero_extend QI -> DI, applied only when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  operands[2] is the
;; destination hard register re-expressed in V8QImode: the source is
;; duplicated into it, then the DI value is logical-shifted right by 56.
6800 (define_split
6801   [(set (match_operand:DI 0 "s_register_operand" "")
6802         (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6803   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6804   [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6805    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6806   {
6807     operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6808   })