2018-01-29 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / config / arm / neon.md
blob6a6f5d737715e4100adee8fb7de1d6211da3d85c
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3.  If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Its permitted values are "vadd", "vmin" and "vmax"; the default is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX modes.  It matches only when TARGET_NEON holds
;; and at least one of the two operands is a register.  Output, by
;; alternative:
;;   0     - plain "vmov" between the %P forms of both operands.
;;   1, 3  - delegated to output_move_neon.
;;   2     - constant source (constraint Dn).  neon_immediate_valid_for_move
;;           is handed &operands[1] and &width and must report success; a
;;           width of 0 selects the "vmov.f32" form, any other width is
;;           formatted as "vmov.i<width>" into a static buffer.
;;   4, 5  - "vmov" using the %Q/%R parts of whichever operand has the "r"
;;           constraint.
;;   6-8   - delegated to output_move_double.
;; The attribute lists at the end have one entry per alternative.
26 (define_insn "*neon_mov<mode>"
27   [(set (match_operand:VDX 0 "nonimmediate_operand"
28           "=w,Un,w, w,  ?r,?w,?r,?r, ?Us")
29         (match_operand:VDX 1 "general_operand"
30           " w,w, Dn,Uni, w, r, r, Usi,r"))]
31   "TARGET_NEON
32    && (register_operand (operands[0], <MODE>mode)
33        || register_operand (operands[1], <MODE>mode))"
35   if (which_alternative == 2)
36     {
37       int width, is_valid;
38       static char templ[40];
40       is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41         &operands[1], &width);
43       gcc_assert (is_valid != 0);
45       if (width == 0)
46         return "vmov.f32\t%P0, %1  @ <mode>";
47       else
48         sprintf (templ, "vmov.i%d\t%%P0, %%x1  @ <mode>", width);
50       return templ;
51     }
53   switch (which_alternative)
54     {
55     case 0: return "vmov\t%P0, %P1  @ <mode>";
56     case 1: case 3: return output_move_neon (operands);
57     case 2: gcc_unreachable ();
58     case 4: return "vmov\t%Q0, %R0, %P1  @ <mode>";
59     case 5: return "vmov\t%P0, %Q1, %R1  @ <mode>";
60     default: return output_move_double (operands, true, NULL);
61     }
63  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64                     neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65                     neon_load1_2reg, neon_store1_2reg")
66   (set_attr "length" "4,4,4,4,4,4,8,8,8")
67   (set_attr "arm_pool_range"     "*,*,*,1020,*,*,*,1020,*")
68   (set_attr "thumb2_pool_range"     "*,*,*,1018,*,*,*,1018,*")
69   (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern for the VQXMOV modes.  It matches only when TARGET_NEON
;; holds and at least one of the two operands is a register.  Output, by
;; alternative:
;;   0     - plain "vmov" between the %q forms of both operands.
;;   1, 3  - delegated to output_move_neon.
;;   2     - constant source (constraint Dn).  neon_immediate_valid_for_move
;;           is handed &operands[1] and &width and must report success; a
;;           width of 0 selects the "vmov.f32" form, any other width is
;;           formatted as "vmov.i<width>" into a static buffer.
;;   4, 5  - two "vmov" instructions, one for the %e part and one for the
;;           %f part of the "w" operand, paired with %Q/%R and %J/%K of the
;;           "r" operand.
;;   6-8   - delegated to output_move_quad.
;; The attribute lists at the end have one entry per alternative.
71 (define_insn "*neon_mov<mode>"
72   [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73           "=w,Un,w, w,  ?r,?w,?r,?r,  ?Us")
74         (match_operand:VQXMOV 1 "general_operand"
75           " w,w, Dn,Uni, w, r, r, Usi, r"))]
76   "TARGET_NEON
77    && (register_operand (operands[0], <MODE>mode)
78        || register_operand (operands[1], <MODE>mode))"
80   if (which_alternative == 2)
81     {
82       int width, is_valid;
83       static char templ[40];
85       is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86         &operands[1], &width);
88       gcc_assert (is_valid != 0);
90       if (width == 0)
91         return "vmov.f32\t%q0, %1  @ <mode>";
92       else
93         sprintf (templ, "vmov.i%d\t%%q0, %%1  @ <mode>", width);
95       return templ;
96     }
98   switch (which_alternative)
99     {
100     case 0: return "vmov\t%q0, %q1  @ <mode>";
101     case 1: case 3: return output_move_neon (operands);
102     case 2: gcc_unreachable ();
103     case 4: return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";
104     case 5: return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";
105     default: return output_move_quad (operands);
106     }
108   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109                      neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110                      mov_reg,neon_load1_4reg,neon_store1_4reg")
111    (set_attr "length" "4,8,4,8,8,8,16,8,16")
112    (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113    (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114    (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Expander for TImode moves, enabled by TARGET_NEON.  While new pseudo
;; registers can still be created, a source whose destination is not a REG
;; is first forced into a TImode register.
116 (define_expand "movti"
117   [(set (match_operand:TI 0 "nonimmediate_operand" "")
118         (match_operand:TI 1 "general_operand" ""))]
119   "TARGET_NEON"
121   if (can_create_pseudo_p ())
122     {
123       if (!REG_P (operands[0]))
124         operands[1] = force_reg (TImode, operands[1]);
125     }
;; Expander for moves in the VSTRUCT modes, enabled by TARGET_NEON.  While
;; new pseudo registers can still be created, a source whose destination is
;; not a REG is first forced into a register of the same mode.
128 (define_expand "mov<mode>"
129   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130         (match_operand:VSTRUCT 1 "general_operand" ""))]
131   "TARGET_NEON"
133   if (can_create_pseudo_p ())
134     {
135       if (!REG_P (operands[0]))
136         operands[1] = force_reg (<MODE>mode, operands[1]);
137     }
;; Expander for V4HFmode moves, enabled by TARGET_NEON && TARGET_FP16.
;; Both operands use the s_register_operand predicate.  While new pseudo
;; registers can still be created, a source whose destination is not a REG
;; is forced into a V4HFmode register (see the comment in the body for why).
140 (define_expand "movv4hf"
141   [(set (match_operand:V4HF 0 "s_register_operand")
142         (match_operand:V4HF 1 "s_register_operand"))]
143   "TARGET_NEON && TARGET_FP16"
145   /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS
146      causing an ICE on big-endian because it cannot extract subregs in
147      this case.  */
148   if (can_create_pseudo_p ())
149     {
150       if (!REG_P (operands[0]))
151         operands[1] = force_reg (V4HFmode, operands[1]);
152     }
;; Expander for V8HFmode moves, enabled by TARGET_NEON && TARGET_FP16.
;; Both operands are declared with an empty predicate string.  While new
;; pseudo registers can still be created, a source whose destination is not
;; a REG is forced into a V8HFmode register (see the comment in the body
;; for why).
155 (define_expand "movv8hf"
156   [(set (match_operand:V8HF 0 "")
157         (match_operand:V8HF 1 ""))]
158   "TARGET_NEON && TARGET_FP16"
160   /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS
161      causing an ICE on big-endian because it cannot extract subregs in
162      this case.  */
163   if (can_create_pseudo_p ())
164     {
165       if (!REG_P (operands[0]))
166         operands[1] = force_reg (V8HFmode, operands[1]);
167     }
;; Move pattern for the VSTRUCT modes.  It matches only when TARGET_NEON
;; holds and at least one of the two operands is a register.
;;   alternative 0 (w <- w)        - returns "#", i.e. no assembly template
;;                                   is given here.
;;   alternatives 1, 2 (Ut <-> w)  - delegated to output_move_neon.
;; The "length" attribute is computed by arm_attr_length_move_neon (insn).
;; NOTE(review): the "#" alternative is presumably resolved by the
;; define_splits for EI/OI/CI/XI register copies elsewhere in this file --
;; confirm that those modes are the members of VSTRUCT.
170 (define_insn "*neon_mov<mode>"
171   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172         (match_operand:VSTRUCT 1 "general_operand"      " w,w, Ut"))]
173   "TARGET_NEON
174    && (register_operand (operands[0], <MODE>mode)
175        || register_operand (operands[1], <MODE>mode))"
177   switch (which_alternative)
178     {
179     case 0: return "#";
180     case 1: case 2: return output_move_neon (operands);
181     default: gcc_unreachable ();
182     }
184   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185    (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload, split an EImode register-to-register copy into two sets.
;; The pieces are a TImode register at the original register number and a
;; DImode register four register numbers higher, for destination and source
;; alike.  They are passed, with the piece count 2, to
;; neon_disambiguate_copy together with the operands array.
;; NOTE(review): operands 2 and 3 of the replacement template are not
;; assigned in this body, so neon_disambiguate_copy presumably fills them
;; in -- confirm against its definition.
187 (define_split
188   [(set (match_operand:EI 0 "s_register_operand" "")
189         (match_operand:EI 1 "s_register_operand" ""))]
190   "TARGET_NEON && reload_completed"
191   [(set (match_dup 0) (match_dup 1))
192    (set (match_dup 2) (match_dup 3))]
194   int rdest = REGNO (operands[0]);
195   int rsrc = REGNO (operands[1]);
196   rtx dest[2], src[2];
198   dest[0] = gen_rtx_REG (TImode, rdest);
199   src[0] = gen_rtx_REG (TImode, rsrc);
200   dest[1] = gen_rtx_REG (DImode, rdest + 4);
201   src[1] = gen_rtx_REG (DImode, rsrc + 4);
203   neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, split an OImode register-to-register copy into two sets.
;; The pieces are two TImode registers, at the original register number and
;; four register numbers higher, for destination and source alike.  They are
;; passed, with the piece count 2, to neon_disambiguate_copy together with
;; the operands array.
;; NOTE(review): operands 2 and 3 of the replacement template are not
;; assigned in this body, so neon_disambiguate_copy presumably fills them
;; in -- confirm against its definition.
206 (define_split
207   [(set (match_operand:OI 0 "s_register_operand" "")
208         (match_operand:OI 1 "s_register_operand" ""))]
209   "TARGET_NEON && reload_completed"
210   [(set (match_dup 0) (match_dup 1))
211    (set (match_dup 2) (match_dup 3))]
213   int rdest = REGNO (operands[0]);
214   int rsrc = REGNO (operands[1]);
215   rtx dest[2], src[2];
217   dest[0] = gen_rtx_REG (TImode, rdest);
218   src[0] = gen_rtx_REG (TImode, rsrc);
219   dest[1] = gen_rtx_REG (TImode, rdest + 4);
220   src[1] = gen_rtx_REG (TImode, rsrc + 4);
222   neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, split a CImode register-to-register copy into three sets.
;; The pieces are three TImode registers, at offsets 0, 4 and 8 from the
;; original register number, for destination and source alike.  They are
;; passed, with the piece count 3, to neon_disambiguate_copy together with
;; the operands array.
;; NOTE(review): operands 2-5 of the replacement template are not assigned
;; in this body, so neon_disambiguate_copy presumably fills them in --
;; confirm against its definition.
225 (define_split
226   [(set (match_operand:CI 0 "s_register_operand" "")
227         (match_operand:CI 1 "s_register_operand" ""))]
228   "TARGET_NEON && reload_completed"
229   [(set (match_dup 0) (match_dup 1))
230    (set (match_dup 2) (match_dup 3))
231    (set (match_dup 4) (match_dup 5))]
233   int rdest = REGNO (operands[0]);
234   int rsrc = REGNO (operands[1]);
235   rtx dest[3], src[3];
237   dest[0] = gen_rtx_REG (TImode, rdest);
238   src[0] = gen_rtx_REG (TImode, rsrc);
239   dest[1] = gen_rtx_REG (TImode, rdest + 4);
240   src[1] = gen_rtx_REG (TImode, rsrc + 4);
241   dest[2] = gen_rtx_REG (TImode, rdest + 8);
242   src[2] = gen_rtx_REG (TImode, rsrc + 8);
244   neon_disambiguate_copy (operands, dest, src, 3);
;; After reload, split an XImode register-to-register copy into four sets.
;; The pieces are four TImode registers, at offsets 0, 4, 8 and 12 from the
;; original register number, for destination and source alike.  They are
;; passed, with the piece count 4, to neon_disambiguate_copy together with
;; the operands array.
;; NOTE(review): operands 2-7 of the replacement template are not assigned
;; in this body, so neon_disambiguate_copy presumably fills them in --
;; confirm against its definition.
247 (define_split
248   [(set (match_operand:XI 0 "s_register_operand" "")
249         (match_operand:XI 1 "s_register_operand" ""))]
250   "TARGET_NEON && reload_completed"
251   [(set (match_dup 0) (match_dup 1))
252    (set (match_dup 2) (match_dup 3))
253    (set (match_dup 4) (match_dup 5))
254    (set (match_dup 6) (match_dup 7))]
256   int rdest = REGNO (operands[0]);
257   int rsrc = REGNO (operands[1]);
258   rtx dest[4], src[4];
260   dest[0] = gen_rtx_REG (TImode, rdest);
261   src[0] = gen_rtx_REG (TImode, rsrc);
262   dest[1] = gen_rtx_REG (TImode, rdest + 4);
263   src[1] = gen_rtx_REG (TImode, rsrc + 4);
264   dest[2] = gen_rtx_REG (TImode, rdest + 8);
265   src[2] = gen_rtx_REG (TImode, rsrc + 8);
266   dest[3] = gen_rtx_REG (TImode, rdest + 12);
267   src[3] = gen_rtx_REG (TImode, rsrc + 12);
269   neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned moves in the VDQX modes, wrapped in
;; UNSPEC_MISALIGNED_ACCESS.  Enabled only when TARGET_NEON holds, the
;; target is not BYTES_BIG_ENDIAN and unaligned_access is set.
;; The body first makes sure at least one operand is a register, then picks
;; the non-register side as adjust_mem (operand 1 if operand 0 is a
;; register, otherwise operand 0).  If adjust_mem does not satisfy
;; neon_vector_mem_operand (adjust_mem, 2, true), its address is replaced by
;; a Pmode register holding that address.
272 (define_expand "movmisalign<mode>"
273   [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274         (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275                      UNSPEC_MISALIGNED_ACCESS))]
276   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
278   rtx adjust_mem;
279   /* This pattern is not permitted to fail during expansion: if both arguments
280      are non-registers (e.g. memory := constant, which can be created by the
281      auto-vectorizer), force operand 1 into a register.  */
282   if (!s_register_operand (operands[0], <MODE>mode)
283       && !s_register_operand (operands[1], <MODE>mode))
284     operands[1] = force_reg (<MODE>mode, operands[1]);
286   if (s_register_operand (operands[0], <MODE>mode))
287     adjust_mem = operands[1];
288   else
289     adjust_mem = operands[0];
291   /* Legitimize address.  */
292   if (!neon_vector_mem_operand (adjust_mem, 2, true))
293     XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store for the VDX modes: register operand 1 is written to the
;; "Um" memory operand 0 with a single "vst1.<V_sz_elem>" of {%P1}.
;; Same enabling condition as the movmisalign<mode> expander.
297 (define_insn "*movmisalign<mode>_neon_store"
298   [(set (match_operand:VDX 0 "neon_permissive_struct_operand"   "=Um")
299         (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300                     UNSPEC_MISALIGNED_ACCESS))]
301   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302   "vst1.<V_sz_elem>\t{%P1}, %A0"
303   [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load for the VDX modes: the "Um" memory operand 1 is read into
;; register operand 0 with a single "vld1.<V_sz_elem>" of {%P0}.
;; Same enabling condition as the movmisalign<mode> expander.
305 (define_insn "*movmisalign<mode>_neon_load"
306   [(set (match_operand:VDX 0 "s_register_operand"                       "=w")
307         (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
308                                                                         " Um")]
309                     UNSPEC_MISALIGNED_ACCESS))]
310   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311   "vld1.<V_sz_elem>\t{%P0}, %A1"
312   [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store for the VQX modes: register operand 1 is written to the
;; "Um" memory operand 0 with a single "vst1.<V_sz_elem>" of {%q1}.
;; Same enabling condition as the movmisalign<mode> expander.
314 (define_insn "*movmisalign<mode>_neon_store"
315   [(set (match_operand:VQX 0 "neon_permissive_struct_operand"  "=Um")
316         (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317                     UNSPEC_MISALIGNED_ACCESS))]
318   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319   "vst1.<V_sz_elem>\t{%q1}, %A0"
320   [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load for the VQX modes: the "Um" memory operand 1 is read into
;; register operand 0 with a single "vld1.<V_sz_elem>" of {%q0}.
;; Same enabling condition as the movmisalign<mode> expander.
322 (define_insn "*movmisalign<mode>_neon_load"
323   [(set (match_operand:VQX 0 "s_register_operand"                       "=w")
324         (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
325                                                                         " Um")]
326                     UNSPEC_MISALIGNED_ACCESS))]
327   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328   "vld1.<V_sz_elem>\t{%q0}, %A1"
329   [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of a VD_LANE vector, expressed as a
;; vec_merge of the duplicated scalar with operand 3 (tied to operand 0).
;; Operand 2 is the vec_merge selector; the lane number is taken as the
;; position of its lowest set bit (ffs () - 1) and is mirrored within the
;; vector when BYTES_BIG_ENDIAN.  operands[2] is then overwritten with that
;; lane number for use by the %c2 in the templates.
;;   alternative 0 (Um) - "vld1" into the selected lane from memory.
;;   alternative 1 (r)  - "vmov" into the selected lane from operand 1.
331 (define_insn "vec_set<mode>_internal"
332   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
333         (vec_merge:VD_LANE
334           (vec_duplicate:VD_LANE
335             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336           (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337           (match_operand:SI 2 "immediate_operand" "i,i")))]
338   "TARGET_NEON"
340   int elt = ffs ((int) INTVAL (operands[2])) - 1;
341   if (BYTES_BIG_ENDIAN)
342     elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343   operands[2] = GEN_INT (elt);
345   if (which_alternative == 0)
346     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
347   else
348     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
350   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert scalar operand 1 into one lane of a VQ2 vector, expressed as a
;; vec_merge of the duplicated scalar with operand 3 (tied to operand 0).
;; The lane number is the position of the lowest set bit of operand 2
;; (ffs () - 1).  It is decomposed relative to half the vector:
;;   elt = lane % half_elts   - lane within the chosen half, mirrored
;;                              within that half when BYTES_BIG_ENDIAN;
;;   hi  = (lane / half_elts) * 2 - register-number offset of that half.
;; operands[0] is then replaced by a <V_HALF>mode register at REGNO + hi and
;; operands[2] by elt, so the templates address a %P register lane.
;;   alternative 0 (Um) - "vld1" into the selected lane from memory.
;;   alternative 1 (r)  - "vmov" into the selected lane from operand 1.
352 (define_insn "vec_set<mode>_internal"
353   [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
354         (vec_merge:VQ2
355           (vec_duplicate:VQ2
356             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357           (match_operand:VQ2 3 "s_register_operand" "0,0")
358           (match_operand:SI 2 "immediate_operand" "i,i")))]
359   "TARGET_NEON"
361   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363   int elt = elem % half_elts;
364   int hi = (elem / half_elts) * 2;
365   int regno = REGNO (operands[0]);
367   if (BYTES_BIG_ENDIAN)
368     elt = half_elts - 1 - elt;
370   operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371   operands[2] = GEN_INT (elt);
373   if (which_alternative == 0)
374     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
375   else
376     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
378   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert DImode operand 1 into one element of a V2DI vector, expressed as a
;; vec_merge of the duplicated scalar with operand 3 (tied to operand 0).
;; The element number is the position of the lowest set bit of operand 2
;; (ffs () - 1).  operands[0] is replaced by a DImode register at
;; REGNO + 2 * element, so the whole 64-bit register is written.
;;   alternative 0 (Um) - "vld1.64" from memory.
;;   alternative 1 (r)  - "vmov" from the %Q/%R parts of operand 1.
;; NOTE(review): the "vld1.64" template writes %P0 without the surrounding
;; {} that the other vld1/vst1 templates in this file use -- confirm the
;; assembler accepts this form.
381 (define_insn "vec_setv2di_internal"
382   [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
383         (vec_merge:V2DI
384           (vec_duplicate:V2DI
385             (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386           (match_operand:V2DI 3 "s_register_operand" "0,0")
387           (match_operand:SI 2 "immediate_operand" "i,i")))]
388   "TARGET_NEON"
390   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391   int regno = REGNO (operands[0]) + 2 * elem;
393   operands[0] = gen_rtx_REG (DImode, regno);
395   if (which_alternative == 0)
396     return "vld1.64\t%P0, %A1";
397   else
398     return "vmov\t%P0, %Q1, %R1";
400   [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set on the VDQ modes.  Operand 2 is shifted into a
;; single-bit value (HOST_WIDE_INT_1 << operand 2) and passed as the
;; selector to vec_set<mode>_internal.  Operand 0 is passed both as the
;; destination and as the vector being merged into.  The expander finishes
;; with DONE, so only the emitted insn is produced.
403 (define_expand "vec_set<mode>"
404   [(match_operand:VDQ 0 "s_register_operand" "")
405    (match_operand:<V_elem> 1 "s_register_operand" "")
406    (match_operand:SI 2 "immediate_operand" "")]
407   "TARGET_NEON"
409   HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411                                          GEN_INT (elem), operands[0]));
412   DONE;
;; Extract the lane selected by immediate operand 2 from VD_LANE register
;; operand 1.  When BYTES_BIG_ENDIAN, the lane number is mirrored within the
;; vector and written back to operands[2] before the template is chosen.
;;   alternative 0 (Um) - "vst1" of the selected lane to memory.
;;   alternative 1 (r)  - "vmov.<V_uf_sclr>" of the selected lane to
;;                        operand 0.
415 (define_insn "vec_extract<mode><V_elem_l>"
416   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
417         (vec_select:<V_elem>
418           (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
420   "TARGET_NEON"
422   if (BYTES_BIG_ENDIAN)
423     {
424       int elt = INTVAL (operands[2]);
425       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426       operands[2] = GEN_INT (elt);
427     }
429   if (which_alternative == 0)
430     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
431   else
432     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
434   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract the lane selected by immediate operand 2 from VQ2 register
;; operand 1.  The lane number is decomposed relative to half the vector:
;;   elt = lane % half_elts   - lane within the chosen half, mirrored
;;                              within that half when BYTES_BIG_ENDIAN;
;;   hi  = (lane / half_elts) * 2 - register-number offset of that half.
;; operands[1] is then replaced by a <V_HALF>mode register at REGNO + hi and
;; operands[2] by elt, so the templates address a %P register lane.
;;   alternative 0 (Um) - "vst1" of the selected lane to memory.
;;   alternative 1 (r)  - "vmov.<V_uf_sclr>" of the selected lane to
;;                        operand 0.
437 (define_insn "vec_extract<mode><V_elem_l>"
438   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
439         (vec_select:<V_elem>
440           (match_operand:VQ2 1 "s_register_operand" "w,w")
441           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
442   "TARGET_NEON"
444   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445   int elt = INTVAL (operands[2]) % half_elts;
446   int hi = (INTVAL (operands[2]) / half_elts) * 2;
447   int regno = REGNO (operands[1]);
449   if (BYTES_BIG_ENDIAN)
450     elt = half_elts - 1 - elt;
452   operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453   operands[2] = GEN_INT (elt);
455   if (which_alternative == 0)
456     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
457   else
458     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
460   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract the DImode element selected by immediate operand 2 from V2DI
;; register operand 1.  operands[1] is replaced by a DImode register at
;; REGNO + 2 * operand 2, so the whole 64-bit register is read.
;;   alternative 0 (Um) - "vst1.64" of that register to memory.
;;   alternative 1 (r)  - "vmov" of that register to the %Q/%R parts of
;;                        operand 0.
463 (define_insn "vec_extractv2didi"
464   [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
465         (vec_select:DI
466           (match_operand:V2DI 1 "s_register_operand" "w,w")
467           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
468   "TARGET_NEON"
470   int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
472   operands[1] = gen_rtx_REG (DImode, regno);
474   if (which_alternative == 0)
475     return "vst1.64\t{%P1}, %A0  @ v2di";
476   else
477     return "vmov\t%Q0, %R0, %P1  @ v2di";
479   [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vector initialisation in the VDQ modes.  All the work is
;; delegated to neon_expand_vector_init (operands[0], operands[1]); the
;; expander then finishes with DONE.
482 (define_expand "vec_init<mode><V_elem_l>"
483   [(match_operand:VDQ 0 "s_register_operand" "")
484    (match_operand 1 "" "")]
485   "TARGET_NEON"
487   neon_expand_vector_init (operands[0], operands[1]);
488   DONE;
491 ;; Doubleword and quadword arithmetic.
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition for the VDQ modes, emitted as "vadd.<V_if_elem>".
;; Enabled under TARGET_NEON; for modes where <Is_float_mode> is true it
;; additionally requires flag_unsafe_math_optimizations.
;; The "type" attribute is neon_fp_addsub_s<q> when <Is_float_mode> tests
;; true and neon_add<q> otherwise.
496 (define_insn "*add<mode>3_neon"
497   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498         (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499                   (match_operand:VDQ 2 "s_register_operand" "w")))]
500   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
502   [(set (attr "type")
503       (if_then_else (match_test "<Is_float_mode>")
504                     (const_string "neon_fp_addsub_s<q>")
505                     (const_string "neon_add<q>")))]
508 ;; As with SFmode, full support for HFmode vector arithmetic is only available
509 ;; when flag_unsafe_math_optimizations is enabled.
;; Vector addition for the VH modes, emitted as "vadd.<V_if_elem>".
;; Enabled only when both TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations hold.
;; The "type" attribute is neon_fp_addsub_s<q> when <Is_float_mode> tests
;; true and neon_add<q> otherwise.
511 (define_insn "add<mode>3"
512   [(set
513     (match_operand:VH 0 "s_register_operand" "=w")
514     (plus:VH
515      (match_operand:VH 1 "s_register_operand" "w")
516      (match_operand:VH 2 "s_register_operand" "w")))]
517  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
518  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
519  [(set (attr "type")
520    (if_then_else (match_test "<Is_float_mode>")
521     (const_string "neon_fp_addsub_s<q>")
522     (const_string "neon_add<q>")))]
;; Vector addition for the VH modes, emitted as "vadd.<V_if_elem>".
;; Same RTL and template as add<mode>3 for VH, but enabled by
;; TARGET_NEON_FP16INST alone, without the flag_unsafe_math_optimizations
;; requirement.
525 (define_insn "add<mode>3_fp16"
526   [(set
527     (match_operand:VH 0 "s_register_operand" "=w")
528     (plus:VH
529      (match_operand:VH 1 "s_register_operand" "w")
530      (match_operand:VH 2 "s_register_operand" "w")))]
531  "TARGET_NEON_FP16INST"
532  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
533  [(set (attr "type")
534    (if_then_else (match_test "<Is_float_mode>")
535     (const_string "neon_fp_addsub_s<q>")
536     (const_string "neon_add<q>")))]
;; DImode addition under TARGET_NEON, with a clobber of the CC_REGNUM
;; register in the pattern.  Seven alternatives:
;;   0, 3        - all operands "w": emitted as "vadd.i64".  The "arch"
;;                 attribute marks alternative 0 neon_for_64bits and
;;                 alternative 3 avoid_neon_for_64bits.
;;   1, 2, 4-6   - "r"-register destinations: return "#", i.e. no assembly
;;                 template is given here.  These have "conds" = clob and
;;                 "length" = 8.
539 (define_insn "adddi3_neon"
540   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
541         (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
542                  (match_operand:DI 2 "arm_adddi_operand"     "w,r,0,w,r,Dd,Dd")))
543    (clobber (reg:CC CC_REGNUM))]
544   "TARGET_NEON"
546   switch (which_alternative)
547     {
548     case 0: /* fall through */
549     case 3: return "vadd.i64\t%P0, %P1, %P2";
550     case 1: return "#";
551     case 2: return "#";
552     case 4: return "#";
553     case 5: return "#";
554     case 6: return "#";
555     default: gcc_unreachable ();
556     }
558   [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
559                      multiple,multiple,multiple")
560    (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
561    (set_attr "length" "*,8,8,*,8,8,8")
562    (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtraction for the VDQ modes, emitted as "vsub.<V_if_elem>".
;; Enabled under TARGET_NEON; for modes where <Is_float_mode> is true it
;; additionally requires flag_unsafe_math_optimizations.
;; The "type" attribute is neon_fp_addsub_s<q> when <Is_float_mode> tests
;; true and neon_sub<q> otherwise.
565 (define_insn "*sub<mode>3_neon"
566   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
567         (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
568                    (match_operand:VDQ 2 "s_register_operand" "w")))]
569   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
571   [(set (attr "type")
572       (if_then_else (match_test "<Is_float_mode>")
573                     (const_string "neon_fp_addsub_s<q>")
574                     (const_string "neon_sub<q>")))]
;; Vector subtraction for the VH modes, emitted as "vsub.<V_if_elem>".
;; Enabled only when both TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations hold.
;; NOTE(review): the "type" attribute is unconditionally neon_sub<q>, while
;; add<mode>3 for the same VH iterator selects neon_fp_addsub_s<q> when
;; <Is_float_mode> is true -- confirm the difference is intended.
577 (define_insn "sub<mode>3"
578  [(set
579    (match_operand:VH 0 "s_register_operand" "=w")
580    (minus:VH
581     (match_operand:VH 1 "s_register_operand" "w")
582     (match_operand:VH 2 "s_register_operand" "w")))]
583  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
584  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585  [(set_attr "type" "neon_sub<q>")]
;; Vector subtraction for the VH modes, emitted as "vsub.<V_if_elem>".
;; Same RTL and template as sub<mode>3 for VH, but enabled by
;; TARGET_NEON_FP16INST alone, without the flag_unsafe_math_optimizations
;; requirement.
;; NOTE(review): the "type" attribute is unconditionally neon_sub<q>, while
;; add<mode>3_fp16 selects neon_fp_addsub_s<q> when <Is_float_mode> is
;; true -- confirm the difference is intended.
588 (define_insn "sub<mode>3_fp16"
589  [(set
590    (match_operand:VH 0 "s_register_operand" "=w")
591    (minus:VH
592     (match_operand:VH 1 "s_register_operand" "w")
593     (match_operand:VH 2 "s_register_operand" "w")))]
594  "TARGET_NEON_FP16INST"
595  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
596  [(set_attr "type" "neon_sub<q>")]
;; DImode subtraction under TARGET_NEON, with a clobber of the CC_REGNUM
;; register in the pattern.  Five alternatives:
;;   0, 4    - all operands "w": emitted as "vsub.i64".  The "arch"
;;             attribute marks alternative 0 neon_for_64bits and
;;             alternative 4 avoid_neon_for_64bits.
;;   1, 2, 3 - "r"-register destinations: emitted as a "subs" on the %Q
;;             parts followed by an "sbc" on the %R parts.  These have
;;             "conds" = clob and "length" = 8.
599 (define_insn "subdi3_neon"
600   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
601         (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
602                   (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
603    (clobber (reg:CC CC_REGNUM))]
604   "TARGET_NEON"
606   switch (which_alternative)
607     {
608     case 0: /* fall through */
609     case 4: return "vsub.i64\t%P0, %P1, %P2";
610     case 1: /* fall through */ 
611     case 2: /* fall through */
612     case 3: return  "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
613     default: gcc_unreachable ();
614     }
616   [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
617    (set_attr "conds" "*,clob,clob,clob,*")
618    (set_attr "length" "*,8,8,8,*")
619    (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiplication for the VDQW modes, emitted as "vmul.<V_if_elem>".
;; Enabled under TARGET_NEON; for modes where <Is_float_mode> is true it
;; additionally requires flag_unsafe_math_optimizations.
;; The "type" attribute is neon_fp_mul_s<q> when <Is_float_mode> tests true
;; and neon_mul_<V_elem_ch><q> otherwise.
622 (define_insn "*mul<mode>3_neon"
623   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
624         (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
625                    (match_operand:VDQW 2 "s_register_operand" "w")))]
626   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
627   "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
628   [(set (attr "type")
629       (if_then_else (match_test "<Is_float_mode>")
630                     (const_string "neon_fp_mul_s<q>")
631                     (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with operand 1 tied to operand 0 (constraint "0").  Emitted
;; as "vmla.<V_if_elem>".
;; Enabled under TARGET_NEON; for modes where <Is_float_mode> is true it
;; additionally requires flag_unsafe_math_optimizations.
;; The "type" attribute is neon_fp_mla_s<q> when <Is_float_mode> tests true
;; and neon_mla_<V_elem_ch><q> otherwise.
634 (define_insn "mul<mode>3add<mode>_neon"
635   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
636         (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
637                             (match_operand:VDQW 3 "s_register_operand" "w"))
638                   (match_operand:VDQW 1 "s_register_operand" "0")))]
639   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
640   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
641   [(set (attr "type")
642       (if_then_else (match_test "<Is_float_mode>")
643                     (const_string "neon_fp_mla_s<q>")
644                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VH modes: operand 0 = operand 2 * operand 3
;; + operand 1, with operand 1 tied to operand 0 (constraint "0").  Emitted
;; as "vmla.f16".
;; Enabled under TARGET_NEON_FP16INST; for modes where <Is_float_mode> is
;; true it additionally requires flag_unsafe_math_optimizations.
647 (define_insn "mul<mode>3add<mode>_neon"
648   [(set (match_operand:VH 0 "s_register_operand" "=w")
649         (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
650                           (match_operand:VH 3 "s_register_operand" "w"))
651                   (match_operand:VH 1 "s_register_operand" "0")))]
652   "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
653   "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
654   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2
;; * operand 3, with operand 1 tied to operand 0 (constraint "0").  Emitted
;; as "vmls.<V_if_elem>".
;; Enabled under TARGET_NEON; for modes where <Is_float_mode> is true it
;; additionally requires flag_unsafe_math_optimizations.
;; The "type" attribute is neon_fp_mla_s<q> when <Is_float_mode> tests true
;; and neon_mla_<V_elem_ch><q> otherwise.
657 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
658   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
659         (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
660                     (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
661                                (match_operand:VDQW 3 "s_register_operand" "w"))))]
662   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
663   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
664   [(set (attr "type")
665       (if_then_else (match_test "<Is_float_mode>")
666                     (const_string "neon_fp_mla_s<q>")
667                     (const_string "neon_mla_<V_elem_ch><q>")))]
670 ;; Fused multiply-accumulate
671 ;; We define each insn twice here:
672 ;;    1: guarded by flag_unsafe_math_optimizations, so that the widening
673 ;;       multiply phase can use it when converting to FMA.
674 ;;    2: without the flag_unsafe_math_optimizations guard, for the intrinsics.
;; Fused multiply-add for the VCVTF modes: fma of operands 1 and 2 with
;; accumulator operand 3, which is tied to operand 0 (constraint "0").
;; Emitted as "vfma.<V_if_elem>".  Enabled only when TARGET_NEON, TARGET_FMA
;; and flag_unsafe_math_optimizations all hold.
675 (define_insn "fma<VCVTF:mode>4"
676   [(set (match_operand:VCVTF 0 "register_operand" "=w")
677         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
678                  (match_operand:VCVTF 2 "register_operand" "w")
679                  (match_operand:VCVTF 3 "register_operand" "0")))]
680   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
681   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
682   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add for the VCVTF modes, same RTL and "vfma" template as
;; fma<VCVTF:mode>4, but enabled by TARGET_NEON && TARGET_FMA without the
;; flag_unsafe_math_optimizations requirement.
685 (define_insn "fma<VCVTF:mode>4_intrinsic"
686   [(set (match_operand:VCVTF 0 "register_operand" "=w")
687         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
688                  (match_operand:VCVTF 2 "register_operand" "w")
689                  (match_operand:VCVTF 3 "register_operand" "0")))]
690   "TARGET_NEON && TARGET_FMA"
691   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
692   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add for the VH modes: fma of operands 1 and 2 with
;; accumulator operand 3, which is tied to operand 0 (constraint "0").
;; Emitted as "vfma.<V_if_elem>".  Enabled only when both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations hold.
695 (define_insn "fma<VH:mode>4"
696  [(set (match_operand:VH 0 "register_operand" "=w")
697    (fma:VH
698     (match_operand:VH 1 "register_operand" "w")
699     (match_operand:VH 2 "register_operand" "w")
700     (match_operand:VH 3 "register_operand" "0")))]
701  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
702  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add for the VH modes, same RTL and "vfma" template as
;; fma<VH:mode>4, but enabled by TARGET_NEON_FP16INST alone, without the
;; flag_unsafe_math_optimizations requirement.
706 (define_insn "fma<VH:mode>4_intrinsic"
707  [(set (match_operand:VH 0 "register_operand" "=w")
708    (fma:VH
709     (match_operand:VH 1 "register_operand" "w")
710     (match_operand:VH 2 "register_operand" "w")
711     (match_operand:VH 3 "register_operand" "0")))]
712  "TARGET_NEON_FP16INST"
713  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
714  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: fma with operand 1 negated,
;; multiplied by operand 2, and accumulator operand 3 tied to operand 0
;; (constraint "0").  Emitted as "vfms.<V_if_elem>".  Enabled only when
;; TARGET_NEON, TARGET_FMA and flag_unsafe_math_optimizations all hold.
717 (define_insn "*fmsub<VCVTF:mode>4"
718   [(set (match_operand:VCVTF 0 "register_operand" "=w")
719         (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
720                    (match_operand:VCVTF 2 "register_operand" "w")
721                    (match_operand:VCVTF 3 "register_operand" "0")))]
722   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
723   "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes, same RTL and "vfms" template
;; as *fmsub<VCVTF:mode>4, but enabled by TARGET_NEON && TARGET_FMA without
;; the flag_unsafe_math_optimizations requirement.
727 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
728  [(set (match_operand:VCVTF 0 "register_operand" "=w")
729    (fma:VCVTF
730     (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
731     (match_operand:VCVTF 2 "register_operand" "w")
732     (match_operand:VCVTF 3 "register_operand" "0")))]
733  "TARGET_NEON && TARGET_FMA"
734  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VH modes: fma with operand 1 negated,
;; multiplied by operand 2, and accumulator operand 3 tied to operand 0
;; (constraint "0").  Emitted as "vfms.<V_if_elem>".  Enabled by
;; TARGET_NEON_FP16INST.
738 (define_insn "fmsub<VH:mode>4_intrinsic"
739  [(set (match_operand:VH 0 "register_operand" "=w")
740    (fma:VH
741     (neg:VH (match_operand:VH 1 "register_operand" "w"))
742     (match_operand:VH 2 "register_operand" "w")
743     (match_operand:VH 3 "register_operand" "0")))]
744  "TARGET_NEON_FP16INST"
745  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
746  [(set_attr "type" "neon_fp_mla_s<q>")]
;; One pattern per NEON_VRINT unspec for the VCVTF modes, emitted as
;; "vrint<nvrint_variant>.f32" from operand 1 to operand 0.  Enabled only
;; when both TARGET_NEON and TARGET_VFP5 hold.
749 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
750   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
751         (unspec:VCVTF [(match_operand:VCVTF 1
752                          "s_register_operand" "w")]
753                 NEON_VRINT))]
754   "TARGET_NEON && TARGET_VFP5"
755   "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
756   [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Conversion of a VCVTF vector to <V_cmp_result>: a FIXUORS code applied to
;; a NEON_VCVT unspec of operand 1.  Emitted as
;; "vcvt<nvrint_variant>.<su>32.f32".  Enabled only when both TARGET_NEON
;; and TARGET_VFP5 hold; the "predicable" attribute is set to "no".
759 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
760   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
761         (FIXUORS:<V_cmp_result> (unspec:VCVTF
762                                [(match_operand:VCVTF 1 "register_operand" "w")]
763                                NEON_VCVT)))]
764   "TARGET_NEON && TARGET_VFP5"
765   "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
766   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
767    (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes under TARGET_NEON.
;;   alternative 0 (w, w)  - three-register "vorr".
;;   alternative 1 (0, Dl) - operand 1 tied to the destination and a "Dl"
;;                           constant; the text comes from
;;                           neon_output_logic_immediate ("vorr", ...), which
;;                           is given &operands[2], the mode, 0 and
;;                           VALID_NEON_QREG_MODE (<MODE>mode).
770 (define_insn "ior<mode>3"
771   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
772         (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
773                  (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
774   "TARGET_NEON"
776   switch (which_alternative)
777     {
778     case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
779     case 1: return neon_output_logic_immediate ("vorr", &operands[2],
780                      <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
781     default: gcc_unreachable ();
782     }
784   [(set_attr "type" "neon_logic<q>")]
787 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
788 ;; vorr. We support the pseudo-instruction vand instead, because that
789 ;; corresponds to the canonical form the middle-end expects to use for
790 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes under TARGET_NEON.
;;   alternative 0 (w, w)  - three-register "vand".
;;   alternative 1 (0, DL) - operand 1 tied to the destination and a "DL"
;;                           constant; the text comes from
;;                           neon_output_logic_immediate ("vand", ...), which
;;                           is given &operands[2], the mode, 1 and
;;                           VALID_NEON_QREG_MODE (<MODE>mode).
792 (define_insn "and<mode>3"
793   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
794         (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
795                  (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
796   "TARGET_NEON"
798   switch (which_alternative)
799     {
800     case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
801     case 1: return neon_output_logic_immediate ("vand", &operands[2],
802                      <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
803     default: gcc_unreachable ();
804     }
806   [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes under TARGET_NEON: operand 0 = operand 1 |
;; ~operand 2.  Emitted as "vorn" with operand 1 before operand 2.
809 (define_insn "orn<mode>3_neon"
810   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
811         (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
812                  (match_operand:VDQ 1 "s_register_operand" "w")))]
813   "TARGET_NEON"
814   "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
815   [(set_attr "type" "neon_logic<q>")]
818 ;; TODO: investigate whether we should disable 
819 ;; this and bicdi3_neon for the A8 in line with the other
820 ;; changes above. 
;; DImode OR-NOT under TARGET_NEON: operand 0 = operand 1 | ~operand 2.
;;   alternative 0 (all "w") - emitted as "vorn" on the %P registers.
;;   alternatives 1-3        - "r"-register forms, output as "#" and split.
;; The split applies after reload when operand 0 is not a VFP register:
;;   - TARGET_THUMB2: the operands are replaced by their SImode low parts
;;     and operands 3/4/5 receive the high parts of operands 0/2/1, so the
;;     replacement template performs one SImode OR-NOT per half.
;;   - otherwise: gen_one_cmpldi2 complements operand 2 into operand 0,
;;     gen_iordi3 then ORs operand 1 into it, and the split ends with DONE.
;; The "arch" attribute restricts alternative 1 to "a" and alternatives 2
;; and 3 to "t2".
821 (define_insn_and_split "orndi3_neon"
822   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
823         (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
824                 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
825   "TARGET_NEON"
826   "@
827    vorn\t%P0, %P1, %P2
828    #
829    #
830    #"
831   "reload_completed && 
832    (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
833   [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
834    (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
835   "
836   {
837     if (TARGET_THUMB2)
838       {
839         operands[3] = gen_highpart (SImode, operands[0]);
840         operands[0] = gen_lowpart (SImode, operands[0]);
841         operands[4] = gen_highpart (SImode, operands[2]);
842         operands[2] = gen_lowpart (SImode, operands[2]);
843         operands[5] = gen_highpart (SImode, operands[1]);
844         operands[1] = gen_lowpart (SImode, operands[1]);
845       }
846     else
847       {
848         emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
849         emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
850         DONE;
851       }
852   }"
853   [(set_attr "type" "neon_logic,multiple,multiple,multiple")
854    (set_attr "length" "*,16,8,8")
855    (set_attr "arch" "any,a,t2,t2")]
;; Vector bit-clear for the VDQ modes: operand 1 ANDed with the bitwise
;; complement of operand 2, emitted as a single vbic.
858 (define_insn "bic<mode>3_neon"
859   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
860         (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
861                  (match_operand:VDQ 1 "s_register_operand" "w")))]
862   "TARGET_NEON"
863   "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
864   [(set_attr "type" "neon_logic<q>")]
;; DImode bit-clear (operand 1 & ~operand 2).
;; Alternative 0 uses NEON "w" registers and emits vbic.  The two
;; core-register alternatives output "#" and therefore have to be split;
;; this pattern defines no split itself.
;; NOTE(review): presumably the split comes from a pattern elsewhere (see the
;; *anddi_notdi_di reference below) -- confirm.
867 ;; Compare to *anddi_notdi_di.
868 (define_insn "bicdi3_neon"
869   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
870         (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
871                 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
872   "TARGET_NEON"
873   "@
874    vbic\t%P0, %P1, %P2
875    #
876    #"
877   [(set_attr "type" "neon_logic,multiple,multiple")
878    (set_attr "length" "*,8,8")]
;; Vector exclusive-OR for the VDQ modes, emitted as veor.
881 (define_insn "xor<mode>3"
882   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
883         (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
884                  (match_operand:VDQ 2 "s_register_operand" "w")))]
885   "TARGET_NEON"
886   "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887   [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise complement for the VDQ modes, emitted as vmvn.
890 (define_insn "one_cmpl<mode>2"
891   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
892         (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
893   "TARGET_NEON"
894   "vmvn\t%<V_reg>0, %<V_reg>1"
895   [(set_attr "type" "neon_move<q>")]
;; Vector absolute value for the VDQW modes, emitted as vabs.<V_s_elem>.
;; The "type" attribute is neon_fp_abs_s<q> when <Is_float_mode> tests true
;; and neon_abs<q> otherwise.
898 (define_insn "abs<mode>2"
899   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
900         (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
901   "TARGET_NEON"
902   "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
903   [(set (attr "type")
904       (if_then_else (match_test "<Is_float_mode>")
905                     (const_string "neon_fp_abs_s<q>")
906                     (const_string "neon_abs<q>")))]
;; Vector negation for the VDQW modes, emitted as vneg.<V_s_elem>.
;; The "type" attribute is neon_fp_neg_s<q> when <Is_float_mode> tests true
;; and neon_neg<q> otherwise.
909 (define_insn "neg<mode>2"
910   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
911         (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
912   "TARGET_NEON"
913   "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
914   [(set (attr "type")
915       (if_then_else (match_test "<Is_float_mode>")
916                     (const_string "neon_fp_neg_s<q>")
917                     (const_string "neon_neg<q>")))]
;; DImode negation with two NEON ("w") and two core-register ("r")
;; alternatives.  The output template is always "#", so the insn has to be
;; split before output.  It clobbers the condition-code register and, in
;; alternative 1 only, a DImode NEON scratch (operand 2).
920 (define_insn "negdi2_neon"
921   [(set (match_operand:DI 0 "s_register_operand"         "=&w, w,r,&r")
922         (neg:DI (match_operand:DI 1 "s_register_operand" "  w, w,0, r")))
923    (clobber (match_scratch:DI 2                          "= X,&w,X, X"))
924    (clobber (reg:CC CC_REGNUM))]
925   "TARGET_NEON"
926   "#"
927   [(set_attr "length" "8")
928    (set_attr "type" "multiple")]
931 ; Split negdi2_neon for vfp registers
;; Applies after reload when operand 0 is a VFP register.  The negation is
;; rewritten as "operand 2 = 0" followed by "operand 0 = operand 2 -
;; operand 1" (the subtraction keeps the CC clobber).  When operand 2 is not
;; a register, operand 0 is used in its place.
932 (define_split
933   [(set (match_operand:DI 0 "s_register_operand" "")
934         (neg:DI (match_operand:DI 1 "s_register_operand" "")))
935    (clobber (match_scratch:DI 2 ""))
936    (clobber (reg:CC CC_REGNUM))]
937   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
938   [(set (match_dup 2) (const_int 0))
939    (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
940               (clobber (reg:CC CC_REGNUM))])]
941   {
942     if (!REG_P (operands[2]))
943       operands[2] = operands[0];
944   }
947 ; Split negdi2_neon for core registers
;; Applies after reload on TARGET_32BIT when operand 0 satisfies
;; arm_general_register_operand.  The scratch clobber is dropped, leaving a
;; DImode neg with only the CC clobber.
948 (define_split
949   [(set (match_operand:DI 0 "s_register_operand" "")
950         (neg:DI (match_operand:DI 1 "s_register_operand" "")))
951    (clobber (match_scratch:DI 2 ""))
952    (clobber (reg:CC CC_REGNUM))]
953   "TARGET_32BIT && reload_completed
954    && arm_general_register_operand (operands[0], DImode)"
955   [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
956               (clobber (reg:CC CC_REGNUM))])]
957   ""
;; Absolute value / negation for the VH modes, one pattern per ABSNEG code.
;; Requires TARGET_NEON_FP16INST and emits v<absneg_str>.<V_s_elem>.
960 (define_insn "<absneg_str><mode>2"
961   [(set (match_operand:VH 0 "s_register_operand" "=w")
962     (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
963  "TARGET_NEON_FP16INST"
964  "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
965  [(set_attr "type" "neon_abs<q>")]
;; Expander for the VH modes that forwards its two operands unchanged to the
;; <absneg_str><mode>2 insn and then finishes with DONE.
968 (define_expand "neon_v<absneg_str><mode>"
969  [(set
970    (match_operand:VH 0 "s_register_operand")
971    (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
972  "TARGET_NEON_FP16INST"
974   emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
975   DONE;
;; One pattern per FP16_RND unspec for the VH modes.  The mnemonic is taken
;; from <fp16_rnd_insn>; requires TARGET_NEON_FP16INST.
978 (define_insn "neon_v<fp16_rnd_str><mode>"
979   [(set (match_operand:VH 0 "s_register_operand" "=w")
980     (unspec:VH
981      [(match_operand:VH 1 "s_register_operand" "w")]
982      FP16_RND))]
983  "TARGET_NEON_FP16INST"
984  "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
985  [(set_attr "type" "neon_fp_round_s<q>")]
;; UNSPEC_VRSQRTE on the VH modes, emitted as vrsqrte.f16; requires
;; TARGET_NEON_FP16INST.
988 (define_insn "neon_vrsqrte<mode>"
989   [(set (match_operand:VH 0 "s_register_operand" "=w")
990     (unspec:VH
991      [(match_operand:VH 1 "s_register_operand" "w")]
992      UNSPEC_VRSQRTE))]
993   "TARGET_NEON_FP16INST"
994   "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
995  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned element-wise minimum for the VDQIW modes, emitted as
;; vmin.<V_u_elem>.
998 (define_insn "*umin<mode>3_neon"
999   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1000         (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1001                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
1002   "TARGET_NEON"
1003   "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1004   [(set_attr "type" "neon_minmax<q>")]
;; Unsigned element-wise maximum for the VDQIW modes, emitted as
;; vmax.<V_u_elem>.
1007 (define_insn "*umax<mode>3_neon"
1008   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1009         (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1010                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
1011   "TARGET_NEON"
1012   "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1013   [(set_attr "type" "neon_minmax<q>")]
;; Signed element-wise minimum for the VDQW modes, emitted as
;; vmin.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> when
;; <Is_float_mode> tests true and neon_minmax<q> otherwise.
1016 (define_insn "*smin<mode>3_neon"
1017   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1018         (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1019                    (match_operand:VDQW 2 "s_register_operand" "w")))]
1020   "TARGET_NEON"
1021   "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1022   [(set (attr "type")
1023       (if_then_else (match_test "<Is_float_mode>")
1024                     (const_string "neon_fp_minmax_s<q>")
1025                     (const_string "neon_minmax<q>")))]
;; Signed element-wise maximum for the VDQW modes, emitted as
;; vmax.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> when
;; <Is_float_mode> tests true and neon_minmax<q> otherwise.
1028 (define_insn "*smax<mode>3_neon"
1029   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1030         (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1031                    (match_operand:VDQW 2 "s_register_operand" "w")))]
1032   "TARGET_NEON"
1033   "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1034   [(set (attr "type")
1035       (if_then_else (match_test "<Is_float_mode>")
1036                     (const_string "neon_fp_minmax_s<q>")
1037                     (const_string "neon_minmax<q>")))]
1040 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1041 ; generic vectorizer code.  It ends up creating a V2DI constructor with
1042 ; SImode elements.
;; Vector left shift for the VDQIW modes.
;; Alternative 0 shifts by a register and emits vshl.<V_s_elem>.
;; Alternative 1 shifts by a "Dn" immediate; the assembly is built by
;; neon_output_shift_immediate with mnemonic "vshl" and sign character 'i'.
1044 (define_insn "vashl<mode>3"
1045   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1046         (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1047                       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1048   "TARGET_NEON"
1049   {
1050     switch (which_alternative)
1051       {
1052         case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1053         case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1054                                                     <MODE>mode,
1055                                                     VALID_NEON_QREG_MODE (<MODE>mode),
1056                                                     true);
1057         default: gcc_unreachable ();
1058       }
1059   }
1060   [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift by an immediate for the VDQIW modes.  The assembly
;; is built by neon_output_shift_immediate with mnemonic "vshr" and sign
;; character 's'.
1063 (define_insn "vashr<mode>3_imm"
1064   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1065         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1066                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1067   "TARGET_NEON"
1068   {
1069     return neon_output_shift_immediate ("vshr", 's', &operands[2],
1070                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1071                                         false);
1072   }
1073   [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift by an immediate for the VDQIW modes.  The assembly is
;; built by neon_output_shift_immediate with mnemonic "vshr" and sign
;; character 'u'.
1076 (define_insn "vlshr<mode>3_imm"
1077   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1078         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1079                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1080   "TARGET_NEON"
1081   {
1082     return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1083                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1084                                         false);
1085   }              
1086   [(set_attr "type" "neon_shift_imm<q>")]
1089 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1090 ; negative amount, with signed operands. This is essentially the same as
1091 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1092 ; with negative shift amounts.
;; Register-count vshl.<V_s_elem> for the VDQI modes, expressed as
;; UNSPEC_ASHIFT_SIGNED rather than as an ashift rtx.
1094 (define_insn "ashl<mode>3_signed"
1095   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1096         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1097                       (match_operand:VDQI 2 "s_register_operand" "w")]
1098                      UNSPEC_ASHIFT_SIGNED))]
1099   "TARGET_NEON"
1100   "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1101   [(set_attr "type" "neon_shift_reg<q>")]
1104 ; Used for implementing logical shift-right, which is a left-shift by a negative
1105 ; amount, with unsigned operands.
;; Register-count vshl.<V_u_elem> for the VDQI modes, expressed as
;; UNSPEC_ASHIFT_UNSIGNED rather than as an ashift rtx.
1107 (define_insn "ashl<mode>3_unsigned"
1108   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1109         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1110                       (match_operand:VDQI 2 "s_register_operand" "w")]
1111                      UNSPEC_ASHIFT_UNSIGNED))]
1112   "TARGET_NEON"
1113   "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1114   [(set_attr "type" "neon_shift_reg<q>")]
;; Arithmetic right shift expander for the VDQIW modes.
;; A register shift count is negated into a fresh pseudo and passed to
;; ashl<mode>3_signed; any other count is handed to vashr<mode>3_imm.
1117 (define_expand "vashr<mode>3"
1118   [(set (match_operand:VDQIW 0 "s_register_operand" "")
1119         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1120                         (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1121   "TARGET_NEON"
1123   if (s_register_operand (operands[2], <MODE>mode))
1124     {
1125       rtx neg = gen_reg_rtx (<MODE>mode);
1126       emit_insn (gen_neg<mode>2 (neg, operands[2]));
1127       emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1128     }
1129   else
1130     emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1131   DONE;
;; Logical right shift expander for the VDQIW modes.
;; A register shift count is negated into a fresh pseudo and passed to
;; ashl<mode>3_unsigned; any other count is handed to vlshr<mode>3_imm.
1134 (define_expand "vlshr<mode>3"
1135   [(set (match_operand:VDQIW 0 "s_register_operand" "")
1136         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1137                         (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1138   "TARGET_NEON"
1140   if (s_register_operand (operands[2], <MODE>mode))
1141     {
1142       rtx neg = gen_reg_rtx (<MODE>mode);
1143       emit_insn (gen_neg<mode>2 (neg, operands[2]));
1144       emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1145     }
1146   else
1147     emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1148   DONE;
1151 ;; 64-bit shifts
1153 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1154 ;; leaving the upper half uninitialized.  This is OK since the shift
1155 ;; instruction only looks at the low 8 bits anyway.  To avoid confusing
1156 ;; data flow analysis however, we pretend the full register is set
1157 ;; using an unspec.
;; Alternative 0 takes the SImode count from memory ("Um") and emits vld1.32
;; into lane 0 of the destination; alternative 1 takes it from a core
;; register and emits vmov.32 into lane 0.
1158 (define_insn "neon_load_count"
1159   [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1160         (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1161                    UNSPEC_LOAD_COUNT))]
1162   "TARGET_NEON"
1163   "@
1164    vld1.32\t{%P0[0]}, %A1
1165    vmov.32\t%P0[0], %1"
1166   [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift on NEON registers without any clobbers; only matches
;; after reload.  The count is either a constant in the range 0..63
;; (alternative 0) or a NEON register (alternative 1).  Both alternatives
;; emit vshl.u64.
1169 (define_insn "ashldi3_neon_noclobber"
1170   [(set (match_operand:DI 0 "s_register_operand"            "=w,w")
1171         (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1172                    (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1173   "TARGET_NEON && reload_completed
1174    && (!CONST_INT_P (operands[2])
1175        || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1176   "@
1177    vshl.u64\t%P0, %P1, %2
1178    vshl.u64\t%P0, %P1, %P2"
1179   [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift with NEON and core-register alternatives.  The insn
;; always outputs "#" and is split after reload.
;; VFP destination: a constant count below 1 becomes a plain DImode move, a
;; constant count above 63 is replaced by 63, and a non-constant count is
;; first loaded into scratch operand 5 by neon_load_count.  The shift itself
;; is then emitted through ashldi3_neon_noclobber.
;; Any other destination: after asserting that operands 0 and 1 either do
;; not overlap or are the same register, the work is handed to
;; arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1182 (define_insn_and_split "ashldi3_neon"
1183   [(set (match_operand:DI 0 "s_register_operand"            "= w, w,?&r,?r,?&r, ?w,w")
1184         (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w,w")
1185                    (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm,i")))
1186    (clobber (match_scratch:SI 3                             "= X, X,?&r, X,  X,  X,X"))
1187    (clobber (match_scratch:SI 4                             "= X, X,?&r, X,  X,  X,X"))
1188    (clobber (match_scratch:DI 5                             "=&w, X,  X, X,  X, &w,X"))
1189    (clobber (reg:CC_C CC_REGNUM))]
1190   "TARGET_NEON"
1191   "#"
1192   "TARGET_NEON && reload_completed"
1193   [(const_int 0)]
1194   "
1195   {
1196     if (IS_VFP_REGNUM (REGNO (operands[0])))
1197       {
1198         if (CONST_INT_P (operands[2]))
1199           {
1200             if (INTVAL (operands[2]) < 1)
1201               {
1202                 emit_insn (gen_movdi (operands[0], operands[1]));
1203                 DONE;
1204               }
1205             else if (INTVAL (operands[2]) > 63)
1206               operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1207           }
1208         else
1209           {
1210             emit_insn (gen_neon_load_count (operands[5], operands[2]));
1211             operands[2] = operands[5];
1212           }
1214         /* Ditch the unnecessary clobbers.  */
1215         emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1216                                                operands[2]));
1217       }
1218     else
1219       {
1220         /* The shift expanders support either full overlap or no overlap.  */
1221         gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1222                     || REGNO (operands[0]) == REGNO (operands[1]));
1224         arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1225                                        operands[2], operands[3], operands[4]);
1226       }
1227     DONE;
1228   }"
1229   [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1230    (set_attr "opt" "*,*,speed,speed,speed,*,*")
1231    (set_attr "type" "multiple")]
1234 ; The shift amount needs to be negated for right-shifts
;; vshl.s64 on DImode NEON registers with a register count, expressed as
;; UNSPEC_ASHIFT_SIGNED; only matches after reload.
1235 (define_insn "signed_shift_di3_neon"
1236   [(set (match_operand:DI 0 "s_register_operand"             "=w")
1237         (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1238                     (match_operand:DI 2 "s_register_operand" " w")]
1239                    UNSPEC_ASHIFT_SIGNED))]
1240   "TARGET_NEON && reload_completed"
1241   "vshl.s64\t%P0, %P1, %P2"
1242   [(set_attr "type" "neon_shift_reg")]
1245 ; The shift amount needs to be negated for right-shifts
;; vshl.u64 on DImode NEON registers with a register count, expressed as
;; UNSPEC_ASHIFT_UNSIGNED; only matches after reload.
1246 (define_insn "unsigned_shift_di3_neon"
1247   [(set (match_operand:DI 0 "s_register_operand"             "=w")
1248         (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1249                     (match_operand:DI 2 "s_register_operand" " w")]
1250                    UNSPEC_ASHIFT_UNSIGNED))]
1251   "TARGET_NEON && reload_completed"
1252   "vshl.u64\t%P0, %P1, %P2"
1253   [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic right shift on NEON registers by a constant in the
;; range 1..64, without clobbers; only matches after reload.  Emits vshr.s64.
1256 (define_insn "ashrdi3_neon_imm_noclobber"
1257   [(set (match_operand:DI 0 "s_register_operand"              "=w")
1258         (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1259                      (match_operand:DI 2 "const_int_operand"  " i")))]
1260   "TARGET_NEON && reload_completed
1261    && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1262   "vshr.s64\t%P0, %P1, %2"
1263   [(set_attr "type" "neon_shift_imm")]
;; DImode logical right shift on NEON registers by a constant in the range
;; 1..64, without clobbers; only matches after reload.  Emits vshr.u64.
1266 (define_insn "lshrdi3_neon_imm_noclobber"
1267   [(set (match_operand:DI 0 "s_register_operand"              "=w")
1268         (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1269                      (match_operand:DI 2 "const_int_operand"  " i")))]
1270   "TARGET_NEON && reload_completed
1271    && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1272   "vshr.u64\t%P0, %P1, %2"
1273   [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts, one pattern per RSHIFTS code.  The insn always
;; outputs "#" and is split after reload.
;; VFP destination, constant count: a count below 1 becomes a plain DImode
;; move and a count above 64 is replaced by 64; the shift is then emitted
;; through <shift>di3_neon_imm_noclobber.
;; VFP destination, register count: the count is negated into scratch
;; operand 3, loaded into scratch operand 5 by neon_load_count, and used as
;; the count of <shifttype>_shift_di3_neon.
;; Any other destination: after asserting that operands 0 and 1 either do
;; not overlap or are the same register, the work is handed to
;; arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1276 ;; ashrdi3_neon
1277 ;; lshrdi3_neon
1278 (define_insn_and_split "<shift>di3_neon"
1279   [(set (match_operand:DI 0 "s_register_operand"             "= w, w,?&r,?r,?&r,?w,?w")
1280         (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
1281                     (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
1282    (clobber (match_scratch:SI 3                              "=2r, X, &r, X,  X,2r, X"))
1283    (clobber (match_scratch:SI 4                              "= X, X, &r, X,  X, X, X"))
1284    (clobber (match_scratch:DI 5                              "=&w, X,  X, X, X,&w, X"))
1285    (clobber (reg:CC CC_REGNUM))]
1286   "TARGET_NEON"
1287   "#"
1288   "TARGET_NEON && reload_completed"
1289   [(const_int 0)]
1290   "
1291   {
1292     if (IS_VFP_REGNUM (REGNO (operands[0])))
1293       {
1294         if (CONST_INT_P (operands[2]))
1295           {
1296             if (INTVAL (operands[2]) < 1)
1297               {
1298                 emit_insn (gen_movdi (operands[0], operands[1]));
1299                 DONE;
1300               }
1301             else if (INTVAL (operands[2]) > 64)
1302               operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1304             /* Ditch the unnecessary clobbers.  */
1305             emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1306                                                           operands[1],
1307                                                           operands[2]));
1308           }
1309         else 
1310           {
1311             /* We must use a negative left-shift.  */
1312             emit_insn (gen_negsi2 (operands[3], operands[2]));
1313             emit_insn (gen_neon_load_count (operands[5], operands[3]));
1314             emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1315                                                        operands[5]));
1316           }
1317       }
1318     else
1319       {
1320         /* The shift expanders support either full overlap or no overlap.  */
1321         gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1322                     || REGNO (operands[0]) == REGNO (operands[1]));
1324         /* This clobbers CC (ASHIFTRT by register only).  */
1325         arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1326                                        operands[2], operands[3], operands[4]);
1327       }
1329     DONE;
1330   }"
1331   [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1332    (set_attr "opt" "*,*,speed,speed,speed,*,*")
1333    (set_attr "type" "multiple")]
1336 ;; Widening operations
;; Widening signed sum expander for the VQI modes.
;; Operand 2 is copied into operand 0 when the two differ.  The two halves
;; of operand 1 are then accumulated into operand 0 by the
;; vec_sel_widen_ssum_lo and vec_sel_widen_ssum_hi insns, using the selectors
;; returned by arm_simd_vect_par_cnst_half (mode, false) and (mode, true).
1338 (define_expand "widen_ssum<mode>3"
1339   [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1340         (plus:<V_double_width>
1341          (sign_extend:<V_double_width>
1342           (match_operand:VQI 1 "s_register_operand" ""))
1343          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1344   "TARGET_NEON"
1345   {
1346     machine_mode mode = GET_MODE (operands[1]);
1347     rtx p1, p2;
1349     p1  = arm_simd_vect_par_cnst_half (mode, false);
1350     p2  = arm_simd_vect_par_cnst_half (mode, true);
1352     if (operands[0] != operands[2])
1353       emit_move_insn (operands[0], operands[2]);
1355     emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1356                                                          operands[1],
1357                                                          p1,
1358                                                          operands[0]));
1359     emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1360                                                          operands[1],
1361                                                          p2,
1362                                                          operands[0]));
1363     DONE;
1364   }
;; Sign-extends the half of operand 1 selected by a vect_par_constant_low
;; operand and adds it to operand 3, which is tied to the destination.
;; The vaddw source is %f1 when BYTES_BIG_ENDIAN and %e1 otherwise.
1367 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1368   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1369         (plus:<V_double_width>
1370          (sign_extend:<V_double_width>
1371           (vec_select:<V_HALF>
1372            (match_operand:VQI 1 "s_register_operand" "%w")
1373            (match_operand:VQI 2 "vect_par_constant_low" "")))
1374          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1375   "TARGET_NEON"
1377   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1378     "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1380   [(set_attr "type" "neon_add_widen")])
;; Sign-extends the half of operand 1 selected by a vect_par_constant_high
;; operand and adds it to operand 3, which is tied to the destination.
;; The vaddw source is %e1 when BYTES_BIG_ENDIAN and %f1 otherwise.
1382 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1383   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1384         (plus:<V_double_width>
1385          (sign_extend:<V_double_width>
1386           (vec_select:<V_HALF>
1387                          (match_operand:VQI 1 "s_register_operand" "%w")
1388                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1389          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1390   "TARGET_NEON"
1392   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1393     "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1395   [(set_attr "type" "neon_add_widen")])
;; Widening signed sum for the VW modes: operand 1 is sign-extended to
;; <V_widen> and added to operand 2 with a single vaddw.<V_s_elem>.
1397 (define_insn "widen_ssum<mode>3"
1398   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1399         (plus:<V_widen>
1400          (sign_extend:<V_widen>
1401           (match_operand:VW 1 "s_register_operand" "%w"))
1402          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1403   "TARGET_NEON"
1404   "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1405   [(set_attr "type" "neon_add_widen")]
;; Widening unsigned sum expander for the VQI modes.
;; Operand 2 is copied into operand 0 when the two differ.  The two halves
;; of operand 1 are then accumulated into operand 0 by the
;; vec_sel_widen_usum_lo and vec_sel_widen_usum_hi insns, using the selectors
;; returned by arm_simd_vect_par_cnst_half (mode, false) and (mode, true).
1408 (define_expand "widen_usum<mode>3"
1409   [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1410         (plus:<V_double_width>
1411          (zero_extend:<V_double_width>
1412           (match_operand:VQI 1 "s_register_operand" ""))
1413          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1414   "TARGET_NEON"
1415   {
1416     machine_mode mode = GET_MODE (operands[1]);
1417     rtx p1, p2;
1419     p1  = arm_simd_vect_par_cnst_half (mode, false);
1420     p2  = arm_simd_vect_par_cnst_half (mode, true);
1422     if (operands[0] != operands[2])
1423       emit_move_insn (operands[0], operands[2]);
1425     emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1426                                                          operands[1],
1427                                                          p1,
1428                                                          operands[0]));
1429     emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1430                                                          operands[1],
1431                                                          p2,
1432                                                          operands[0]));
1433     DONE;
1434   }
;; Zero-extends the half of operand 1 selected by a vect_par_constant_low
;; operand and adds it to operand 3, which is tied to the destination.
;; The vaddw source is %f1 when BYTES_BIG_ENDIAN and %e1 otherwise.
1437 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1438   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1439         (plus:<V_double_width>
1440          (zero_extend:<V_double_width>
1441           (vec_select:<V_HALF>
1442            (match_operand:VQI 1 "s_register_operand" "%w")
1443            (match_operand:VQI 2 "vect_par_constant_low" "")))
1444          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1445   "TARGET_NEON"
1447   return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1448     "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1450   [(set_attr "type" "neon_add_widen")])
;; Zero-extends the half of operand 1 selected by a vect_par_constant_high
;; operand and adds it to operand 3, which is tied to the destination.
;; The vaddw source is %e1 when BYTES_BIG_ENDIAN and %f1 otherwise.
1452 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1453   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1454         (plus:<V_double_width>
1455          (zero_extend:<V_double_width>
1456           (vec_select:<V_HALF>
1457                          (match_operand:VQI 1 "s_register_operand" "%w")
1458                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1459          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1460   "TARGET_NEON"
1462  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1463     "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1465   [(set_attr "type" "neon_add_widen")])
;; Widening unsigned sum for the VW modes: operand 1 is zero-extended to
;; <V_widen> and added to operand 2 with a single vaddw.<V_u_elem>.
1467 (define_insn "widen_usum<mode>3"
1468   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1469         (plus:<V_widen> (zero_extend:<V_widen>
1470                           (match_operand:VW 1 "s_register_operand" "%w"))
1471                         (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1472   "TARGET_NEON"
1473   "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1474   [(set_attr "type" "neon_add_widen")]
1477 ;; Helpers for quad-word reduction operations
1479 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1480 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1481 ; N/2-element vector.
;; V4SI instance, one pattern per VQH_OPS code: elements 0-1 of operand 1
;; are combined with elements 2-3 and the V2SI result is written to
;; operand 0.  The template uses the %e1 and %f1 views of operand 1.
1483 (define_insn "quad_halves_<code>v4si"
1484   [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1485         (VQH_OPS:V2SI
1486           (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1487                            (parallel [(const_int 0) (const_int 1)]))
1488           (vec_select:V2SI (match_dup 1)
1489                            (parallel [(const_int 2) (const_int 3)]))))]
1490   "TARGET_NEON"
1491   "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1492   [(set_attr "vqh_mnem" "<VQH_mnem>")
1493    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF instance, one pattern per VQHS_OPS code: elements 0-1 of operand 1
;; are combined with elements 2-3 and the V2SF result is written to
;; operand 0.  Only enabled with flag_unsafe_math_optimizations.
1496 (define_insn "quad_halves_<code>v4sf"
1497   [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1498         (VQHS_OPS:V2SF
1499           (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1500                            (parallel [(const_int 0) (const_int 1)]))
1501           (vec_select:V2SF (match_dup 1)
1502                            (parallel [(const_int 2) (const_int 3)]))))]
1503   "TARGET_NEON && flag_unsafe_math_optimizations"
1504   "<VQH_mnem>.f32\t%P0, %e1, %f1"
1505   [(set_attr "vqh_mnem" "<VQH_mnem>")
1506    (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI instance, one pattern per VQH_OPS code: elements 0-3 of operand 1
;; are combined with elements 4-7 and the V4HI result is written to
;; operand 0.  Operand 0 is the destination of the SET and is never read by
;; the instruction, so it is constrained as write-only ("=w"), matching the
;; V4SI and V4SF variants of this pattern.
1509 (define_insn "quad_halves_<code>v8hi"
1510   [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1511         (VQH_OPS:V4HI
1512           (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1513                            (parallel [(const_int 0) (const_int 1)
1514                                       (const_int 2) (const_int 3)]))
1515           (vec_select:V4HI (match_dup 1)
1516                            (parallel [(const_int 4) (const_int 5)
1517                                       (const_int 6) (const_int 7)]))))]
1518   "TARGET_NEON"
1519   "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1520   [(set_attr "vqh_mnem" "<VQH_mnem>")
1521    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI instance, one pattern per VQH_OPS code: elements 0-7 of operand 1
;; are combined with elements 8-15 and the V8QI result is written to
;; operand 0.  Operand 0 is the destination of the SET and is never read by
;; the instruction, so it is constrained as write-only ("=w"), matching the
;; V4SI and V4SF variants of this pattern.
1524 (define_insn "quad_halves_<code>v16qi"
1525   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1526         (VQH_OPS:V8QI
1527           (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1528                            (parallel [(const_int 0) (const_int 1)
1529                                       (const_int 2) (const_int 3)
1530                                       (const_int 4) (const_int 5)
1531                                       (const_int 6) (const_int 7)]))
1532           (vec_select:V8QI (match_dup 1)
1533                            (parallel [(const_int 8) (const_int 9)
1534                                       (const_int 10) (const_int 11)
1535                                       (const_int 12) (const_int 13)
1536                                       (const_int 14) (const_int 15)]))))]
1537   "TARGET_NEON"
1538   "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1539   [(set_attr "vqh_mnem" "<VQH_mnem>")
1540    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Moves operand 1 (<V_HALF> mode) into the <V_HALF> subreg of operand 0
;; that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), for the ANY128
;; modes.
1543 (define_expand "move_hi_quad_<mode>"
1544  [(match_operand:ANY128 0 "s_register_operand" "")
1545   (match_operand:<V_HALF> 1 "s_register_operand" "")]
1546  "TARGET_NEON"
1548   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1549                                        GET_MODE_SIZE (<V_HALF>mode)),
1550                   operands[1]);
1551   DONE;
;; Moves operand 1 (<V_HALF> mode) into the <V_HALF> subreg of operand 0
;; that starts at byte offset 0, for the ANY128 modes.
1554 (define_expand "move_lo_quad_<mode>"
1555  [(match_operand:ANY128 0 "s_register_operand" "")
1556   (match_operand:<V_HALF> 1 "s_register_operand" "")]
1557  "TARGET_NEON"
1559   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1560                                        <MODE>mode, 0),
1561                   operands[1]);
1562   DONE;
1565 ;; Reduction operations
;; Sum reduction to a scalar for the VD modes; float modes additionally
;; require flag_unsafe_math_optimizations.  neon_pairwise_reduce is called
;; with the neon_vpadd_internal<mode> generator, and element 0 of the
;; resulting vector is extracted into operand 0.
1567 (define_expand "reduc_plus_scal_<mode>"
1568   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1569    (match_operand:VD 1 "s_register_operand" "")]
1570   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1572   rtx vec = gen_reg_rtx (<MODE>mode);
1573   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1574                         &gen_neon_vpadd_internal<mode>);
1575   /* The same result is actually computed into every element.  */
1576   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1577   DONE;
;; Sum reduction to a scalar for the VQ modes; disabled when
;; BYTES_BIG_ENDIAN, and float modes additionally require
;; flag_unsafe_math_optimizations.  The two halves of operand 1 are added
;; with quad_halves_plus<mode>, and the half-width result is reduced with
;; reduc_plus_scal_<V_half>.
1580 (define_expand "reduc_plus_scal_<mode>"
1581   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1582    (match_operand:VQ 1 "s_register_operand" "")]
1583   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1584    && !BYTES_BIG_ENDIAN"
1586   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1588   emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1589   emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1591   DONE;
;; Sum reduction to a scalar for V2DI; disabled when BYTES_BIG_ENDIAN.
;; arm_reduc_plus_internal_v2di computes the sum into a fresh V2DI pseudo,
;; and element 0 of that pseudo is extracted into operand 0.
;; Operand 0 carries an empty constraint string like the other reduction
;; expanders in this file; constraints are not used in a define_expand.
1594 (define_expand "reduc_plus_scal_v2di"
1595   [(match_operand:DI 0 "nonimmediate_operand" "")
1596    (match_operand:V2DI 1 "s_register_operand" "")]
1597   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1599   rtx vec = gen_reg_rtx (V2DImode);
1601   emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1602   emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1604   DONE;
;; V2DI sum expressed as UNSPEC_VPADD; disabled when BYTES_BIG_ENDIAN.
;; Emits a single vadd.i64 that adds the %e1 and %f1 views of operand 1 and
;; names only the %e0 view of operand 0 as its destination.
1607 (define_insn "arm_reduc_plus_internal_v2di"
1608   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1609         (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1610                      UNSPEC_VPADD))]
1611   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1612   "vadd.i64\t%e0, %e1, %f1"
1613   [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of the VD-mode vector in operand 1 into the
;; scalar operand 0: neon_pairwise_reduce is driven with the vpsmin
;; generator and element 0 of the result is extracted.  Float modes are
;; accepted only when flag_unsafe_math_optimizations is set.
1616 (define_expand "reduc_smin_scal_<mode>"
1617   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1618    (match_operand:VD 1 "s_register_operand" "")]
1619   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1621   rtx vec = gen_reg_rtx (<MODE>mode);
1623   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1624                         &gen_neon_vpsmin<mode>);
1625   /* The result is computed into every element of the vector.  */
1626   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1627   DONE;
;; Signed-minimum reduction of the VQ-mode vector in operand 1 into the
;; scalar operand 0.  quad_halves_smin<mode> produces a <V_HALF>-mode
;; temporary that is then reduced by the <V_half> variant of
;; reduc_smin_scal.  Not available when BYTES_BIG_ENDIAN.
1630 (define_expand "reduc_smin_scal_<mode>"
1631   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1632    (match_operand:VQ 1 "s_register_operand" "")]
1633   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1634    && !BYTES_BIG_ENDIAN"
1636   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1638   emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1639   emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1641   DONE;
;; Signed-maximum reduction of the VD-mode vector in operand 1 into the
;; scalar operand 0: neon_pairwise_reduce is driven with the vpsmax
;; generator and element 0 of the result is extracted.  Float modes are
;; accepted only when flag_unsafe_math_optimizations is set.
1644 (define_expand "reduc_smax_scal_<mode>"
1645   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1646    (match_operand:VD 1 "s_register_operand" "")]
1647   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1649   rtx vec = gen_reg_rtx (<MODE>mode);
1650   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1651                         &gen_neon_vpsmax<mode>);
1652   /* The result is computed into every element of the vector.  */
1653   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1654   DONE;
;; Signed-maximum reduction of the VQ-mode vector in operand 1 into the
;; scalar operand 0.  quad_halves_smax<mode> produces a <V_HALF>-mode
;; temporary that is then reduced by the <V_half> variant of
;; reduc_smax_scal.  Not available when BYTES_BIG_ENDIAN.
1657 (define_expand "reduc_smax_scal_<mode>"
1658   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1659    (match_operand:VQ 1 "s_register_operand" "")]
1660   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1661    && !BYTES_BIG_ENDIAN"
1663   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1665   emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1666   emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1668   DONE;
;; Unsigned-minimum reduction of the VDI-mode vector in operand 1 into the
;; scalar operand 0: neon_pairwise_reduce is driven with the vpumin
;; generator and element 0 of the result is extracted.
1671 (define_expand "reduc_umin_scal_<mode>"
1672   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1673    (match_operand:VDI 1 "s_register_operand" "")]
1674   "TARGET_NEON"
1676   rtx vec = gen_reg_rtx (<MODE>mode);
1677   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1678                         &gen_neon_vpumin<mode>);
1679   /* The result is computed into every element of the vector.  */
1680   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1681   DONE;
;; Unsigned-minimum reduction of the VQI-mode vector in operand 1 into the
;; scalar operand 0.  quad_halves_umin<mode> produces a <V_HALF>-mode
;; temporary that is then reduced by the <V_half> variant of
;; reduc_umin_scal.  Not available when BYTES_BIG_ENDIAN.
1684 (define_expand "reduc_umin_scal_<mode>"
1685   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1686    (match_operand:VQI 1 "s_register_operand" "")]
1687   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1689   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1691   emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1692   emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1694   DONE;
;; Unsigned-maximum reduction of the VDI-mode vector in operand 1 into the
;; scalar operand 0: neon_pairwise_reduce is driven with the vpumax
;; generator and element 0 of the result is extracted.
1697 (define_expand "reduc_umax_scal_<mode>"
1698   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1699    (match_operand:VDI 1 "s_register_operand" "")]
1700   "TARGET_NEON"
1702   rtx vec = gen_reg_rtx (<MODE>mode);
1703   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1704                         &gen_neon_vpumax<mode>);
1705   /* The result is computed into every element of the vector.  */
1706   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1707   DONE;
;; Unsigned-maximum reduction of the VQI-mode vector in operand 1 into the
;; scalar operand 0.  quad_halves_umax<mode> produces a <V_HALF>-mode
;; temporary that is then reduced by the <V_half> variant of
;; reduc_umax_scal.  Not available when BYTES_BIG_ENDIAN.
1710 (define_expand "reduc_umax_scal_<mode>"
1711   [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1712    (match_operand:VQI 1 "s_register_operand" "")]
1713   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1715   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1717   emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1718   emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1720   DONE;
;; Pairwise add of two VD-mode registers, modelled as UNSPEC_VPADD and
;; emitted as vpadd.  This is the generator the reduc_plus_scal expander
;; passes to neon_pairwise_reduce.  The scheduling type is chosen by
;; <Is_float_mode>.
1723 (define_insn "neon_vpadd_internal<mode>"
1724   [(set (match_operand:VD 0 "s_register_operand" "=w")
1725         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1726                     (match_operand:VD 2 "s_register_operand" "w")]
1727                    UNSPEC_VPADD))]
1728   "TARGET_NEON"
1729   "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1730   ;; Assume this schedules like vadd.
1731   [(set (attr "type")
1732       (if_then_else (match_test "<Is_float_mode>")
1733                     (const_string "neon_fp_reduc_add_s<q>")
1734                     (const_string "neon_reduc_add<q>")))]
;; Pairwise add of two V4HF registers (vpadd.f16), modelled as
;; UNSPEC_VPADD.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the type here is "neon_reduc_add", whereas the float arm
;; of neon_vpadd_internal<mode> uses "neon_fp_reduc_add_s<q>" -- confirm
;; that the difference is intended.
1737 (define_insn "neon_vpaddv4hf"
1738  [(set
1739    (match_operand:V4HF 0 "s_register_operand" "=w")
1740    (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1741                  (match_operand:V4HF 2 "s_register_operand" "w")]
1742     UNSPEC_VPADD))]
1743  "TARGET_NEON_FP16INST"
1744  "vpadd.f16\t%P0, %P1, %P2"
1745  [(set_attr "type" "neon_reduc_add")]
;; Pairwise signed minimum of two VD-mode registers, modelled as
;; UNSPEC_VPSMIN and emitted as vpmin with the <V_s_elem> suffix.  The
;; scheduling type is chosen by <Is_float_mode>.
1748 (define_insn "neon_vpsmin<mode>"
1749   [(set (match_operand:VD 0 "s_register_operand" "=w")
1750         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1751                     (match_operand:VD 2 "s_register_operand" "w")]
1752                    UNSPEC_VPSMIN))]
1753   "TARGET_NEON"
1754   "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1755   [(set (attr "type")
1756       (if_then_else (match_test "<Is_float_mode>")
1757                     (const_string "neon_fp_reduc_minmax_s<q>")
1758                     (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum of two VD-mode registers, modelled as
;; UNSPEC_VPSMAX and emitted as vpmax with the <V_s_elem> suffix.  The
;; scheduling type is chosen by <Is_float_mode>.
1761 (define_insn "neon_vpsmax<mode>"
1762   [(set (match_operand:VD 0 "s_register_operand" "=w")
1763         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1764                     (match_operand:VD 2 "s_register_operand" "w")]
1765                    UNSPEC_VPSMAX))]
1766   "TARGET_NEON"
1767   "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1768   [(set (attr "type")
1769       (if_then_else (match_test "<Is_float_mode>")
1770                     (const_string "neon_fp_reduc_minmax_s<q>")
1771                     (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum of two VDI-mode registers, modelled as
;; UNSPEC_VPUMIN and emitted as vpmin with the <V_u_elem> suffix.
1774 (define_insn "neon_vpumin<mode>"
1775   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1776         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1777                      (match_operand:VDI 2 "s_register_operand" "w")]
1778                    UNSPEC_VPUMIN))]
1779   "TARGET_NEON"
1780   "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1781   [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum of two VDI-mode registers, modelled as
;; UNSPEC_VPUMAX and emitted as vpmax with the <V_u_elem> suffix.
1784 (define_insn "neon_vpumax<mode>"
1785   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1786         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1787                      (match_operand:VDI 2 "s_register_operand" "w")]
1788                    UNSPEC_VPUMAX))]
1789   "TARGET_NEON"
1790   "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1791   [(set_attr "type" "neon_reduc_minmax<q>")]
1794 ;; Saturating arithmetic
1796 ; NOTE: Neon supports many more saturating variants of instructions than the
1797 ; following, but these are all GCC currently understands.
1798 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1799 ; yet either, although these patterns may be used by intrinsics when they're
1800 ; added.
;; Signed saturating add (ss_plus) of two VD-mode registers, emitted as
;; vqadd with the <V_s_elem> suffix.
1802 (define_insn "*ss_add<mode>_neon"
1803   [(set (match_operand:VD 0 "s_register_operand" "=w")
1804        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1805                    (match_operand:VD 2 "s_register_operand" "w")))]
1806   "TARGET_NEON"
1807   "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1808   [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) of two VD-mode registers, emitted as
;; vqadd with the <V_u_elem> suffix.
1811 (define_insn "*us_add<mode>_neon"
1812   [(set (match_operand:VD 0 "s_register_operand" "=w")
1813        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1814                    (match_operand:VD 2 "s_register_operand" "w")))]
1815   "TARGET_NEON"
1816   "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1817   [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) of two VD-mode registers, emitted
;; as vqsub with the <V_s_elem> suffix.
1820 (define_insn "*ss_sub<mode>_neon"
1821   [(set (match_operand:VD 0 "s_register_operand" "=w")
1822        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1823                     (match_operand:VD 2 "s_register_operand" "w")))]
1824   "TARGET_NEON"
1825   "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1826   [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) of two VD-mode registers,
;; emitted as vqsub with the <V_u_elem> suffix.
1829 (define_insn "*us_sub<mode>_neon"
1830   [(set (match_operand:VD 0 "s_register_operand" "=w")
1831        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1832                     (match_operand:VD 2 "s_register_operand" "w")))]
1833   "TARGET_NEON"
1834   "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1835   [(set_attr "type" "neon_qsub<q>")]
1838 ;; Conditional instructions.  These are comparisons with conditional moves for
1839 ;; vectors.  They perform the assignment:
1840 ;;   
1841 ;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1843 ;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
1844 ;; element-wise.
;; Vector conditional select: operand 0 = (op4 <op3> op5) ? op1 : op2.
;; A comparison mask is built in a <V_cmp_result>-mode register with
;; vcge/vcgt/vceq (or the compare-with-zero forms vclt/vcle), and the
;; result is selected with neon_vbsl.  Comparisons that cannot be emitted
;; directly are emitted as their inverse, with the two data operands of
;; the vbsl swapped.  Float modes are accepted only when
;; flag_unsafe_math_optimizations is set.
1846 (define_expand "vcond<mode><mode>"
1847   [(set (match_operand:VDQW 0 "s_register_operand" "")
1848         (if_then_else:VDQW
1849           (match_operator 3 "comparison_operator"
1850             [(match_operand:VDQW 4 "s_register_operand" "")
1851              (match_operand:VDQW 5 "nonmemory_operand" "")])
1852           (match_operand:VDQW 1 "s_register_operand" "")
1853           (match_operand:VDQW 2 "s_register_operand" "")))]
1854   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1856   int inverse = 0;
1857   int use_zero_form = 0;
1858   int swap_bsl_operands = 0;
1859   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1860   rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1862   rtx (*base_comparison) (rtx, rtx, rtx);
1863   rtx (*complimentary_comparison) (rtx, rtx, rtx);
  /* Step 1: decide whether the compare-with-zero form can be used;
     otherwise make sure operand 5 is in a register.  */
1865   switch (GET_CODE (operands[3]))
1866     {
1867     case GE:
1868     case GT:
1869     case LE:
1870     case LT:
1871     case EQ:
1872       if (operands[5] == CONST0_RTX (<MODE>mode))
1873         {
1874           use_zero_form = 1;
1875           break;
1876         }
1877       /* Fall through.  */
1878     default:
1879       if (!REG_P (operands[5]))
1880         operands[5] = force_reg (<MODE>mode, operands[5]);
1881     }
  /* Step 2: pick the comparison generators.  INVERSE is set for the
     "less than" family, which is emitted through the complementary
     generator with swapped operands (or inverted, see below).  */
1883   switch (GET_CODE (operands[3]))
1884     {
1885     case LT:
1886     case UNLT:
1887       inverse = 1;
1888       /* Fall through.  */
1889     case GE:
1890     case UNGE:
1891     case ORDERED:
1892     case UNORDERED:
1893       base_comparison = gen_neon_vcge<mode>;
1894       complimentary_comparison = gen_neon_vcgt<mode>;
1895       break;
1896     case LE:
1897     case UNLE:
1898       inverse = 1;
1899       /* Fall through.  */
1900     case GT:
1901     case UNGT:
1902       base_comparison = gen_neon_vcgt<mode>;
1903       complimentary_comparison = gen_neon_vcge<mode>;
1904       break;
1905     case EQ:
1906     case NE:
1907     case UNEQ:
1908       base_comparison = gen_neon_vceq<mode>;
1909       complimentary_comparison = gen_neon_vceq<mode>;
1910       break;
1911     default:
1912       gcc_unreachable ();
1913     }
  /* Step 3: emit the comparison(s) that compute MASK.  */
1915   switch (GET_CODE (operands[3]))
1916     {
1917     case LT:
1918     case LE:
1919     case GT:
1920     case GE:
1921     case EQ:
1922       /* The easy case.  Here we emit one of vcge, vcgt or vceq.
1923          As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
1924          a GE b -> a GE b
1925          a GT b -> a GT b
1926          a LE b -> b GE a
1927          a LT b -> b GT a
1928          a EQ b -> a EQ b
1929          Note that there also exist direct comparison against 0 forms,
1930          so catch those as a special case.  */
1931       if (use_zero_form)
1932         {
1933           inverse = 0;
1934           switch (GET_CODE (operands[3]))
1935             {
1936             case LT:
1937               base_comparison = gen_neon_vclt<mode>;
1938               break;
1939             case LE:
1940               base_comparison = gen_neon_vcle<mode>;
1941               break;
1942             default:
1943               /* Do nothing, other zero form cases already have the correct
1944                  base_comparison.  */
1945               break;
1946             }
1947         }
1949       if (!inverse)
1950         emit_insn (base_comparison (mask, operands[4], operands[5]));
1951       else
1952         emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1953       break;
1954     case UNLT:
1955     case UNLE:
1956     case UNGT:
1957     case UNGE:
1958     case NE:
1959       /* Vector compare returns false for lanes which are unordered, so if we use
1960          the inverse of the comparison we actually want to emit, then
1961          swap the operands to BSL, we will end up with the correct result.
1962          Note that a NE NaN and NaN NE b are true for all a, b.
1964          Our transformations are:
1965          a UNGE b -> !(b GT a)
1966          a UNGT b -> !(b GE a)
1967          a UNLE b -> !(a GT b)
1968          a UNLT b -> !(a GE b)
1969          a NE b -> !(a EQ b)  */
1971       if (inverse)
1972         emit_insn (base_comparison (mask, operands[4], operands[5]));
1973       else
1974         emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1976       swap_bsl_operands = 1;
1977       break;
1978     case UNEQ:
1979       /* We check (a > b ||  b > a).  Combining these comparisons gives us
1980          true iff (a != b && a ORDERED b); swapping the operands to BSL
1981          will then give us (a == b ||  a UNORDERED b) as intended.  */
1983       emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1984       emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1985       emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1986       swap_bsl_operands = 1;
1987       break;
1988     case UNORDERED:
1989        /* Operands are ORDERED iff (a > b || b >= a).
1990          Swapping the operands to BSL will give the UNORDERED case.  */
1991      swap_bsl_operands = 1;
1992      /* Fall through.  */
1993     case ORDERED:
1994       emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1995       emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1996       emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1997       break;
1998     default:
1999       gcc_unreachable ();
2000     }
  /* Step 4: select between operands 1 and 2 under MASK, swapping them
     when MASK holds the inverse of the requested condition.  */
2002   if (swap_bsl_operands)
2003     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2004                                     operands[1]));
2005   else
2006     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2007                                     operands[2]));
2008   DONE;
;; Unsigned vector conditional select on VDQIW modes:
;; operand 0 = (op4 <op3> op5) ? op1 : op2.  A mask is built with
;; vcgeu/vcgtu/vceq (LEU and LTU swap the comparison operands) and the
;; result is selected with neon_vbsl; NE is emitted as EQ with the two
;; data operands of the vbsl swapped.
;; NOTE(review): operand 5's predicate is s_register_operand, so the
;; CONST0_RTX test that sets immediate_zero looks unreachable whenever the
;; predicate is honoured.  If it were reached, the LEU and LTU arms would
;; use gen_neon_vcle / gen_neon_vclt, the same compare-with-zero generators
;; that vcond uses for signed LE/LT -- confirm whether this path is dead
;; or needs unsigned semantics.
2011 (define_expand "vcondu<mode><mode>"
2012   [(set (match_operand:VDQIW 0 "s_register_operand" "")
2013         (if_then_else:VDQIW
2014           (match_operator 3 "arm_comparison_operator"
2015             [(match_operand:VDQIW 4 "s_register_operand" "")
2016              (match_operand:VDQIW 5 "s_register_operand" "")])
2017           (match_operand:VDQIW 1 "s_register_operand" "")
2018           (match_operand:VDQIW 2 "s_register_operand" "")))]
2019   "TARGET_NEON"
2021   rtx mask;
2022   int inverse = 0, immediate_zero = 0;
2023   
2024   mask = gen_reg_rtx (<V_cmp_result>mode);
2025   
2026   if (operands[5] == CONST0_RTX (<MODE>mode))
2027     immediate_zero = 1;
2028   else if (!REG_P (operands[5]))
2029     operands[5] = force_reg (<MODE>mode, operands[5]);
2030   
2031   switch (GET_CODE (operands[3]))
2032     {
2033     case GEU:
2034       emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2035       break;
2036     
2037     case GTU:
2038       emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2039       break;
2040     
2041     case EQ:
2042       emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2043       break;
2044     
2045     case LEU:
2046       if (immediate_zero)
2047         emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2048       else
2049         emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2050       break;
2051     
2052     case LTU:
2053       if (immediate_zero)
2054         emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2055       else
2056         emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2057       break;
2058     
2059     case NE:
2060       emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2061       inverse = 1;
2062       break;
2063     
2064     default:
2065       gcc_unreachable ();
2066     }
2067   
2068   if (inverse)
2069     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2070                                     operands[1]));
2071   else
2072     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2073                                     operands[2]));
2075   DONE;
2078 ;; Patterns for builtins.
2080 ; good for plain vadd, vaddq.
;; Builtin expander for vadd on VCVTF modes.  It emits the canonical
;; add<mode>3 when the mode is not a float mode or
;; flag_unsafe_math_optimizations is set, and the unspec form
;; neon_vadd<mode>_unspec otherwise.
2082 (define_expand "neon_vadd<mode>"
2083   [(match_operand:VCVTF 0 "s_register_operand" "=w")
2084    (match_operand:VCVTF 1 "s_register_operand" "w")
2085    (match_operand:VCVTF 2 "s_register_operand" "w")]
2086   "TARGET_NEON"
2088   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2089     emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2090   else
2091     emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2092                                            operands[2]));
2093   DONE;
;; Builtin expander for vadd on VH modes: always emits add<mode>3_fp16.
;; Requires TARGET_NEON_FP16INST.
2096 (define_expand "neon_vadd<mode>"
2097   [(match_operand:VH 0 "s_register_operand")
2098    (match_operand:VH 1 "s_register_operand")
2099    (match_operand:VH 2 "s_register_operand")]
2100   "TARGET_NEON_FP16INST"
2102   emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2103   DONE;
;; Builtin expander for vsub on VH modes: always emits sub<mode>3_fp16.
;; Requires TARGET_NEON_FP16INST.
2106 (define_expand "neon_vsub<mode>"
2107   [(match_operand:VH 0 "s_register_operand")
2108    (match_operand:VH 1 "s_register_operand")
2109    (match_operand:VH 2 "s_register_operand")]
2110   "TARGET_NEON_FP16INST"
2112   emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2113   DONE;
2116 ; Note that NEON operations don't support the full IEEE 754 standard: in
2117 ; particular, denormal values are flushed to zero.  This means that GCC cannot
2118 ; use those instructions for autovectorization, etc. unless
2119 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2120 ; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
2121 ; header) must work in either case: if -funsafe-math-optimizations is given,
2122 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2123 ; expand to unspecs (which may potentially limit the extent to which they might
2124 ; be optimized by generic code).
2126 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; vadd on VCVTF modes, modelled as UNSPEC_VADD rather than a plus.  It is
;; generated by the neon_vadd<mode> expander on its unspec path.  The
;; scheduling type is chosen by <Is_float_mode>.
2128 (define_insn "neon_vadd<mode>_unspec"
2129   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2130         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2131                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2132                      UNSPEC_VADD))]
2133   "TARGET_NEON"
2134   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2135   [(set (attr "type")
2136       (if_then_else (match_test "<Is_float_mode>")
2137                     (const_string "neon_fp_addsub_s<q>")
2138                     (const_string "neon_add<q>")))]
;; vaddl: two VDI-mode sources produce a <V_widen>-mode result.  Modelled
;; as an unspec over the VADDL iterator, with <sup> supplying the
;; signedness prefix of the element type.
2141 (define_insn "neon_vaddl<sup><mode>"
2142   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2143         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2144                            (match_operand:VDI 2 "s_register_operand" "w")]
2145                           VADDL))]
2146   "TARGET_NEON"
2147   "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2148   [(set_attr "type" "neon_add_long")]
;; vaddw: a <V_widen>-mode source (operand 1) and a VDI-mode source
;; (operand 2) produce a <V_widen>-mode result.  Modelled as an unspec
;; over the VADDW iterator.
2151 (define_insn "neon_vaddw<sup><mode>"
2152   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2153         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2154                            (match_operand:VDI 2 "s_register_operand" "w")]
2155                           VADDW))]
2156   "TARGET_NEON"
2157   "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2158   [(set_attr "type" "neon_add_widen")]
2161 ; vhadd and vrhadd.
;; One pattern covers both mnemonics: <r> expands over the VHADD iterator
;; to select vhadd or vrhadd, and <sup> supplies the signedness prefix.
;; Operands are VDQIW-mode registers.
2163 (define_insn "neon_v<r>hadd<sup><mode>"
2164   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2165         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2166                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2167                       VHADD))]
2168   "TARGET_NEON"
2169   "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2170   [(set_attr "type" "neon_add_halve_q")]
;; Builtin form of vqadd on VDQIX-mode registers, modelled as an unspec
;; over the VQADD iterator; <sup> supplies the signedness prefix.
2173 (define_insn "neon_vqadd<sup><mode>"
2174   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2175         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2176                        (match_operand:VDQIX 2 "s_register_operand" "w")]
2177                      VQADD))]
2178   "TARGET_NEON"
2179   "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2180   [(set_attr "type" "neon_qadd<q>")]
;; vaddhn / vraddhn (selected by <r> over the VADDHN iterator): two
;; VN-mode sources produce a <V_narrow>-mode result.
2183 (define_insn "neon_v<r>addhn<mode>"
2184   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2185         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2186                             (match_operand:VN 2 "s_register_operand" "w")]
2187                            VADDHN))]
2188   "TARGET_NEON"
2189   "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2190   [(set_attr "type" "neon_add_halve_narrow_q")]
2193 ;; Polynomial and Float multiplication.
;; vmul on VPF modes, modelled as UNSPEC_VMUL; <pf> supplies the element
;; type letter in the mnemonic suffix.  The scheduling type is chosen by
;; <Is_float_mode>.
2194 (define_insn "neon_vmul<pf><mode>"
2195   [(set (match_operand:VPF 0 "s_register_operand" "=w")
2196         (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2197                       (match_operand:VPF 2 "s_register_operand" "w")]
2198                      UNSPEC_VMUL))]
2199   "TARGET_NEON"
2200   "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2201   [(set (attr "type")
2202       (if_then_else (match_test "<Is_float_mode>")
2203                     (const_string "neon_fp_mul_s<q>")
2204                     (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply (mult) of two VH-mode registers, emitted as vmul.f16.  Only
;; enabled when both TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations hold.
2207 (define_insn "mul<mode>3"
2208  [(set
2209    (match_operand:VH 0 "s_register_operand" "=w")
2210    (mult:VH
2211     (match_operand:VH 1 "s_register_operand" "w")
2212     (match_operand:VH 2 "s_register_operand" "w")))]
2213   "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2214   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2215  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Same RTL and output template as mul<mode>3 for VH modes, but without
;; the flag_unsafe_math_optimizations requirement.
;; NOTE(review): presumably this is the form used by the intrinsics --
;; confirm against the builtin definitions.
2218 (define_insn "neon_vmulf<mode>"
2219  [(set
2220    (match_operand:VH 0 "s_register_operand" "=w")
2221    (mult:VH
2222     (match_operand:VH 1 "s_register_operand" "w")
2223     (match_operand:VH 2 "s_register_operand" "w")))]
2224   "TARGET_NEON_FP16INST"
2225   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2226  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Builtin expander for vmla on VDQW modes.  It emits the canonical
;; mul<mode>3add<mode>_neon when the mode is not a float mode or
;; flag_unsafe_math_optimizations is set, and neon_vmla<mode>_unspec
;; otherwise.  All four operands are passed through in order.
2229 (define_expand "neon_vmla<mode>"
2230   [(match_operand:VDQW 0 "s_register_operand" "=w")
2231    (match_operand:VDQW 1 "s_register_operand" "0")
2232    (match_operand:VDQW 2 "s_register_operand" "w")
2233    (match_operand:VDQW 3 "s_register_operand" "w")]
2234   "TARGET_NEON"
2236   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2237     emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2238                                              operands[2], operands[3]));
2239   else
2240     emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2241                                            operands[2], operands[3]));
2242   DONE;
;; Builtin expander for vfma on VCVTF modes.  It forwards to
;; fma<mode>4_intrinsic with the operands reordered: operands 2 and 3 are
;; passed first and operand 1 last.
;; NOTE(review): presumably operand 1 is the accumulator and 2/3 the
;; multiplicands -- confirm against fma<mode>4_intrinsic.
2245 (define_expand "neon_vfma<VCVTF:mode>"
2246   [(match_operand:VCVTF 0 "s_register_operand")
2247    (match_operand:VCVTF 1 "s_register_operand")
2248    (match_operand:VCVTF 2 "s_register_operand")
2249    (match_operand:VCVTF 3 "s_register_operand")]
2250   "TARGET_NEON && TARGET_FMA"
2252   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2253                                        operands[1]));
2254   DONE;
;; Builtin expander for vfma on VH modes (TARGET_NEON_FP16INST).  It
;; forwards to fma<mode>4_intrinsic with operands 2 and 3 passed first and
;; operand 1 last, the same reordering as the VCVTF variant.
2257 (define_expand "neon_vfma<VH:mode>"
2258   [(match_operand:VH 0 "s_register_operand")
2259    (match_operand:VH 1 "s_register_operand")
2260    (match_operand:VH 2 "s_register_operand")
2261    (match_operand:VH 3 "s_register_operand")]
2262   "TARGET_NEON_FP16INST"
2264   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2265                                        operands[1]));
2266   DONE;
;; Builtin expander for vfms on VCVTF modes.  It forwards to
;; fmsub<mode>4_intrinsic with operands 2 and 3 passed first and operand 1
;; last.
;; NOTE(review): presumably operand 1 is the accumulator and 2/3 the
;; multiplicands -- confirm against fmsub<mode>4_intrinsic.
2269 (define_expand "neon_vfms<VCVTF:mode>"
2270   [(match_operand:VCVTF 0 "s_register_operand")
2271    (match_operand:VCVTF 1 "s_register_operand")
2272    (match_operand:VCVTF 2 "s_register_operand")
2273    (match_operand:VCVTF 3 "s_register_operand")]
2274   "TARGET_NEON && TARGET_FMA"
2276   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2277                                          operands[1]));
2278   DONE;
;; Builtin expander for vfms on VH modes (TARGET_NEON_FP16INST).  It
;; forwards to fmsub<mode>4_intrinsic with operands 2 and 3 passed first
;; and operand 1 last, the same reordering as the VCVTF variant.
2281 (define_expand "neon_vfms<VH:mode>"
2282   [(match_operand:VH 0 "s_register_operand")
2283    (match_operand:VH 1 "s_register_operand")
2284    (match_operand:VH 2 "s_register_operand")
2285    (match_operand:VH 3 "s_register_operand")]
2286   "TARGET_NEON_FP16INST"
2288   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2289                                          operands[1]));
2290   DONE;
2293 ;; The expand RTL structure here is not important.
2294 ;; We use the gen_* functions anyway.
2295 ;; We just need something to wrap the iterators around.
;; The expander builds one half-selector with arm_simd_vect_par_cnst_half
;; for <VFML>mode and passes it twice, as operands 4 and 5 of the matching
;; vfm<vfml_op>l_<vfml_half><mode>_intrinsic insn.  Requires
;; TARGET_FP16FML.
2297 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2298   [(set (match_operand:VCVTF 0 "s_register_operand")
2299      (unspec:VCVTF
2300         [(match_operand:VCVTF 1 "s_register_operand")
2301            (PLUSMINUS:<VFML>
2302              (match_operand:<VFML> 2 "s_register_operand")
2303              (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2304   "TARGET_FP16FML"
2306   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2307   emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2308                                                              operands[1],
2309                                                              operands[2],
2310                                                              operands[3],
2311                                                              half, half));
2312   DONE;
;; vfmal.f16 on the low halves.  The RTL is an fma whose two multiplicands
;; are float_extend of a vec_select (low-half selectors, operands 4 and 5)
;; from operands 2 and 3, and whose addend is operand 1, tied to the
;; output by the "0" constraint.
2315 (define_insn "vfmal_low<mode>_intrinsic"
2316  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2317         (fma:VCVTF
2318          (float_extend:VCVTF
2319           (vec_select:<VFMLSEL>
2320            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2321            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2322          (float_extend:VCVTF
2323           (vec_select:<VFMLSEL>
2324            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2325            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2326          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2327  "TARGET_FP16FML"
2328  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2329  [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 on the high halves.  Same shape as the vfmal patterns, except
;; that the selected half of operand 2 is wrapped in a neg before the
;; float_extend.  Operand 1 is the addend, tied to the output by the "0"
;; constraint.
2332 (define_insn "vfmsl_high<mode>_intrinsic"
2333  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2334         (fma:VCVTF
2335          (float_extend:VCVTF
2336           (neg:<VFMLSEL>
2337             (vec_select:<VFMLSEL>
2338               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2339               (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2340          (float_extend:VCVTF
2341           (vec_select:<VFMLSEL>
2342            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2343            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2344          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2345  "TARGET_FP16FML"
2346  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2347  [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal.f16 on the high halves.  The RTL is an fma whose two
;; multiplicands are float_extend of a vec_select (high-half selectors,
;; operands 4 and 5) from operands 2 and 3, and whose addend is operand 1,
;; tied to the output by the "0" constraint.
2350 (define_insn "vfmal_high<mode>_intrinsic"
2351  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2352         (fma:VCVTF
2353          (float_extend:VCVTF
2354           (vec_select:<VFMLSEL>
2355            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2356            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2357          (float_extend:VCVTF
2358           (vec_select:<VFMLSEL>
2359            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2360            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2361          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2362  "TARGET_FP16FML"
2363  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2364  [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 on the low halves.  Same shape as vfmal_low, except that the
;; selected half of operand 2 is wrapped in a neg before the float_extend.
;; Operand 1 is the addend, tied to the output by the "0" constraint.
2367 (define_insn "vfmsl_low<mode>_intrinsic"
2368  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2369         (fma:VCVTF
2370          (float_extend:VCVTF
2371           (neg:<VFMLSEL>
2372             (vec_select:<VFMLSEL>
2373               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2374               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2375          (float_extend:VCVTF
2376           (vec_select:<VFMLSEL>
2377            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2378            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2379          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2380  "TARGET_FP16FML"
2381  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2382  [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane variant where operands 2 and 3 share <VFML>mode.  The lane
;; number in operand 4 is mapped through NEON_ENDIAN_LANE_N, a half
;; selector is built with arm_simd_vect_par_cnst_half, and both are passed
;; as operands 4 and 5 of the matching
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic insn.
2385 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2386   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2387      (unspec:VCVTF
2388         [(match_operand:VCVTF 1 "s_register_operand")
2389          (PLUSMINUS:<VFML>
2390            (match_operand:<VFML> 2 "s_register_operand")
2391            (match_operand:<VFML> 3 "s_register_operand"))
2392          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2393   "TARGET_FP16FML"
2395   rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2396   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2397   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2398                                                (operands[0], operands[1],
2399                                                 operands[2], operands[3],
2400                                                 half, lane));
2401   DONE;
;; vfmal.f16 by lane, low half of operand 2.  The second multiplicand is
;; one HF element of operand 3 (index in operand 5), duplicated across a
;; <VFMLSEL>-mode vector.  The output code decides whether that lane lies
;; in the <V_lo> or the <V_hi> part of operand 3 and rewrites operand 5
;; to the index relative to the chosen part.
;; NOTE(review): the expander that generates this insn already maps the
;; lane through NEON_ENDIAN_LANE_N and the output code applies it again --
;; presumably intentional; confirm.
2404 (define_insn "vfmal_lane_low<mode>_intrinsic"
2405  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2406         (fma:VCVTF
2407          (float_extend:VCVTF
2408           (vec_select:<VFMLSEL>
2409            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2410            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2411          (float_extend:VCVTF
2412            (vec_duplicate:<VFMLSEL>
2413              (vec_select:HF
2414                (match_operand:<VFML> 3 "s_register_operand" "x")
2415                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2416          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2417  "TARGET_FP16FML"
2419     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
    /* Lanes at or beyond the <VFMLSEL> element count are addressed in the
       <V_hi> part of operand 3, rebased to start at 0.  */
2420     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2421       {
2422         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2423         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2424       }
2425     else
2426       {
2427         operands[5] = GEN_INT (lane);
2428         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2429       }
2430   }
2431  [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane variant where operand 3 has <VFMLSEL2>mode rather than the
;; <VFML>mode of operand 2.  The lane number in operand 4 is mapped
;; through NEON_ENDIAN_LANE_N using <VFMLSEL2>mode, while the half
;; selector is still built for <VFML>mode.  Both are passed as operands 4
;; and 5 of the matching ..._<vfmlsel2><mode>_intrinsic insn.
2434 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2435   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2436      (unspec:VCVTF
2437         [(match_operand:VCVTF 1 "s_register_operand")
2438          (PLUSMINUS:<VFML>
2439            (match_operand:<VFML> 2 "s_register_operand")
2440            (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2441          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2442   "TARGET_FP16FML"
2444   rtx lane
2445     = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2446   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2447   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2448                 (operands[0], operands[1], operands[2], operands[3],
2449                  half, lane));
2450   DONE;
2453 ;; Used to implement the intrinsics:
2454 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2455 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2456 ;; Needs a bit of care to get the modes of the different sub-expressions right
2457 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2458 ;; S or D subregister to select the appropriate lane from.
;; The output code splits the endian-adjusted lane into a register offset
;; (lane / elts_per_reg, added to REGNO of operand 3) and an index within
;; that register (lane % elts_per_reg, stored back into operand 5).
;; Operand 1 is tied to the destination by the "0" constraint.
2460 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2461  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2462         (fma:VCVTF
2463          (float_extend:VCVTF
2464           (vec_select:<VFMLSEL>
2465            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2466            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2467          (float_extend:VCVTF
2468            (vec_duplicate:<VFMLSEL>
2469              (vec_select:HF
2470                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2471                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2472          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2473  "TARGET_FP16FML"
2475    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2476    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2477    int new_lane = lane % elts_per_reg;
2478    int regdiff = lane / elts_per_reg;
2479    operands[5] = GEN_INT (new_lane);
2480    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2481       because we want the print_operand code to print the appropriate
2482       S or D register prefix.  */
2483    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2484    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2485    return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2487  [(set_attr "type" "neon_fp_mla_s<q>")]
2490 ;; Used to implement the intrinsics:
2491 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2492 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2493 ;; Needs a bit of care to get the modes of the different sub-expressions right
2494 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2495 ;; S or D subregister to select the appropriate lane from.
;; Operand 2 is printed through %<V_hi>2 and is left untouched; only
;; operand 3 is re-created, offset by lane / elts_per_reg, and operand 5
;; is replaced by lane % elts_per_reg.  Operand 1 is tied to the
;; destination by the "0" constraint.
2497 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2498  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2499         (fma:VCVTF
2500          (float_extend:VCVTF
2501           (vec_select:<VFMLSEL>
2502            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2503            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2504          (float_extend:VCVTF
2505            (vec_duplicate:<VFMLSEL>
2506              (vec_select:HF
2507                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2508                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2509          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2510  "TARGET_FP16FML"
2512    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2513    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2514    int new_lane = lane % elts_per_reg;
2515    int regdiff = lane / elts_per_reg;
2516    operands[5] = GEN_INT (new_lane);
2517    /* We re-create operands[3] in the halved VFMLSEL mode
2518       because we've calculated the correct half-width subreg to extract
2519       the lane from and we want to print *that* subreg instead.  */
2520    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2521    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2523  [(set_attr "type" "neon_fp_mla_s<q>")]
;; High-part counterpart of vfmal_lane_low<mode>_intrinsic: operand 4 must
;; satisfy vect_par_constant_high and operand 2 is printed as %<V_hi>2.
;; Operands 2 and 3 share mode <VFML>; operand 1 is tied to the
;; destination.  The endian-adjusted lane chooses between %<V_hi>3 (index
;; rebased by GET_MODE_NUNITS (<VFMLSEL>mode)) and %<V_lo>3.
2526 (define_insn "vfmal_lane_high<mode>_intrinsic"
2527  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2528         (fma:VCVTF
2529          (float_extend:VCVTF
2530           (vec_select:<VFMLSEL>
2531            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2532            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2533          (float_extend:VCVTF
2534            (vec_duplicate:<VFMLSEL>
2535              (vec_select:HF
2536                (match_operand:<VFML> 3 "s_register_operand" "x")
2537                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2538          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2539  "TARGET_FP16FML"
2540   {
2541     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2542     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2543       {
2544         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2545         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2546       }
2547     else
2548       {
2549         operands[5] = GEN_INT (lane);
2550         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2551       }
2552   }
2553  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Subtracting form of vfmal_lane_low<mode>_intrinsic: the selected part
;; of operand 2 is wrapped in (neg ...) before the float_extend, and the
;; mnemonic printed is vfmsl.f16.  Operand 1 is tied to the destination.
;; Lane handling is the same: the endian-adjusted lane selects %<V_hi>3
;; (with a rebased index) or %<V_lo>3.
2556 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2557  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2558         (fma:VCVTF
2559          (float_extend:VCVTF
2560           (neg:<VFMLSEL>
2561             (vec_select:<VFMLSEL>
2562               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2563               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2564          (float_extend:VCVTF
2565            (vec_duplicate:<VFMLSEL>
2566              (vec_select:HF
2567                (match_operand:<VFML> 3 "s_register_operand" "x")
2568                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2569          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2570  "TARGET_FP16FML"
2572     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2573     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2574       {
2575         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2576         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2577       }
2578     else
2579       {
2580         operands[5] = GEN_INT (lane);
2581         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2582       }
2583   }
2584  [(set_attr "type" "neon_fp_mla_s<q>")]
2587 ;; Used to implement the intrinsics:
2588 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2589 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2590 ;; Needs a bit of care to get the modes of the different sub-expressions right
2591 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2592 ;; S or D subregister to select the appropriate lane from.
;; The selected part of operand 2 is negated in the RTL.  The output code
;; splits the endian-adjusted lane into a register offset
;; (lane / elts_per_reg, added to REGNO of operand 3) and an index within
;; that register (lane % elts_per_reg, stored back into operand 5).
2594 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2595  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2596         (fma:VCVTF
2597          (float_extend:VCVTF
2598           (neg:<VFMLSEL>
2599             (vec_select:<VFMLSEL>
2600               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2601               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2602          (float_extend:VCVTF
2603            (vec_duplicate:<VFMLSEL>
2604              (vec_select:HF
2605                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2606                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2607          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2608  "TARGET_FP16FML"
2610    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2611    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2612    int new_lane = lane % elts_per_reg;
2613    int regdiff = lane / elts_per_reg;
2614    operands[5] = GEN_INT (new_lane);
2615    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2616       because we want the print_operand code to print the appropriate
2617       S or D register prefix.  */
2618    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2619    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2620    return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2622  [(set_attr "type" "neon_fp_mla_s<q>")]
2625 ;; Used to implement the intrinsics:
2626 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2627 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2628 ;; Needs a bit of care to get the modes of the different sub-expressions right
2629 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2630 ;; S or D subregister to select the appropriate lane from.
;; The selected part of operand 2 is negated in the RTL and printed through
;; %<V_hi>2; only operand 3 is re-created, offset by lane / elts_per_reg,
;; and operand 5 is replaced by lane % elts_per_reg.
2632 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2633  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2634         (fma:VCVTF
2635          (float_extend:VCVTF
2636           (neg:<VFMLSEL>
2637             (vec_select:<VFMLSEL>
2638              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2639              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2640          (float_extend:VCVTF
2641            (vec_duplicate:<VFMLSEL>
2642              (vec_select:HF
2643                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2644                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2645          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2646  "TARGET_FP16FML"
2648    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2649    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2650    int new_lane = lane % elts_per_reg;
2651    int regdiff = lane / elts_per_reg;
2652    operands[5] = GEN_INT (new_lane);
2653    /* We re-create operands[3] in the halved VFMLSEL mode
2654       because we've calculated the correct half-width subreg to extract
2655       the lane from and we want to print *that* subreg instead.  */
2656    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2657    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2659  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Subtracting, high-part by-lane form where operands 2 and 3 share mode
;; <VFML>: the part of operand 2 selected by the vect_par_constant_high
;; parallel is negated, and the mnemonic printed is vfmsl.f16 with
;; %<V_hi>2.  Operand 1 is tied to the destination.  The endian-adjusted
;; lane chooses between %<V_hi>3 (index rebased) and %<V_lo>3.
2662 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2663  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2664         (fma:VCVTF
2665          (float_extend:VCVTF
2666           (neg:<VFMLSEL>
2667             (vec_select:<VFMLSEL>
2668              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2669              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2670          (float_extend:VCVTF
2671            (vec_duplicate:<VFMLSEL>
2672              (vec_select:HF
2673                (match_operand:<VFML> 3 "s_register_operand" "x")
2674                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2675          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2676  "TARGET_FP16FML"
2677   {
2678     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2679     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2680       {
2681         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2682         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2683       }
2684     else
2685       {
2686         operands[5] = GEN_INT (lane);
2687         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2688       }
2689   }
2690  [(set_attr "type" "neon_fp_mla_s<q>")]
2693 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Opaque (UNSPEC_VMLA) multiply-accumulate over the VDQW modes.  Operand 1
;; is tied to the destination by the "0" constraint; operands 2 and 3 are
;; the values printed as the vmla sources.  The scheduling type is picked
;; by <Is_float_mode>: floating-point or integer multiply-accumulate.
2695 (define_insn "neon_vmla<mode>_unspec"
2696   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2697         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2698                       (match_operand:VDQW 2 "s_register_operand" "w")
2699                       (match_operand:VDQW 3 "s_register_operand" "w")]
2700                     UNSPEC_VMLA))]
2701   "TARGET_NEON"
2702   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2703   [(set (attr "type")
2704       (if_then_else (match_test "<Is_float_mode>")
2705                     (const_string "neon_fp_mla_s<q>")
2706                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: multiply-accumulate whose accumulator and result (operands 1
;; and 0, tied together) have mode <V_widen> while the two multiplicands
;; have the narrower VW mode.  The VMLAL iterator and <sup> supply the
;; signedness letter in the mnemonic suffix.
2709 (define_insn "neon_vmlal<sup><mode>"
2710   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2711         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2712                            (match_operand:VW 2 "s_register_operand" "w")
2713                            (match_operand:VW 3 "s_register_operand" "w")]
2714                           VMLAL))]
2715   "TARGET_NEON"
2716   "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2717   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Expander for the vmls intrinsic over the VDQW modes.  For non-float
;; modes, or when flag_unsafe_math_optimizations is set, it emits the
;; generic mul<mode>3neg<mode>add<mode>_neon pattern; otherwise it emits
;; neon_vmls<mode>_unspec so the operation stays opaque to the optimizers.
;; The operands carry no constraint strings: an expander only uses the
;; predicates, and register allocation is driven by the insns it emits.
2720 (define_expand "neon_vmls<mode>"
2721   [(match_operand:VDQW 0 "s_register_operand")
2722    (match_operand:VDQW 1 "s_register_operand")
2723    (match_operand:VDQW 2 "s_register_operand")
2724    (match_operand:VDQW 3 "s_register_operand")]
2725   "TARGET_NEON"
2727   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2728     emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2729                  operands[1], operands[2], operands[3]));
2730   else
2731     emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2732                                            operands[2], operands[3]));
2733   DONE;
2736 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Opaque (UNSPEC_VMLS) multiply-subtract over the VDQW modes.  Operand 1
;; is tied to the destination by the "0" constraint; operands 2 and 3 are
;; printed as the vmls sources.  The scheduling type is picked by
;; <Is_float_mode>.
2738 (define_insn "neon_vmls<mode>_unspec"
2739   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2740         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2741                       (match_operand:VDQW 2 "s_register_operand" "w")
2742                       (match_operand:VDQW 3 "s_register_operand" "w")]
2743                     UNSPEC_VMLS))]
2744   "TARGET_NEON"
2745   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2746   [(set (attr "type")
2747       (if_then_else (match_test "<Is_float_mode>")
2748                     (const_string "neon_fp_mla_s<q>")
2749                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: multiply-subtract whose accumulator and result (operands 1 and
;; 0, tied together) have mode <V_widen> while the two multiplicands have
;; the narrower VW mode.  The VMLSL iterator and <sup> supply the
;; signedness letter in the mnemonic suffix.
2752 (define_insn "neon_vmlsl<sup><mode>"
2753   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2754         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2755                            (match_operand:VW 2 "s_register_operand" "w")
2756                            (match_operand:VW 3 "s_register_operand" "w")]
2757                           VMLSL))]
2758   "TARGET_NEON"
2759   "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2760   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2763 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics: the VQDMULH iterator selects the unspec
;; and <r> inserts the optional "r" into the name and the mnemonic.
;; Source and destination all have the same VMDQI mode.
2764 (define_insn "neon_vq<r>dmulh<mode>"
2765   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2766         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2767                        (match_operand:VMDQI 2 "s_register_operand" "w")]
2768                       VQDMULH))]
2769   "TARGET_NEON"
2770   "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2771   [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2774 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics: the VQRDMLH_AS iterator selects the
;; unspec and <VQRDMLH_AS:neon_rdma_as> supplies the differing letter.
;; Operand 1 is tied to the destination; the pattern is only enabled under
;; TARGET_NEON_RDMA.
;; NOTE(review): the type attribute uses the "_long" class although the
;; destination has the same VMDQI mode as the sources -- confirm this is
;; the intended scheduling class.
2775 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2776   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2777         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2778                        (match_operand:VMDQI 2 "s_register_operand" "w")
2779                        (match_operand:VMDQI 3 "s_register_operand" "w")]
2780                       VQRDMLH_AS))]
2781   "TARGET_NEON_RDMA"
2782   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2783   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal as an opaque UNSPEC_VQDMLAL: accumulator and result (operands 1
;; and 0, tied together) have mode <V_widen>; the two multiplicands have
;; the narrower VMDI mode.
2786 (define_insn "neon_vqdmlal<mode>"
2787   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2788         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2789                            (match_operand:VMDI 2 "s_register_operand" "w")
2790                            (match_operand:VMDI 3 "s_register_operand" "w")]
2791                           UNSPEC_VQDMLAL))]
2792   "TARGET_NEON"
2793   "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2794   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl as an opaque UNSPEC_VQDMLSL: accumulator and result (operands 1
;; and 0, tied together) have mode <V_widen>; the two multiplicands have
;; the narrower VMDI mode.
2797 (define_insn "neon_vqdmlsl<mode>"
2798   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2799         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2800                            (match_operand:VMDI 2 "s_register_operand" "w")
2801                            (match_operand:VMDI 3 "s_register_operand" "w")]
2802                           UNSPEC_VQDMLSL))]
2803   "TARGET_NEON"
2804   "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2805   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: multiply of two VW-mode operands producing a <V_widen>-mode
;; result.  The VMULL iterator and <sup> supply the type letter used in
;; the mnemonic suffix.
2808 (define_insn "neon_vmull<sup><mode>"
2809   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2810         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2811                            (match_operand:VW 2 "s_register_operand" "w")]
2812                           VMULL))]
2813   "TARGET_NEON"
2814   "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2815   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull as an opaque UNSPEC_VQDMULL: two VMDI-mode operands producing a
;; <V_widen>-mode result.
2818 (define_insn "neon_vqdmull<mode>"
2819   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2820         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2821                            (match_operand:VMDI 2 "s_register_operand" "w")]
2822                           UNSPEC_VQDMULL))]
2823   "TARGET_NEON"
2824   "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2825   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for the vsub intrinsic over the VCVTF modes.  For non-float
;; modes, or when flag_unsafe_math_optimizations is set, it emits the
;; generic sub<mode>3 pattern; otherwise it emits neon_vsub<mode>_unspec
;; so the operation stays opaque to the optimizers.
;; The operands carry no constraint strings: an expander only uses the
;; predicates, and register allocation is driven by the insns it emits.
2828 (define_expand "neon_vsub<mode>"
2829   [(match_operand:VCVTF 0 "s_register_operand")
2830    (match_operand:VCVTF 1 "s_register_operand")
2831    (match_operand:VCVTF 2 "s_register_operand")]
2832   "TARGET_NEON"
2834   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2835     emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2836   else
2837     emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2838                                            operands[2]));
2839   DONE;
2842 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Opaque (UNSPEC_VSUB) subtraction over the VCVTF modes, printed as vsub
;; of operand 1 and operand 2.  The scheduling type is picked by
;; <Is_float_mode>.
2844 (define_insn "neon_vsub<mode>_unspec"
2845   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2846         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2847                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2848                      UNSPEC_VSUB))]
2849   "TARGET_NEON"
2850   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2851   [(set (attr "type")
2852       (if_then_else (match_test "<Is_float_mode>")
2853                     (const_string "neon_fp_addsub_s<q>")
2854                     (const_string "neon_sub<q>")))]
;; vsubl: subtraction of two VDI-mode operands producing a <V_widen>-mode
;; result.  The VSUBL iterator and <sup> supply the signedness letter.
2857 (define_insn "neon_vsubl<sup><mode>"
2858   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2859         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2860                            (match_operand:VDI 2 "s_register_operand" "w")]
2861                           VSUBL))]
2862   "TARGET_NEON"
2863   "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2864   [(set_attr "type" "neon_sub_long")]
;; vsubw: subtraction of a VDI-mode operand 2 from a <V_widen>-mode
;; operand 1, producing a <V_widen>-mode result.  The VSUBW iterator and
;; <sup> supply the signedness letter.
2867 (define_insn "neon_vsubw<sup><mode>"
2868   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2869         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2870                            (match_operand:VDI 2 "s_register_operand" "w")]
2871                           VSUBW))]
2872   "TARGET_NEON"
2873   "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2874   [(set_attr "type" "neon_sub_widen")]
;; vqsub over the VDQIX modes; source and destination share the same mode.
;; The VQSUB iterator and <sup> supply the signedness letter.
2877 (define_insn "neon_vqsub<sup><mode>"
2878   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2879         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2880                        (match_operand:VDQIX 2 "s_register_operand" "w")]
2881                       VQSUB))]
2882   "TARGET_NEON"
2883   "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2884   [(set_attr "type" "neon_qsub<q>")]
;; vhsub over the VDQIW modes; source and destination share the same mode.
;; The VHSUB iterator and <sup> supply the signedness letter.
2887 (define_insn "neon_vhsub<sup><mode>"
2888   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2889         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2890                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2891                       VHSUB))]
2892   "TARGET_NEON"
2893   "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2894   [(set_attr "type" "neon_sub_halve<q>")]
;; vsubhn / vrsubhn: two VN-mode operands producing a <V_narrow>-mode
;; result.  The VSUBHN iterator selects the unspec and <r> inserts the
;; optional "r" into the name and the mnemonic.
2897 (define_insn "neon_v<r>subhn<mode>"
2898   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2899         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2900                             (match_operand:VN 2 "s_register_operand" "w")]
2901                            VSUBHN))]
2902   "TARGET_NEON"
2903   "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2904   [(set_attr "type" "neon_sub_halve_narrow_q")]
2907 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2908 ;; without unsafe math optimizations.
;; Operand 0 receives the comparison result in mode <V_cmp_result>;
;; operand 1 is a register and operand 2 is a register or zero.
;; The operands carry no constraint strings: an expander only uses the
;; predicates, and the alternatives ("w" versus "Dz") are chosen on the
;; insns this expander emits.
2909 (define_expand "neon_vc<cmp_op><mode>"
2910   [(match_operand:<V_cmp_result> 0 "s_register_operand")
2911      (neg:<V_cmp_result>
2912        (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2913                          (match_operand:VDQW 2 "reg_or_zero_operand")))]
2914   "TARGET_NEON"
2915   {
2916     /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2917        are enabled.  */
2918     if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2919         && !flag_unsafe_math_optimizations)
2920       {
2921         /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2922            we define gen_neon_vceq<mode>_insn_unspec only for float modes
2923            whereas this expander iterates over the integer modes as well,
2924            but we will never expand to UNSPECs for the integer comparisons.  */
2925         switch (<MODE>mode)
2926           {
2927             case E_V2SFmode:
2928               emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2929                                                               operands[1],
2930                                                               operands[2]));
2931               break;
2932             case E_V4SFmode:
2933               emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2934                                                               operands[1],
2935                                                               operands[2]));
2936               break;
2937             default:
2938               gcc_unreachable ();
2939           }
2940       }
2941     else
2942       emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2943                                                  operands[1],
2944                                                  operands[2]));
2945     DONE;
2946   }
;; RTL (non-unspec) vector comparison over the VDQW modes.  The insn
;; condition excludes vector-float modes unless
;; flag_unsafe_math_optimizations is set.  Alternative 0 compares against
;; a register, alternative 1 against zero ("Dz").
;; The output code builds the assembler template at run time: the type
;; letter is "f" for vector-float modes and <cmp_type> otherwise, and the
;; last operand is either %<V_reg>2 or the literal "#0".  The template is
;; emitted with output_asm_insn and an empty string is returned.
2949 (define_insn "neon_vc<cmp_op><mode>_insn"
2950   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2951         (neg:<V_cmp_result>
2952           (COMPARISONS:<V_cmp_result>
2953             (match_operand:VDQW 1 "s_register_operand" "w,w")
2954             (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2955   "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2956                     && !flag_unsafe_math_optimizations)"
2957   {
2958     char pattern[100];
2959     sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2960                       " %%<V_reg>1, %s",
2961                        GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2962                          ? "f" : "<cmp_type>",
2963                        which_alternative == 0
2964                          ? "%<V_reg>2" : "#0");
2965     output_asm_insn (pattern, operands);
2966     return "";
2967   }
2968   [(set (attr "type")
2969         (if_then_else (match_operand 2 "zero_operand")
2970                       (const_string "neon_compare_zero<q>")
2971                       (const_string "neon_compare<q>")))]
;; Unspec (NEON_VCMP) form of the floating-point vector comparison over
;; the VCVTF modes.  Alternative 0 compares against a register,
;; alternative 1 against zero ("Dz").
;; The output code formats the template with a fixed ".f" type letter and
;; either %<V_reg>2 or "#0" as the last operand, emits it with
;; output_asm_insn and returns an empty string.
2974 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2975   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2976         (unspec:<V_cmp_result>
2977           [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2978            (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2979           NEON_VCMP))]
2980   "TARGET_NEON"
2981   {
2982     char pattern[100];
2983     sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2984                        " %%<V_reg>1, %s",
2985                        which_alternative == 0
2986                          ? "%<V_reg>2" : "#0");
2987     output_asm_insn (pattern, operands);
2988     return "";
2990   [(set_attr "type" "neon_fp_compare_s<q>")]
;; VH-mode counterpart of the neon_vc<cmp_op><mode> expander, enabled
;; under TARGET_NEON_FP16INST.  It emits the *_fp16insn_unspec pattern for
;; vector-float modes when flag_unsafe_math_optimizations is clear, and
;; the RTL *_fp16insn pattern otherwise.
2993 (define_expand "neon_vc<cmp_op><mode>"
2994  [(match_operand:<V_cmp_result> 0 "s_register_operand")
2995   (neg:<V_cmp_result>
2996    (COMPARISONS:VH
2997     (match_operand:VH 1 "s_register_operand")
2998     (match_operand:VH 2 "reg_or_zero_operand")))]
2999  "TARGET_NEON_FP16INST"
3001   /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
3002      are enabled.  */
3003   if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3004       && !flag_unsafe_math_optimizations)
3005     emit_insn
3006       (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
3007        (operands[0], operands[1], operands[2]));
3008   else
3009     emit_insn
3010       (gen_neon_vc<cmp_op><mode>_fp16insn
3011        (operands[0], operands[1], operands[2]));
3012   DONE;
;; RTL (non-unspec) comparison over the VH modes.  The insn condition
;; requires TARGET_NEON_FP16INST and excludes vector-float modes unless
;; flag_unsafe_math_optimizations is set.  Alternative 0 compares against
;; a register, alternative 1 against zero ("Dz").
;; The output code builds the template at run time ("f" or <cmp_type> as
;; type letter, %<V_reg>2 or "#0" as last operand), emits it with
;; output_asm_insn and returns an empty string.
3015 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3016  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3017    (neg:<V_cmp_result>
3018     (COMPARISONS:<V_cmp_result>
3019      (match_operand:VH 1 "s_register_operand" "w,w")
3020      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3021  "TARGET_NEON_FP16INST
3022   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3023   && !flag_unsafe_math_optimizations)"
3025   char pattern[100];
3026   sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3027            " %%<V_reg>1, %s",
3028            GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3029            ? "f" : "<cmp_type>",
3030            which_alternative == 0
3031            ? "%<V_reg>2" : "#0");
3032   output_asm_insn (pattern, operands);
3033   return "";
3035  [(set (attr "type")
3036    (if_then_else (match_operand 2 "zero_operand")
3037     (const_string "neon_compare_zero<q>")
3038     (const_string "neon_compare<q>")))])
;; Unspec (NEON_VCMP) comparison over the VH modes, enabled under
;; TARGET_NEON_FP16INST.  Alternative 0 compares against a register,
;; alternative 1 against zero ("Dz").
;; The output code formats the template with a fixed ".f" type letter and
;; either %<V_reg>2 or "#0" as the last operand, emits it with
;; output_asm_insn and returns an empty string.
3040 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3041  [(set
3042    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3043    (unspec:<V_cmp_result>
3044     [(match_operand:VH 1 "s_register_operand" "w,w")
3045      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3046     NEON_VCMP))]
3047  "TARGET_NEON_FP16INST"
3049   char pattern[100];
3050   sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3051            " %%<V_reg>1, %s",
3052            which_alternative == 0
3053            ? "%<V_reg>2" : "#0");
3054   output_asm_insn (pattern, operands);
3055   return "";
3057  [(set_attr "type" "neon_fp_compare_s<q>")])
;; Unsigned vector comparison over the VDQIW modes for the codes in the
;; GTUGEU iterator.  Both inputs must be registers; the mnemonic always
;; carries the ".u" type letter.
3059 (define_insn "neon_vc<cmp_op>u<mode>"
3060   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3061         (neg:<V_cmp_result>
3062           (GTUGEU:<V_cmp_result>
3063             (match_operand:VDQIW 1 "s_register_operand" "w")
3064             (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3065   "TARGET_NEON"
3066   "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3067   [(set_attr "type" "neon_compare<q>")]
;; Expander for the absolute-value comparisons over the VCVTF modes (GTGE
;; codes applied to abs of each operand).  With
;; flag_unsafe_math_optimizations it emits the RTL *_insn pattern,
;; otherwise the *_insn_unspec pattern.
3070 (define_expand "neon_vca<cmp_op><mode>"
3071   [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3072         (neg:<V_cmp_result>
3073           (GTGE:<V_cmp_result>
3074             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3075             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3076   "TARGET_NEON"
3077   {
3078     if (flag_unsafe_math_optimizations)
3079       emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3080                                                   operands[2]));
3081     else
3082       emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
3083                                                          operands[1],
3084                                                          operands[2]));
3085     DONE;
3086   }
;; RTL form of the absolute-value comparison over the VCVTF modes, printed
;; as vac<cmp_op>.  Only enabled when flag_unsafe_math_optimizations is
;; set.
3089 (define_insn "neon_vca<cmp_op><mode>_insn"
3090   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3091         (neg:<V_cmp_result>
3092           (GTGE:<V_cmp_result>
3093             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3094             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3095   "TARGET_NEON && flag_unsafe_math_optimizations"
3096   "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3097   [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec (NEON_VACMP) form of the absolute-value comparison over the
;; VCVTF modes, printed as vac<cmp_op_unsp>.
3100 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3101   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3102         (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3103                                 (match_operand:VCVTF 2 "s_register_operand" "w")]
3104                                NEON_VACMP))]
3105   "TARGET_NEON"
3106   "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3107   [(set_attr "type" "neon_fp_compare_s<q>")]
;; VH-mode expander for the absolute-value comparisons (GLTE codes),
;; enabled under TARGET_NEON_FP16INST.  With
;; flag_unsafe_math_optimizations it emits the RTL *_fp16insn pattern,
;; otherwise the *_fp16insn_unspec pattern.
3110 (define_expand "neon_vca<cmp_op><mode>"
3111   [(set
3112     (match_operand:<V_cmp_result> 0 "s_register_operand")
3113     (neg:<V_cmp_result>
3114      (GLTE:<V_cmp_result>
3115       (abs:VH (match_operand:VH 1 "s_register_operand"))
3116       (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3117  "TARGET_NEON_FP16INST"
3119   if (flag_unsafe_math_optimizations)
3120     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3121                (operands[0], operands[1], operands[2]));
3122   else
3123     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3124                (operands[0], operands[1], operands[2]));
3125   DONE;
;; RTL form of the absolute-value comparison over the VH modes, printed as
;; vac<cmp_op>.  Requires TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.
3128 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3129   [(set
3130     (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3131     (neg:<V_cmp_result>
3132      (GLTE:<V_cmp_result>
3133       (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3134       (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3135  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3136  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3137  [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec (NEON_VAGLTE) form of the absolute-value comparison over the VH
;; modes, printed as vac<cmp_op_unsp>.
;; The insn condition is TARGET_NEON_FP16INST, matching the VH expander
;; that generates this pattern and the other *_fp16insn_unspec pattern in
;; this file; the pattern operates on half-precision vectors only.
3140 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3141  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3142    (unspec:<V_cmp_result>
3143     [(match_operand:VH 1 "s_register_operand" "w")
3144      (match_operand:VH 2 "s_register_operand" "w")]
3145     NEON_VAGLTE))]
3146  "TARGET_NEON_FP16INST"
3147  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3148  [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for the VH modes, enabled under
;; TARGET_NEON_FP16INST.  It forwards to gen_neon_vc<cmp_op><mode> with
;; CONST0_RTX (<MODE>mode) as the second comparison operand.
3151 (define_expand "neon_vc<cmp_op>z<mode>"
3152  [(set
3153    (match_operand:<V_cmp_result> 0 "s_register_operand")
3154    (COMPARISONS:<V_cmp_result>
3155     (match_operand:VH 1 "s_register_operand")
3156     (const_int 0)))]
3157  "TARGET_NEON_FP16INST"
3159   emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3160                                         CONST0_RTX (<MODE>mode)));
3161   DONE;
;; UNSPEC_VTST of two VDQIW register operands; output as "vtst" with the
;; element size taken from <V_sz_elem>.
3164 (define_insn "neon_vtst<mode>"
3165   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3166         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3167                        (match_operand:VDQIW 2 "s_register_operand" "w")]
3168                       UNSPEC_VTST))]
3169   "TARGET_NEON"
3170   "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3171   [(set_attr "type" "neon_tst<q>")]
;; Integer "vabd" on VDQIW modes, one pattern per member of the VABD
;; unspec iterator; <sup> supplies the signedness letter of the mnemonic
;; suffix.
3174 (define_insn "neon_vabd<sup><mode>"
3175   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3176         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3177                       (match_operand:VDQIW 2 "s_register_operand" "w")]
3178                      VABD))]
3179   "TARGET_NEON"
3180   "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3181   [(set_attr "type" "neon_abd<q>")]
;; "vabd" on the VH modes, using UNSPEC_VABD_F; requires
;; TARGET_NEON_FP16INST.
;; NOTE(review): the type attribute here is "neon_abd<q>" while the
;; VCVTF pattern neon_vabdf<mode> uses "neon_fp_abd_s<q>" -- confirm
;; the scheduling type is the intended one.
3184 (define_insn "neon_vabd<mode>"
3185   [(set (match_operand:VH 0 "s_register_operand" "=w")
3186     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3187                 (match_operand:VH 2 "s_register_operand" "w")]
3188      UNSPEC_VABD_F))]
3189  "TARGET_NEON_FP16INST"
3190  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3191   [(set_attr "type" "neon_abd<q>")]
;; "vabd" on the VCVTF modes, using UNSPEC_VABD_F.
3194 (define_insn "neon_vabdf<mode>"
3195   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3196         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3197                       (match_operand:VCVTF 2 "s_register_operand" "w")]
3198                      UNSPEC_VABD_F))]
3199   "TARGET_NEON"
3200   "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3201   [(set_attr "type" "neon_fp_abd_s<q>")]
;; "vabdl": VABDL unspec of two VW inputs producing a <V_widen> result.
;; The template prints the destination with %q and the sources with %P.
3204 (define_insn "neon_vabdl<sup><mode>"
3205   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3206         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3207                            (match_operand:VW 2 "s_register_operand" "w")]
3208                           VABDL))]
3209   "TARGET_NEON"
3210   "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3211   [(set_attr "type" "neon_abd_long")]
;; "vaba": adds the VABD unspec of operands 2 and 3 to operand 1.
;; Operand 1 is tied to the destination by the "0" constraint, so the
;; template only prints operands 0, 2 and 3.
3214 (define_insn "neon_vaba<sup><mode>"
3215   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3216         (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3217                                    (match_operand:VDQIW 3 "s_register_operand" "w")]
3218                                   VABD)
3219                     (match_operand:VDQIW 1 "s_register_operand" "0")))]
3220   "TARGET_NEON"
3221   "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3222   [(set_attr "type" "neon_arith_acc<q>")]
;; "vabal": adds the VABDL unspec of the VW operands 2 and 3 to the
;; <V_widen> operand 1, which is tied to the destination ("0").
3225 (define_insn "neon_vabal<sup><mode>"
3226   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3227         (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3228                                            (match_operand:VW 3 "s_register_operand" "w")]
3229                                            VABDL)
3230                          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3231   "TARGET_NEON"
3232   "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3233   [(set_attr "type" "neon_arith_acc<q>")]
;; Integer "v<maxmin>" on VDQIW modes, iterated over the VMAXMIN unspecs.
3236 (define_insn "neon_v<maxmin><sup><mode>"
3237   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3238         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3239                       (match_operand:VDQIW 2 "s_register_operand" "w")]
3240                      VMAXMIN))]
3241   "TARGET_NEON"
3242   "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3243   [(set_attr "type" "neon_minmax<q>")]
;; "v<maxmin>" on the VCVTF modes, iterated over the VMAXMINF unspecs.
3246 (define_insn "neon_v<maxmin>f<mode>"
3247   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3248         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3249                       (match_operand:VCVTF 2 "s_register_operand" "w")]
3250                      VMAXMINF))]
3251   "TARGET_NEON"
3252   "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3253   [(set_attr "type" "neon_fp_minmax_s<q>")]
;; VH-mode counterpart of the VCVTF "v<maxmin>" pattern (same VMAXMINF
;; unspecs and template); requires TARGET_NEON_FP16INST.
3256 (define_insn "neon_v<maxmin>f<mode>"
3257  [(set (match_operand:VH 0 "s_register_operand" "=w")
3258    (unspec:VH
3259     [(match_operand:VH 1 "s_register_operand" "w")
3260      (match_operand:VH 2 "s_register_operand" "w")]
3261     VMAXMINF))]
3262  "TARGET_NEON_FP16INST"
3263  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3264  [(set_attr "type" "neon_fp_minmax_s<q>")]
;; "vp<maxmin>.f16" for V4HF only, iterated over the VPMAXMINF unspecs.
;; All three operands are printed with %P.
3267 (define_insn "neon_vp<maxmin>fv4hf"
3268  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3269    (unspec:V4HF
3270     [(match_operand:V4HF 1 "s_register_operand" "w")
3271      (match_operand:V4HF 2 "s_register_operand" "w")]
3272     VPMAXMINF))]
3273  "TARGET_NEON_FP16INST"
3274  "vp<maxmin>.f16\t%P0, %P1, %P2"
3275   [(set_attr "type" "neon_reduc_minmax")]
;; "<fmaxmin_op>" on the VH modes, iterated over the VMAXMINFNM unspecs;
;; requires TARGET_NEON_FP16INST.
3278 (define_insn "neon_<fmaxmin_op><mode>"
3279  [(set
3280    (match_operand:VH 0 "s_register_operand" "=w")
3281    (unspec:VH
3282     [(match_operand:VH 1 "s_register_operand" "w")
3283      (match_operand:VH 2 "s_register_operand" "w")]
3284     VMAXMINFNM))]
3285  "TARGET_NEON_FP16INST"
3286  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3287  [(set_attr "type" "neon_fp_minmax_s<q>")]
3290 ;; v<maxmin>nm intrinsics.
;; VCVTF-mode pattern over the VMAXMINFNM unspecs; needs both
;; TARGET_NEON and TARGET_VFP5.
3291 (define_insn "neon_<fmaxmin_op><mode>"
3292   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3293         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3294                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3295                        VMAXMINFNM))]
3296   "TARGET_NEON && TARGET_VFP5"
3297   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3298   [(set_attr "type" "neon_fp_minmax_s<q>")]
3301 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and output template as neon_<fmaxmin_op><mode>
;; for VCVTF, published under the "<fmaxmin><mode>3" name.
3302 (define_insn "<fmaxmin><mode>3"
3303   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3304         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3305                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3306                        VMAXMINFNM))]
3307   "TARGET_NEON && TARGET_VFP5"
3308   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3309   [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander for vpadd on the VD modes: forwards its three operands
;; unchanged to neon_vpadd_internal<mode>.
3312 (define_expand "neon_vpadd<mode>"
3313   [(match_operand:VD 0 "s_register_operand" "=w")
3314    (match_operand:VD 1 "s_register_operand" "w")
3315    (match_operand:VD 2 "s_register_operand" "w")]
3316   "TARGET_NEON"
3318   emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3319                                             operands[2]));
3320   DONE;
;; "vpaddl": VPADDL unspec of a single VDQIW input producing a
;; <V_double_width> result.
3323 (define_insn "neon_vpaddl<sup><mode>"
3324   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3325         (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3326                                  VPADDL))]
3327   "TARGET_NEON"
3328   "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3329   [(set_attr "type" "neon_reduc_add_long")]
;; "vpadal": VPADAL unspec of the <V_double_width> operand 1 and the
;; VDQIW operand 2.  Operand 1 is tied to the destination ("0"), so the
;; template prints only operands 0 and 2.
3332 (define_insn "neon_vpadal<sup><mode>"
3333   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3334         (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3335                                   (match_operand:VDQIW 2 "s_register_operand" "w")]
3336                                  VPADAL))]
3337   "TARGET_NEON"
3338   "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3339   [(set_attr "type" "neon_reduc_add_acc")]
;; Integer "vp<maxmin>" on the VDI modes, iterated over the VPMAXMIN
;; unspecs.
3342 (define_insn "neon_vp<maxmin><sup><mode>"
3343   [(set (match_operand:VDI 0 "s_register_operand" "=w")
3344         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3345                     (match_operand:VDI 2 "s_register_operand" "w")]
3346                    VPMAXMIN))]
3347   "TARGET_NEON"
3348   "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3349   [(set_attr "type" "neon_reduc_minmax<q>")]
;; "vp<maxmin>" on the VCVTF modes, iterated over the VPMAXMINF unspecs.
3352 (define_insn "neon_vp<maxmin>f<mode>"
3353   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3354         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3355                     (match_operand:VCVTF 2 "s_register_operand" "w")]
3356                    VPMAXMINF))]
3357   "TARGET_NEON"
3358   "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3359   [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; "vrecps" on the VCVTF modes: UNSPEC_VRECPS of two register inputs.
3362 (define_insn "neon_vrecps<mode>"
3363   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3364         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3365                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3366                       UNSPEC_VRECPS))]
3367   "TARGET_NEON"
3368   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3369   [(set_attr "type" "neon_fp_recps_s<q>")]
;; VH-mode counterpart of the VCVTF "vrecps" pattern; requires
;; TARGET_NEON_FP16INST.
3372 (define_insn "neon_vrecps<mode>"
3373   [(set
3374     (match_operand:VH 0 "s_register_operand" "=w")
3375     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3376                 (match_operand:VH 2 "s_register_operand" "w")]
3377      UNSPEC_VRECPS))]
3378   "TARGET_NEON_FP16INST"
3379   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3380   [(set_attr "type" "neon_fp_recps_s<q>")]
;; "vrsqrts" on the VCVTF modes: UNSPEC_VRSQRTS of two register inputs.
3383 (define_insn "neon_vrsqrts<mode>"
3384   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3385         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3386                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3387                       UNSPEC_VRSQRTS))]
3388   "TARGET_NEON"
3389   "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3390   [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; VH-mode counterpart of the VCVTF "vrsqrts" pattern; requires
;; TARGET_NEON_FP16INST.
3393 (define_insn "neon_vrsqrts<mode>"
3394   [(set
3395     (match_operand:VH 0 "s_register_operand" "=w")
3396     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3397                  (match_operand:VH 2 "s_register_operand" "w")]
3398      UNSPEC_VRSQRTS))]
3399  "TARGET_NEON_FP16INST"
3400  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3401  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for vabs on the VDQW modes: a thin wrapper that emits the
;; abs<mode>2 pattern with the same two operands.
3404 (define_expand "neon_vabs<mode>"
3405   [(match_operand:VDQW 0 "s_register_operand" "")
3406    (match_operand:VDQW 1 "s_register_operand" "")]
3407   "TARGET_NEON"
3409   emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3410   DONE;
;; "vqabs" on the VDQIW modes: UNSPEC_VQABS of one register input.
3413 (define_insn "neon_vqabs<mode>"
3414   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3415         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3416                       UNSPEC_VQABS))]
3417   "TARGET_NEON"
3418   "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3419   [(set_attr "type" "neon_qabs<q>")]
;; bswap on the VDQHSD modes, output as "vrev<V_sz_elem>.8", i.e. the
;; element size selects the vrev variant and the data size is fixed at 8.
3422 (define_insn "neon_bswap<mode>"
3423   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3424         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3425   "TARGET_NEON"
3426   "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3427   [(set_attr "type" "neon_rev<q>")]
;; Expander for vneg on the VDQW modes: a thin wrapper that emits the
;; neg<mode>2 pattern with the same two operands.
3430 (define_expand "neon_vneg<mode>"
3431   [(match_operand:VDQW 0 "s_register_operand" "")
3432    (match_operand:VDQW 1 "s_register_operand" "")]
3433   "TARGET_NEON"
3435   emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3436   DONE;
3439 ;; These instructions map to the __builtins for the Dot Product operations.
;; The DOTPROD unspec of the two <VSI2QI> inputs (operands 2 and 3) is
;; added to the VCVTI accumulator in operand 1, which is tied to the
;; destination by the "0" constraint.
3440 (define_insn "neon_<sup>dot<vsi2qi>"
3441   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3442         (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3443                     (unspec:VCVTI [(match_operand:<VSI2QI> 2
3444                                                         "register_operand" "w")
3445                                    (match_operand:<VSI2QI> 3
3446                                                         "register_operand" "w")]
3447                 DOTPROD)))]
3448   "TARGET_DOTPROD"
3449   "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3450   [(set_attr "type" "neon_dot")]
3453 ;; These instructions map to the __builtins for the Dot Product
3454 ;; indexed operations.
;; Operand 3 is always a V8QI register (constraint "t") and operand 4 is
;; the immediate lane index.  The output code rewrites operand 4 through
;; NEON_ENDIAN_LANE_N before printing it as the index on operand 3.
;; NOTE(review): the lane is remapped against V8QImode while the template
;; uses it to index operand 3 of a <opsuffix> dot product -- confirm the
;; resulting big-endian lane number is the intended one.
3455 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3456   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3457         (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3458                     (unspec:VCVTI [(match_operand:<VSI2QI> 2
3459                                                         "register_operand" "w")
3460                                    (match_operand:V8QI 3 "register_operand" "t")
3461                                    (match_operand:SI 4 "immediate_operand" "i")]
3462                 DOTPROD)))]
3463   "TARGET_DOTPROD"
3464   {
3465     operands[4]
3466       = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3467     return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3468   }
3469   [(set_attr "type" "neon_dot")]
3472 ;; These expands map to the Dot Product optab the vectorizer checks for.
3473 ;; The auto-vectorizer expects a dot product builtin that also does an
3474 ;; accumulation into the provided register.
3475 ;; Given the following pattern
3477 ;; for (i=0; i<len; i++) {
3478 ;;     c = a[i] * b[i];
3479 ;;     r += c;
3480 ;; }
3481 ;; return r;
3483 ;; This can be auto-vectorized to
3484 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3486 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
3487 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3488 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3489 ;; ...
3491 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;;
;; Operands: 0 is the result, 1 and 2 are the <VSI2QI> multiplicands and
;; 3 is the incoming accumulator.  The accumulator is handed to the
;; neon_<sup>dot<vsi2qi> insn as its tied input (that insn's operand 1 has
;; a "0" constraint) and the sum is written straight to operand 0, so the
;; expander never modifies its own input operand 3.
3492 (define_expand "<sup>dot_prod<vsi2qi>"
3493   [(set (match_operand:VCVTI 0 "register_operand")
3494         (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3495                                                         "register_operand")
3496                                    (match_operand:<VSI2QI> 2
3497                                                         "register_operand")]
3498                      DOTPROD)
3499                     (match_operand:VCVTI 3 "register_operand")))]
3500   "TARGET_DOTPROD"
3502   emit_insn (
3503     gen_neon_<sup>dot<vsi2qi> (operands[0], operands[3], operands[1],
3504                                  operands[2]));
3506   DONE;
;; copysign expander for the VCVTF modes.  A mask vector with only bit 31
;; set in every element is built in the <V_cmp_result> integer mode,
;; operand 2 is copied into operand 0, and neon_vbsl<mode> is then
;; emitted with the mask as selector, operand 0 (holding operand 2) as
;; the first data input and operand 1 as the second.
;; The mask element is created with gen_int_mode so that the CONST_INT is
;; sign-extended for the element mode; a bare GEN_INT (0x80000000) is not
;; a canonical constant for a 32-bit mode.
3509 (define_expand "neon_copysignf<mode>"
3510   [(match_operand:VCVTF 0 "register_operand")
3511    (match_operand:VCVTF 1 "register_operand")
3512    (match_operand:VCVTF 2 "register_operand")]
3513   "TARGET_NEON"
3514   "{
3515      rtx v_bitmask_cast;
3516      rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3517      rtx c = gen_int_mode (0x80000000,
3517                            GET_MODE_INNER (<VCVTF:V_cmp_result>mode));
3519      emit_move_insn (v_bitmask,
3520                      gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3521      emit_move_insn (operands[0], operands[2]);
3522      v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3523                                            <VCVTF:V_cmp_result>mode, 0);
3524      emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3525                                      operands[1]));
3527      DONE;
3528   }"
;; "vqneg" on the VDQIW modes: UNSPEC_VQNEG of one register input.
3531 (define_insn "neon_vqneg<mode>"
3532   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3533         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3534                       UNSPEC_VQNEG))]
3535   "TARGET_NEON"
3536   "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3537   [(set_attr "type" "neon_qneg<q>")]
;; "vcls" on the VDQIW modes: UNSPEC_VCLS of one register input.
3540 (define_insn "neon_vcls<mode>"
3541   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3542         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3543                       UNSPEC_VCLS))]
3544   "TARGET_NEON"
3545   "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3546   [(set_attr "type" "neon_cls<q>")]
;; clz RTL on the VDQIW modes, output as "vclz".  neon_vclz<mode>
;; expands to this pattern.
3549 (define_insn "clz<mode>2"
3550   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3551         (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3552   "TARGET_NEON"
3553   "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3554   [(set_attr "type" "neon_cnt<q>")]
;; Expander for vclz on the VDQIW modes: a thin wrapper that emits
;; clz<mode>2 with the same two operands.
3557 (define_expand "neon_vclz<mode>"
3558   [(match_operand:VDQIW 0 "s_register_operand" "")
3559    (match_operand:VDQIW 1 "s_register_operand" "")]
3560   "TARGET_NEON"
3562   emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3563   DONE;
;; popcount RTL on the VE modes, output as "vcnt".  neon_vcnt<mode>
;; expands to this pattern.
3566 (define_insn "popcount<mode>2"
3567   [(set (match_operand:VE 0 "s_register_operand" "=w")
3568         (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3569   "TARGET_NEON"
3570   "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3571   [(set_attr "type" "neon_cnt<q>")]
;; Expander for vcnt on the VE modes: a thin wrapper that emits
;; popcount<mode>2 with the same two operands.
3574 (define_expand "neon_vcnt<mode>"
3575   [(match_operand:VE 0 "s_register_operand" "=w")
3576    (match_operand:VE 1 "s_register_operand" "w")]
3577   "TARGET_NEON"
3579   emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3580   DONE;
;; "vrecpe.f16" on the VH modes: UNSPEC_VRECPE of one register input;
;; requires TARGET_NEON_FP16INST.
3583 (define_insn "neon_vrecpe<mode>"
3584   [(set (match_operand:VH 0 "s_register_operand" "=w")
3585         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3586                    UNSPEC_VRECPE))]
3587   "TARGET_NEON_FP16INST"
3588   "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3589   [(set_attr "type" "neon_fp_recpe_s<q>")]
;; "vrecpe" on the V32 modes: UNSPEC_VRECPE of one register input, with
;; the mnemonic suffix taken from <V_u_elem>.
3592 (define_insn "neon_vrecpe<mode>"
3593   [(set (match_operand:V32 0 "s_register_operand" "=w")
3594         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3595                     UNSPEC_VRECPE))]
3596   "TARGET_NEON"
3597   "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3598   [(set_attr "type" "neon_fp_recpe_s<q>")]
;; "vrsqrte" on the V32 modes: UNSPEC_VRSQRTE of one register input,
;; with the mnemonic suffix taken from <V_u_elem>.
3601 (define_insn "neon_vrsqrte<mode>"
3602   [(set (match_operand:V32 0 "s_register_operand" "=w")
3603         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3604                     UNSPEC_VRSQRTE))]
3605   "TARGET_NEON"
3606   "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3607   [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for vmvn on the VDQIW modes: a thin wrapper that emits
;; one_cmpl<mode>2 with the same two operands.
3610 (define_expand "neon_vmvn<mode>"
3611   [(match_operand:VDQIW 0 "s_register_operand" "")
3612    (match_operand:VDQIW 1 "s_register_operand" "")]
3613   "TARGET_NEON"
3615   emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3616   DONE;
;; Sign-extending lane extract from a VD register into an SImode core
;; register ("vmov.s").  On BYTES_BIG_ENDIAN the lane index in operand 2
;; is mirrored (nunits - 1 - elt) before it is printed.
3619 (define_insn "neon_vget_lane<mode>_sext_internal"
3620   [(set (match_operand:SI 0 "s_register_operand" "=r")
3621         (sign_extend:SI
3622           (vec_select:<V_elem>
3623             (match_operand:VD 1 "s_register_operand" "w")
3624             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3625   "TARGET_NEON"
3627   if (BYTES_BIG_ENDIAN)
3628     {
3629       int elt = INTVAL (operands[2]);
3630       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3631       operands[2] = GEN_INT (elt);
3632     }
3633   return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3635   [(set_attr "type" "neon_to_gp")]
;; Zero-extending lane extract from a VD register into an SImode core
;; register ("vmov.u").  On BYTES_BIG_ENDIAN the lane index in operand 2
;; is mirrored (nunits - 1 - elt) before it is printed.
3638 (define_insn "neon_vget_lane<mode>_zext_internal"
3639   [(set (match_operand:SI 0 "s_register_operand" "=r")
3640         (zero_extend:SI
3641           (vec_select:<V_elem>
3642             (match_operand:VD 1 "s_register_operand" "w")
3643             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3644   "TARGET_NEON"
3646   if (BYTES_BIG_ENDIAN)
3647     {
3648       int elt = INTVAL (operands[2]);
3649       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3650       operands[2] = GEN_INT (elt);
3651     }
3652   return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3654   [(set_attr "type" "neon_to_gp")]
;; Sign-extending lane extract from a VQ2 register.  The output code
;; splits the lane number into a half selector (elt / halfelts) and a
;; lane within that half (elt % halfelts), builds a <V_HALF> register at
;; regno + 2 * half, and prints "vmov.s" on it via output_asm_insn.
;; On BYTES_BIG_ENDIAN only the within-half lane is mirrored.  The insn
;; template itself returns the empty string.
3657 (define_insn "neon_vget_lane<mode>_sext_internal"
3658   [(set (match_operand:SI 0 "s_register_operand" "=r")
3659         (sign_extend:SI
3660           (vec_select:<V_elem>
3661             (match_operand:VQ2 1 "s_register_operand" "w")
3662             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3663   "TARGET_NEON"
3665   rtx ops[3];
3666   int regno = REGNO (operands[1]);
3667   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3668   unsigned int elt = INTVAL (operands[2]);
3669   unsigned int elt_adj = elt % halfelts;
3671   if (BYTES_BIG_ENDIAN)
3672     elt_adj = halfelts - 1 - elt_adj;
3674   ops[0] = operands[0];
3675   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3676   ops[2] = GEN_INT (elt_adj);
3677   output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3679   return "";
3681   [(set_attr "type" "neon_to_gp_q")]
;; Zero-extending lane extract from a VQ2 register.  The output code
;; splits the lane number into a half selector (elt / halfelts) and a
;; lane within that half (elt % halfelts), builds a <V_HALF> register at
;; regno + 2 * half, and prints "vmov.u" on it via output_asm_insn.
;; On BYTES_BIG_ENDIAN only the within-half lane is mirrored.  The insn
;; template itself returns the empty string.
3684 (define_insn "neon_vget_lane<mode>_zext_internal"
3685   [(set (match_operand:SI 0 "s_register_operand" "=r")
3686         (zero_extend:SI
3687           (vec_select:<V_elem>
3688             (match_operand:VQ2 1 "s_register_operand" "w")
3689             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3690   "TARGET_NEON"
3692   rtx ops[3];
3693   int regno = REGNO (operands[1]);
3694   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3695   unsigned int elt = INTVAL (operands[2]);
3696   unsigned int elt_adj = elt % halfelts;
3698   if (BYTES_BIG_ENDIAN)
3699     elt_adj = halfelts - 1 - elt_adj;
3701   ops[0] = operands[0];
3702   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3703   ops[2] = GEN_INT (elt_adj);
3704   output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3706   return "";
3708   [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane builtin on the VDQW modes.  On
;; BYTES_BIG_ENDIAN the lane number is first XORed with
;; (64 / element-bits - 1).  Modes with 32-bit elements then go through
;; vec_extract<mode><V_elem_l>; all others use the sign-extending
;; neon_vget_lane<mode>_sext_internal pattern.
3711 (define_expand "neon_vget_lane<mode>"
3712   [(match_operand:<V_ext> 0 "s_register_operand" "")
3713    (match_operand:VDQW 1 "s_register_operand" "")
3714    (match_operand:SI 2 "immediate_operand" "")]
3715   "TARGET_NEON"
3717   if (BYTES_BIG_ENDIAN)
3718     {
3719       /* The intrinsics are defined in terms of a model where the
3720          element ordering in memory is vldm order, whereas the generic
3721          RTL is defined in terms of a model where the element ordering
3722          in memory is array order.  Convert the lane number to conform
3723          to this model.  */
3724       unsigned int elt = INTVAL (operands[2]);
3725       unsigned int reg_nelts
3726         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3727       elt ^= reg_nelts - 1;
3728       operands[2] = GEN_INT (elt);
3729     }
3731   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3732     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3733                                                 operands[2]));
3734   else
3735     emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3736                                                        operands[1],
3737                                                        operands[2]));
3738   DONE;
;; Unsigned counterpart of neon_vget_lane<mode>, on the VDQIW modes.
;; The big-endian lane conversion is the same; modes with 32-bit elements
;; use vec_extract<mode><V_elem_l>, all others use the zero-extending
;; neon_vget_lane<mode>_zext_internal pattern.
3741 (define_expand "neon_vget_laneu<mode>"
3742   [(match_operand:<V_ext> 0 "s_register_operand" "")
3743    (match_operand:VDQIW 1 "s_register_operand" "")
3744    (match_operand:SI 2 "immediate_operand" "")]
3745   "TARGET_NEON"
3747   if (BYTES_BIG_ENDIAN)
3748     {
3749       /* The intrinsics are defined in terms of a model where the
3750          element ordering in memory is vldm order, whereas the generic
3751          RTL is defined in terms of a model where the element ordering
3752          in memory is array order.  Convert the lane number to conform
3753          to this model.  */
3754       unsigned int elt = INTVAL (operands[2]);
3755       unsigned int reg_nelts
3756         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3757       elt ^= reg_nelts - 1;
3758       operands[2] = GEN_INT (elt);
3759     }
3761   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3762     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3763                                                 operands[2]));
3764   else
3765     emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3766                                                        operands[1],
3767                                                        operands[2]));
3768   DONE;
;; vget_lane for a DImode source: a plain move of operand 1 into
;; operand 0.  The lane index (operand 2) is accepted but not used.
3771 (define_expand "neon_vget_lanedi"
3772   [(match_operand:DI 0 "s_register_operand" "=r")
3773    (match_operand:DI 1 "s_register_operand" "w")
3774    (match_operand:SI 2 "immediate_operand" "")]
3775   "TARGET_NEON"
3777   emit_move_insn (operands[0], operands[1]);
3778   DONE;
;; vget_lane for V2DI.  On BYTES_BIG_ENDIAN the lane number is flipped
;; (XOR with 1).  The lane is asserted to be 0 or 1 and the result is a
;; move of the DImode low part (lane 0) or high part (lane 1) of
;; operand 1.
3781 (define_expand "neon_vget_lanev2di"
3782   [(match_operand:DI 0 "s_register_operand" "")
3783    (match_operand:V2DI 1 "s_register_operand" "")
3784    (match_operand:SI 2 "immediate_operand" "")]
3785   "TARGET_NEON"
3787   int lane;
3789 if (BYTES_BIG_ENDIAN)
3790     {
3791       /* The intrinsics are defined in terms of a model where the
3792          element ordering in memory is vldm order, whereas the generic
3793          RTL is defined in terms of a model where the element ordering
3794          in memory is array order.  Convert the lane number to conform
3795          to this model.  */
3796       unsigned int elt = INTVAL (operands[2]);
3797       unsigned int reg_nelts = 2;
3798       elt ^= reg_nelts - 1;
3799       operands[2] = GEN_INT (elt);
3800     }
3802   lane = INTVAL (operands[2]);
3803   gcc_assert ((lane ==0) || (lane == 1));
3804   emit_move_insn (operands[0], lane == 0
3805                                 ? gen_lowpart (DImode, operands[1])
3806                                 : gen_highpart (DImode, operands[1]));
3807   DONE;
;; Expander for vset_lane on the VDQ modes.  Operand 1 is the new
;; element, operand 2 the source vector and operand 3 the lane index.
;; On BYTES_BIG_ENDIAN the lane is XORed with (64 / element-bits - 1).
;; The lane is handed to vec_set<mode>_internal as the one-hot mask
;; 1 << elt.
3810 (define_expand "neon_vset_lane<mode>"
3811   [(match_operand:VDQ 0 "s_register_operand" "=w")
3812    (match_operand:<V_elem> 1 "s_register_operand" "r")
3813    (match_operand:VDQ 2 "s_register_operand" "0")
3814    (match_operand:SI 3 "immediate_operand" "i")]
3815   "TARGET_NEON"
3817   unsigned int elt = INTVAL (operands[3]);
3819   if (BYTES_BIG_ENDIAN)
3820     {
3821       unsigned int reg_nelts
3822         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3823       elt ^= reg_nelts - 1;
3824     }
3826   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3827                                          GEN_INT (1 << elt), operands[2]));
3828   DONE;
3831 ; Operands 2 & 3 (the old vector and the lane index) are ignored: the
; expansion is a plain move of operand 1 into operand 0, mirroring
; neon_vget_lanedi, which likewise ignores its lane operand.
3833 (define_expand "neon_vset_lanedi"
3834   [(match_operand:DI 0 "s_register_operand" "=w")
3835    (match_operand:DI 1 "s_register_operand" "r")
3836    (match_operand:DI 2 "s_register_operand" "0")
3837    (match_operand:SI 3 "immediate_operand" "i")]
3838   "TARGET_NEON"
3840   emit_move_insn (operands[0], operands[1]);
3841   DONE;
;; Expander for vcreate on the VD_RE modes: takes the low part of the
;; DImode operand 1 in the vector mode (gen_lowpart) and moves it into
;; operand 0.
3844 (define_expand "neon_vcreate<mode>"
3845   [(match_operand:VD_RE 0 "s_register_operand" "")
3846    (match_operand:DI 1 "general_operand" "")]
3847   "TARGET_NEON"
3849   rtx src = gen_lowpart (<MODE>mode, operands[1]);
3850   emit_move_insn (operands[0], src);
3851   DONE;
;; vec_duplicate of a core-register ("r") element into a VX vector,
;; output as "vdup".
3854 (define_insn "neon_vdup_n<mode>"
3855   [(set (match_operand:VX 0 "s_register_operand" "=w")
3856         (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3857   "TARGET_NEON"
3858   "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3859   [(set_attr "type" "neon_from_gp<q>")]
;; vec_duplicate of an HFmode core-register value into V4HF, output as
;; "vdup.16" with the destination printed via %P.
3862 (define_insn "neon_vdup_nv4hf"
3863   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3864         (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3865   "TARGET_NEON"
3866   "vdup.16\t%P0, %1"
3867   [(set_attr "type" "neon_from_gp")]
;; vec_duplicate of an HFmode core-register value into V8HF, output as
;; "vdup.16" with the destination printed via %q.
3870 (define_insn "neon_vdup_nv8hf"
3871   [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3872         (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3873   "TARGET_NEON"
3874   "vdup.16\t%q0, %1"
3875   [(set_attr "type" "neon_from_gp_q")]
;; vec_duplicate into a V32 vector with two alternatives: the element in
;; a core register ("r", printed as %1) or in a "t" register (printed as
;; %y1).  The type attribute differs per alternative.
3878 (define_insn "neon_vdup_n<mode>"
3879   [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3880         (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3881   "TARGET_NEON"
3882   "@
3883   vdup.<V_sz_elem>\t%<V_reg>0, %1
3884   vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3885   [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; vdup_n for DImode: expands to a plain move of operand 1 into
;; operand 0.
3888 (define_expand "neon_vdup_ndi"
3889   [(match_operand:DI 0 "s_register_operand" "=w")
3890    (match_operand:DI 1 "s_register_operand" "r")]
3891   "TARGET_NEON"
3893   emit_move_insn (operands[0], operands[1]);
3894   DONE;
;; vec_duplicate of a DImode value into V2DI.  Both alternatives emit two
;; "vmov" instructions, one to %e0 and one to %f0 (length 8): the first
;; alternative takes the source from a core register pair (%Q1, %R1),
;; the second from a "w" register (%P1).
3898 (define_insn "neon_vdup_nv2di"
3899   [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3900         (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3901   "TARGET_NEON"
3902   "@
3903   vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3904   vmov\t%e0, %P1\;vmov\t%f0, %P1"
3905   [(set_attr "length" "8")
3906    (set_attr "type" "multiple")]
;; Duplicate one lane of a <V_double_vector_mode> register across a VDQW
;; vector.  On BYTES_BIG_ENDIAN the lane index is mirrored over the
;; source mode's element count before printing.  <Is_d_reg> selects
;; whether the destination is printed with %P or %q.
3909 (define_insn "neon_vdup_lane<mode>_internal"
3910   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3911         (vec_duplicate:VDQW 
3912           (vec_select:<V_elem>
3913             (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3914             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3915   "TARGET_NEON"
3917   if (BYTES_BIG_ENDIAN)
3918     {
3919       int elt = INTVAL (operands[2]);
3920       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3921       operands[2] = GEN_INT (elt);
3922     }
3923   if (<Is_d_reg>)
3924     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3925   else
3926     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3928   [(set_attr "type" "neon_dup<q>")]
;; VH-mode counterpart of the VDQW lane-duplicate insn, enabled for
;; TARGET_NEON && TARGET_FP16.  The output code is the same: mirror the
;; lane on BYTES_BIG_ENDIAN, then print the destination with %P or %q
;; according to <Is_d_reg>.
3931 (define_insn "neon_vdup_lane<mode>_internal"
3932  [(set (match_operand:VH 0 "s_register_operand" "=w")
3933    (vec_duplicate:VH
3934     (vec_select:<V_elem>
3935      (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3936      (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3937  "TARGET_NEON && TARGET_FP16"
3939   if (BYTES_BIG_ENDIAN)
3940     {
3941       int elt = INTVAL (operands[2]);
3942       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3943       operands[2] = GEN_INT (elt);
3944     }
3945   if (<Is_d_reg>)
3946     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3947   else
3948     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3950   [(set_attr "type" "neon_dup<q>")]
;; Expander for vdup_lane on the VDQW modes.  On BYTES_BIG_ENDIAN the
;; lane number is XORed with (64 / element-bits - 1) of the source mode;
;; the operands are then passed to neon_vdup_lane<mode>_internal.
3953 (define_expand "neon_vdup_lane<mode>"
3954   [(match_operand:VDQW 0 "s_register_operand" "=w")
3955    (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3956    (match_operand:SI 2 "immediate_operand" "i")]
3957   "TARGET_NEON"
3959   if (BYTES_BIG_ENDIAN)
3960     {
3961       unsigned int elt = INTVAL (operands[2]);
3962       unsigned int reg_nelts
3963         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3964       elt ^= reg_nelts - 1;
3965       operands[2] = GEN_INT (elt);
3966     }
3967     emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3968                                                   operands[2]));
3969     DONE;
;; VH-mode counterpart of the VDQW vdup_lane expander, enabled for
;; TARGET_NEON && TARGET_FP16.  It applies the same big-endian lane
;; conversion and forwards to neon_vdup_lane<mode>_internal.
3972 (define_expand "neon_vdup_lane<mode>"
3973   [(match_operand:VH 0 "s_register_operand")
3974    (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3975    (match_operand:SI 2 "immediate_operand")]
3976   "TARGET_NEON && TARGET_FP16"
3978   if (BYTES_BIG_ENDIAN)
3979     {
3980       unsigned int elt = INTVAL (operands[2]);
3981       unsigned int reg_nelts
3982         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3983       elt ^= reg_nelts - 1;
3984       operands[2] = GEN_INT (elt);
3985     }
3986   emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3987                                                 operands[2]));
3988   DONE;
3991 ; Scalar index is ignored, since only zero is valid here.
; The expansion is a plain move of operand 1 into operand 0.
3992 (define_expand "neon_vdup_lanedi"
3993   [(match_operand:DI 0 "s_register_operand" "=w")
3994    (match_operand:DI 1 "s_register_operand" "w")
3995    (match_operand:SI 2 "immediate_operand" "i")]
3996   "TARGET_NEON"
3998   emit_move_insn (operands[0], operands[1]);
3999   DONE;
4002 ; Likewise for v2di, as the DImode second operand has only a single element.
; The expansion forwards operands 0 and 1 to neon_vdup_nv2di; the lane
; index (operand 2) is not used.
4003 (define_expand "neon_vdup_lanev2di"
4004   [(match_operand:V2DI 0 "s_register_operand" "=w")
4005    (match_operand:DI 1 "s_register_operand" "w")
4006    (match_operand:SI 2 "immediate_operand" "i")]
4007   "TARGET_NEON"
4009   emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4010   DONE;
4013 ; Disabled before reload because we don't want combine doing something silly,
4014 ; but used by the post-reload expansion of neon_vcombine.
; The pattern is a pair of sets exchanging operands 0 and 1 (both "+w"),
; output as a single "vswp".
4015 (define_insn "*neon_vswp<mode>"
4016   [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4017         (match_operand:VDQX 1 "s_register_operand" "+w"))
4018    (set (match_dup 1) (match_dup 0))]
4019   "TARGET_NEON && reload_completed"
4020   "vswp\t%<V_reg>0, %<V_reg>1"
4021   [(set_attr "type" "neon_permute<q>")]
4024 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4025 ;; dest vector.
4026 ;; FIXME: A different implementation of this builtin could make it much
4027 ;; more likely that we wouldn't actually need to output anything (we could make
4028 ;; it so that the reg allocator puts things in the right places magically
4029 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vec_concat of two VDX registers (operands 1 and 2) into a <V_DOUBLE>
;; register.  The output template is "#", so no assembly is printed for the
;; insn itself; once reload_completed holds it is split, and the split body
;; hands the operand array to neon_split_vcombine and finishes with DONE.
4031 (define_insn_and_split "neon_vcombine<mode>"
4032   [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4033         (vec_concat:<V_DOUBLE>
4034           (match_operand:VDX 1 "s_register_operand" "w")
4035           (match_operand:VDX 2 "s_register_operand" "w")))]
4036   "TARGET_NEON"
4037   "#"
4038   "&& reload_completed"
4039   [(const_int 0)]
4041   neon_split_vcombine (operands);
4042   DONE;
4044 [(set_attr "type" "multiple")]
;; Extracts one half of a VQX register: moves into operand 0 the <V_HALF>mode
;; subreg of operand 1 located at byte offset GET_MODE_SIZE (<V_HALF>mode).
4047 (define_expand "neon_vget_high<mode>"
4048   [(match_operand:<V_HALF> 0 "s_register_operand")
4049    (match_operand:VQX 1 "s_register_operand")]
4050   "TARGET_NEON"
4052   emit_move_insn (operands[0],
4053                   simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4054                                        GET_MODE_SIZE (<V_HALF>mode)));
4055   DONE;
;; Extracts one half of a VQX register: moves into operand 0 the <V_HALF>mode
;; subreg of operand 1 located at byte offset 0.
4058 (define_expand "neon_vget_low<mode>"
4059   [(match_operand:<V_HALF> 0 "s_register_operand")
4060    (match_operand:VQX 1 "s_register_operand")]
4061   "TARGET_NEON"
4063   emit_move_insn (operands[0],
4064                   simplify_gen_subreg (<V_HALF>mode, operands[1],
4065                                        <MODE>mode, 0));
4066   DONE;
;; Signed integer to floating-point conversion (RTL code "float") from a VCVTI
;; vector to <V_CVTTO>, output as vcvt.f32.s32.  The insn condition excludes
;; flag_rounding_math.
4069 (define_insn "float<mode><V_cvtto>2"
4070   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4071         (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4072   "TARGET_NEON && !flag_rounding_math"
4073   "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4074   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned integer to floating-point conversion (RTL code "unsigned_float")
;; from a VCVTI vector to <V_CVTTO>, output as vcvt.f32.u32.  The insn
;; condition excludes flag_rounding_math.
4077 (define_insn "floatuns<mode><V_cvtto>2"
4078   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4079         (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] 
4080   "TARGET_NEON && !flag_rounding_math"
4081   "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4082   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Floating-point to signed integer conversion (RTL code "fix") from a VCVTF
;; vector to <V_CVTTO>, output as vcvt.s32.f32.
4085 (define_insn "fix_trunc<mode><V_cvtto>2"
4086   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4087         (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4088   "TARGET_NEON"
4089   "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4090   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Floating-point to unsigned integer conversion (RTL code "unsigned_fix")
;; from a VCVTF vector to <V_CVTTO>, output as vcvt.u32.f32.
4093 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4094   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4095         (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4096   "TARGET_NEON"
4097   "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4098   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec (VCVT_US) conversion of a VCVTF vector to <V_CVTTO>; the <sup>
;; iterator attribute supplies the signedness letter in the mnemonic.  The
;; "type" attribute classes it as fp-to-int.
4101 (define_insn "neon_vcvt<sup><mode>"
4102   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4103         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4104                           VCVT_US))]
4105   "TARGET_NEON"
4106   "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4107   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec (VCVT_US) conversion of a VCVTI vector to <V_CVTTO>; the <sup>
;; iterator attribute supplies the signedness letter of the source type in
;; the mnemonic.  The "type" attribute classes it as int-to-fp.
4110 (define_insn "neon_vcvt<sup><mode>"
4111   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4112         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4113                           VCVT_US))]
4114   "TARGET_NEON"
4115   "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4116   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Conversion of a V4HF vector to V4SF (UNSPEC_VCVT), output as vcvt.f32.f16.
;; Requires TARGET_NEON and TARGET_FP16.
4119 (define_insn "neon_vcvtv4sfv4hf"
4120   [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4121         (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4122                           UNSPEC_VCVT))]
4123   "TARGET_NEON && TARGET_FP16"
4124   "vcvt.f32.f16\t%q0, %P1"
4125   [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Conversion of a V4SF vector to V4HF (UNSPEC_VCVT), output as vcvt.f16.f32.
;; Requires TARGET_NEON and TARGET_FP16.
4128 (define_insn "neon_vcvtv4hfv4sf"
4129   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4130         (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4131                           UNSPEC_VCVT))]
4132   "TARGET_NEON && TARGET_FP16"
4133   "vcvt.f16.f32\t%P0, %q1"
4134   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Half-precision counterpart of the int-to-fp neon_vcvt: unspec (VCVT_US)
;; conversion of a VCVTHI vector to <VH_CVTTO>.  Enabled by
;; TARGET_NEON_FP16INST.
4137 (define_insn "neon_vcvt<sup><mode>"
4138  [(set
4139    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4140    (unspec:<VH_CVTTO>
4141     [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4142     VCVT_US))]
4143  "TARGET_NEON_FP16INST"
4144  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4145   [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision counterpart of the fp-to-int neon_vcvt: unspec (VCVT_US)
;; conversion of a VH vector to <VH_CVTTO>.  Enabled by TARGET_NEON_FP16INST.
4148 (define_insn "neon_vcvt<sup><mode>"
4149  [(set
4150    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4151    (unspec:<VH_CVTTO>
4152     [(match_operand:VH 1 "s_register_operand" "w")]
4153     VCVT_US))]
4154  "TARGET_NEON_FP16INST"
4155  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4156   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; VCVTF-to-<V_CVTTO> conversion that takes an extra immediate (operand 2,
;; printed as the third assembly operand), unspec VCVT_US_N.  The output code
;; passes operand 2 to arm_const_bounds with arguments 1 and 33 before
;; returning the template -- presumably a range check on the immediate;
;; confirm against the arm_const_bounds definition.
4159 (define_insn "neon_vcvt<sup>_n<mode>"
4160   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4161         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4162                            (match_operand:SI 2 "immediate_operand" "i")]
4163                           VCVT_US_N))]
4164   "TARGET_NEON"
4166   arm_const_bounds (operands[2], 1, 33);
4167   return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4169   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; VH-to-<VH_CVTTO> conversion with an extra immediate (operand 2), unspec
;; VCVT_US_N; enabled by TARGET_NEON_FP16INST.  The output code passes operand
;; 2 to arm_const_bounds with arguments 0 and 17 before returning the template.
;; NOTE(review): the 32-bit variants of this pattern pass 1 as the lower
;; argument where this one passes 0 -- confirm the difference is intended.
4172 (define_insn "neon_vcvt<sup>_n<mode>"
4173  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4174    (unspec:<VH_CVTTO>
4175     [(match_operand:VH 1 "s_register_operand" "w")
4176      (match_operand:SI 2 "immediate_operand" "i")]
4177     VCVT_US_N))]
4178   "TARGET_NEON_FP16INST"
4180   arm_const_bounds (operands[2], 0, 17);
4181   return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4183  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; VCVTI-to-<V_CVTTO> conversion that takes an extra immediate (operand 2,
;; printed as the third assembly operand), unspec VCVT_US_N.  The output code
;; passes operand 2 to arm_const_bounds with arguments 1 and 33 before
;; returning the template -- presumably a range check on the immediate;
;; confirm against the arm_const_bounds definition.
4186 (define_insn "neon_vcvt<sup>_n<mode>"
4187   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4188         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4189                            (match_operand:SI 2 "immediate_operand" "i")]
4190                           VCVT_US_N))]
4191   "TARGET_NEON"
4193   arm_const_bounds (operands[2], 1, 33);
4194   return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4196   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; VCVTHI-to-<VH_CVTTO> conversion with an extra immediate (operand 2), unspec
;; VCVT_US_N; enabled by TARGET_NEON_FP16INST.  The output code passes operand
;; 2 to arm_const_bounds with arguments 0 and 17 before returning the template.
;; NOTE(review): the 32-bit variants of this pattern pass 1 as the lower
;; argument where this one passes 0 -- confirm the difference is intended.
4199 (define_insn "neon_vcvt<sup>_n<mode>"
4200  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4201    (unspec:<VH_CVTTO>
4202     [(match_operand:VCVTHI 1 "s_register_operand" "w")
4203      (match_operand:SI 2 "immediate_operand" "i")]
4204     VCVT_US_N))]
4205  "TARGET_NEON_FP16INST"
4207   arm_const_bounds (operands[2], 0, 17);
4208   return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4210  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; VH-to-<VH_CVTTO> conversions iterated over VCVT_HF_US; the <vcvth_op>
;; attribute is appended to "vcvt" in both the pattern name and the mnemonic.
;; Enabled by TARGET_NEON_FP16INST.
4213 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4214  [(set
4215    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4216    (unspec:<VH_CVTTO>
4217     [(match_operand:VH 1 "s_register_operand" "w")]
4218     VCVT_HF_US))]
4219  "TARGET_NEON_FP16INST"
4220  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4221   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; vmovn: produces a <V_narrow> result from a VN source (UNSPEC_VMOVN).
4224 (define_insn "neon_vmovn<mode>"
4225   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4226         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4227                            UNSPEC_VMOVN))]
4228   "TARGET_NEON"
4229   "vmovn.<V_if_elem>\t%P0, %q1"
4230   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; vqmovn: produces a <V_narrow> result from a VN source, iterated over the
;; VQMOVN unspecs with <sup> supplying the signedness letter in the mnemonic.
4233 (define_insn "neon_vqmovn<sup><mode>"
4234   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4235         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4236                            VQMOVN))]
4237   "TARGET_NEON"
4238   "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4239   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vqmovun: produces a <V_narrow> result from a VN source (UNSPEC_VQMOVUN).
4242 (define_insn "neon_vqmovun<mode>"
4243   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4244         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4245                            UNSPEC_VQMOVUN))]
4246   "TARGET_NEON"
4247   "vqmovun.<V_s_elem>\t%P0, %q1"
4248   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vmovl: produces a <V_widen> result from a VW source, iterated over the
;; VMOVL unspecs with <sup> supplying the signedness letter in the mnemonic.
4251 (define_insn "neon_vmovl<sup><mode>"
4252   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4253         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4254                           VMOVL))]
4255   "TARGET_NEON"
4256   "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4257   [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by one lane, VMD modes: operand 1 times lane %c3 of operand 2
;; (UNSPEC_VMUL_LANE).  Operand 2 uses the <scalar_mul_constraint> register
;; constraint.  The "type" attribute is chosen by <Is_float_mode>: the fp
;; scalar-multiply type when true, the integer one otherwise.
4260 (define_insn "neon_vmul_lane<mode>"
4261   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4262         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4263                      (match_operand:VMD 2 "s_register_operand"
4264                                         "<scalar_mul_constraint>")
4265                      (match_operand:SI 3 "immediate_operand" "i")]
4266                     UNSPEC_VMUL_LANE))]
4267   "TARGET_NEON"
4269   return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4271   [(set (attr "type")
4272      (if_then_else (match_test "<Is_float_mode>")
4273                    (const_string "neon_fp_mul_s_scalar<q>")
4274                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by one lane, VMQ modes: operand 1 times lane %c3 of operand 2
;; (UNSPEC_VMUL_LANE).  The lane source (operand 2) is in <V_HALF> mode and
;; uses the <scalar_mul_constraint> register constraint.  The "type" attribute
;; is chosen by <Is_float_mode>.
4277 (define_insn "neon_vmul_lane<mode>"
4278   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4279         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4280                      (match_operand:<V_HALF> 2 "s_register_operand"
4281                                              "<scalar_mul_constraint>")
4282                      (match_operand:SI 3 "immediate_operand" "i")]
4283                     UNSPEC_VMUL_LANE))]
4284   "TARGET_NEON"
4286   return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4288   [(set (attr "type")
4289      (if_then_else (match_test "<Is_float_mode>")
4290                    (const_string "neon_fp_mul_s_scalar<q>")
4291                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by one lane, VH modes (vmul.f16); enabled by TARGET_NEON_FP16INST.
;; The lane source (operand 2) is always V4HF, whichever VH mode is iterated.
4294 (define_insn "neon_vmul_lane<mode>"
4295   [(set (match_operand:VH 0 "s_register_operand" "=w")
4296         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4297                     (match_operand:V4HF 2 "s_register_operand"
4298                      "<scalar_mul_constraint>")
4299                      (match_operand:SI 3 "immediate_operand" "i")]
4300                      UNSPEC_VMUL_LANE))]
4301   "TARGET_NEON_FP16INST"
4302   "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4303   [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; vmull by one lane: VMDI operand 1 times lane %c3 of VMDI operand 2, giving
;; a <V_widen> result.  Iterated over VMULL_LANE, with <sup> supplying the
;; signedness letter in the mnemonic.
4306 (define_insn "neon_vmull<sup>_lane<mode>"
4307   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4308         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4309                            (match_operand:VMDI 2 "s_register_operand"
4310                                                "<scalar_mul_constraint>")
4311                            (match_operand:SI 3 "immediate_operand" "i")]
4312                           VMULL_LANE))]
4313   "TARGET_NEON"
4315   return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4317   [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by one lane: VMDI operand 1 and lane %c3 of VMDI operand 2, giving
;; a <V_widen> result (UNSPEC_VQDMULL_LANE).
4320 (define_insn "neon_vqdmull_lane<mode>"
4321   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4322         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4323                            (match_operand:VMDI 2 "s_register_operand"
4324                                                "<scalar_mul_constraint>")
4325                            (match_operand:SI 3 "immediate_operand" "i")]
4326                           UNSPEC_VQDMULL_LANE))]
4327   "TARGET_NEON"
4329   return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4331   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vqdmulh / vqrdmulh by one lane, VMQI modes.  Iterated over VQDMULH_LANE,
;; with the <r> attribute inserted into both the pattern name and the
;; mnemonic.  The lane source (operand 2) is in <V_HALF> mode.
4334 (define_insn "neon_vq<r>dmulh_lane<mode>"
4335   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4336         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4337                       (match_operand:<V_HALF> 2 "s_register_operand"
4338                                               "<scalar_mul_constraint>")
4339                       (match_operand:SI 3 "immediate_operand" "i")]
4340                       VQDMULH_LANE))]
4341   "TARGET_NEON"
4343   return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4345   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vqdmulh / vqrdmulh by one lane, VMDI modes.  Iterated over VQDMULH_LANE,
;; with the <r> attribute inserted into both the pattern name and the
;; mnemonic.  The "type" attribute uses the <q> mode attribute, as the other
;; VMD lane patterns in this file do, rather than hard-coding the "_q" type
;; string that the VMQI variant of this pattern uses.
4348 (define_insn "neon_vq<r>dmulh_lane<mode>"
4349   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4350         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4351                       (match_operand:VMDI 2 "s_register_operand"
4352                                           "<scalar_mul_constraint>")
4353                       (match_operand:SI 3 "immediate_operand" "i")]
4354                       VQDMULH_LANE))]
4355   "TARGET_NEON"
4357   return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4359   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar<q>")]
4362 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant.  Operand 1 is tied to the destination (constraint "0") and is
;; not printed; the template prints operand 0, operand 2 and lane %c4 of the
;; <V_HALF> operand 3.  Iterated over VQRDMLH_AS; enabled by TARGET_NEON_RDMA.
4363 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4364   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4365         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4366                       (match_operand:VMQI 2 "s_register_operand" "w")
4367                       (match_operand:<V_HALF> 3 "s_register_operand"
4368                                           "<scalar_mul_constraint>")
4369                       (match_operand:SI 4 "immediate_operand" "i")]
4370                      VQRDMLH_AS))]
4371   "TARGET_NEON_RDMA"
4373   return
4374    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4376   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI variant.  Operand 1 is tied to the destination (constraint "0") and is
;; not printed; the template prints operand 0, operand 2 and lane %c4 of
;; operand 3.  Iterated over VQRDMLH_AS; enabled by TARGET_NEON_RDMA.
4379 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4380   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4381         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4382                       (match_operand:VMDI 2 "s_register_operand" "w")
4383                       (match_operand:VMDI 3 "s_register_operand"
4384                                           "<scalar_mul_constraint>")
4385                       (match_operand:SI 4 "immediate_operand" "i")]
4386                      VQRDMLH_AS))]
4387   "TARGET_NEON_RDMA"
4389   return
4390    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4392   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; vmla by one lane, VMD modes (UNSPEC_VMLA_LANE).  Operand 1 is tied to the
;; destination (constraint "0"); the template prints operand 0, operand 2 and
;; lane %c4 of operand 3.  The "type" attribute is chosen by <Is_float_mode>.
4395 (define_insn "neon_vmla_lane<mode>"
4396   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4397         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4398                      (match_operand:VMD 2 "s_register_operand" "w")
4399                      (match_operand:VMD 3 "s_register_operand"
4400                                         "<scalar_mul_constraint>")
4401                      (match_operand:SI 4 "immediate_operand" "i")]
4402                      UNSPEC_VMLA_LANE))]
4403   "TARGET_NEON"
4405   return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4407   [(set (attr "type")
4408      (if_then_else (match_test "<Is_float_mode>")
4409                    (const_string "neon_fp_mla_s_scalar<q>")
4410                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmla by one lane, VMQ modes (UNSPEC_VMLA_LANE).  Operand 1 is tied to the
;; destination (constraint "0"); the lane source (operand 3) is in <V_HALF>
;; mode.  The "type" attribute is chosen by <Is_float_mode>.
4413 (define_insn "neon_vmla_lane<mode>"
4414   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4415         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4416                      (match_operand:VMQ 2 "s_register_operand" "w")
4417                      (match_operand:<V_HALF> 3 "s_register_operand"
4418                                              "<scalar_mul_constraint>")
4419                      (match_operand:SI 4 "immediate_operand" "i")]
4420                      UNSPEC_VMLA_LANE))]
4421   "TARGET_NEON"
4423   return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4425   [(set (attr "type")
4426      (if_then_else (match_test "<Is_float_mode>")
4427                    (const_string "neon_fp_mla_s_scalar<q>")
4428                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlal by one lane: <V_widen> operand 1 is tied to the destination
;; (constraint "0"); operands 2 and 3 are VMDI and lane %c4 of operand 3 is
;; used.  Iterated over VMLAL_LANE with <sup> giving the signedness letter.
4431 (define_insn "neon_vmlal<sup>_lane<mode>"
4432   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4433         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4434                            (match_operand:VMDI 2 "s_register_operand" "w")
4435                            (match_operand:VMDI 3 "s_register_operand"
4436                                                "<scalar_mul_constraint>")
4437                            (match_operand:SI 4 "immediate_operand" "i")]
4438                           VMLAL_LANE))]
4439   "TARGET_NEON"
4441   return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4443   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by one lane (UNSPEC_VQDMLAL_LANE): <V_widen> operand 1 is tied to
;; the destination (constraint "0"); operands 2 and 3 are VMDI and lane %c4 of
;; operand 3 is used.
4446 (define_insn "neon_vqdmlal_lane<mode>"
4447   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4448         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4449                            (match_operand:VMDI 2 "s_register_operand" "w")
4450                            (match_operand:VMDI 3 "s_register_operand"
4451                                                "<scalar_mul_constraint>")
4452                            (match_operand:SI 4 "immediate_operand" "i")]
4453                           UNSPEC_VQDMLAL_LANE))]
4454   "TARGET_NEON"
4456   return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4458   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; vmls by one lane, VMD modes (UNSPEC_VMLS_LANE).  Operand 1 is tied to the
;; destination (constraint "0"); the template prints operand 0, operand 2 and
;; lane %c4 of operand 3.  The "type" attribute is chosen by <Is_float_mode>
;; and reuses the "mla" type strings.
4461 (define_insn "neon_vmls_lane<mode>"
4462   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4463         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4464                      (match_operand:VMD 2 "s_register_operand" "w")
4465                      (match_operand:VMD 3 "s_register_operand"
4466                                         "<scalar_mul_constraint>")
4467                      (match_operand:SI 4 "immediate_operand" "i")]
4468                     UNSPEC_VMLS_LANE))]
4469   "TARGET_NEON"
4471   return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4473   [(set (attr "type")
4474      (if_then_else (match_test "<Is_float_mode>")
4475                    (const_string "neon_fp_mla_s_scalar<q>")
4476                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmls by one lane, VMQ modes (UNSPEC_VMLS_LANE).  Operand 1 is tied to the
;; destination (constraint "0"); the lane source (operand 3) is in <V_HALF>
;; mode.  The "type" attribute is chosen by <Is_float_mode> and reuses the
;; "mla" type strings.
4479 (define_insn "neon_vmls_lane<mode>"
4480   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4481         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4482                      (match_operand:VMQ 2 "s_register_operand" "w")
4483                      (match_operand:<V_HALF> 3 "s_register_operand"
4484                                              "<scalar_mul_constraint>")
4485                      (match_operand:SI 4 "immediate_operand" "i")]
4486                     UNSPEC_VMLS_LANE))]
4487   "TARGET_NEON"
4489   return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4491   [(set (attr "type")
4492      (if_then_else (match_test "<Is_float_mode>")
4493                    (const_string "neon_fp_mla_s_scalar<q>")
4494                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlsl by one lane: <V_widen> operand 1 is tied to the destination
;; (constraint "0"); operands 2 and 3 are VMDI and lane %c4 of operand 3 is
;; used.  Iterated over VMLSL_LANE with <sup> giving the signedness letter.
4497 (define_insn "neon_vmlsl<sup>_lane<mode>"
4498   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4499         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4500                            (match_operand:VMDI 2 "s_register_operand" "w")
4501                            (match_operand:VMDI 3 "s_register_operand"
4502                                                "<scalar_mul_constraint>")
4503                            (match_operand:SI 4 "immediate_operand" "i")]
4504                           VMLSL_LANE))]
4505   "TARGET_NEON"
4507   return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4509   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by one lane (UNSPEC_VQDMLSL_LANE): <V_widen> operand 1 is tied to
;; the destination (constraint "0"); operands 2 and 3 are VMDI and lane %c4 of
;; operand 3 is used.
4512 (define_insn "neon_vqdmlsl_lane<mode>"
4513   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4514         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4515                            (match_operand:VMDI 2 "s_register_operand" "w")
4516                            (match_operand:VMDI 3 "s_register_operand"
4517                                                "<scalar_mul_constraint>")
4518                            (match_operand:SI 4 "immediate_operand" "i")]
4519                           UNSPEC_VQDMLSL_LANE))]
4520   "TARGET_NEON"
4522   return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4524   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4527 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4528 ; core register into a temp register, then use a scalar taken from that. This
4529 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4530 ; or extracted from another vector. In the latter case it's currently better to
4531 ; use the "_lane" variant, and the former case can probably be implemented
4532 ; using vld1_lane, but that hasn't been done yet.
;; vmul_n for the VMD modes, implemented through the _lane pattern: a fresh
;; <MODE>mode pseudo receives the scalar (operand 2) in lane 0 via
;; gen_neon_vset_lane<mode>, and gen_neon_vmul_lane<mode> then selects lane 0.
;; The pseudo is passed to vset_lane as both destination and input vector, so
;; this expander never writes its other lanes.
4534 (define_expand "neon_vmul_n<mode>"
4535   [(match_operand:VMD 0 "s_register_operand" "")
4536    (match_operand:VMD 1 "s_register_operand" "")
4537    (match_operand:<V_elem> 2 "s_register_operand" "")]
4538   "TARGET_NEON"
4540   rtx tmp = gen_reg_rtx (<MODE>mode);
4541   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4542   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4543                                        const0_rtx));
4544   DONE;
;; vmul_n for the VMQ modes, implemented through the _lane pattern.  The
;; temporary holding the scalar is a <V_HALF>mode pseudo: operand 2 is placed
;; in its lane 0 with gen_neon_vset_lane<V_half>, and gen_neon_vmul_lane<mode>
;; then selects lane 0.
4547 (define_expand "neon_vmul_n<mode>"
4548   [(match_operand:VMQ 0 "s_register_operand" "")
4549    (match_operand:VMQ 1 "s_register_operand" "")
4550    (match_operand:<V_elem> 2 "s_register_operand" "")]
4551   "TARGET_NEON"
4553   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4554   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4555   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4556                                        const0_rtx));
4557   DONE;
;; vmul_n for the VH modes (TARGET_NEON_FP16INST), implemented through the
;; _lane pattern.  The temporary is always a V4HF pseudo: operand 2 is placed
;; in its lane 0 with gen_neon_vset_lanev4hf, and gen_neon_vmul_lane<mode>
;; then selects lane 0.
4560 (define_expand "neon_vmul_n<mode>"
4561   [(match_operand:VH 0 "s_register_operand")
4562    (match_operand:VH 1 "s_register_operand")
4563    (match_operand:<V_elem> 2 "s_register_operand")]
4564   "TARGET_NEON_FP16INST"
4566   rtx tmp = gen_reg_rtx (V4HFmode);
4567   emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4568   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4569                                        const0_rtx));
4570   DONE;
;; vmulls_n: places the scalar (operand 2) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits gen_neon_vmulls_lane<mode>
;; selecting lane 0.  The result (operand 0) is in <V_widen> mode.
4573 (define_expand "neon_vmulls_n<mode>"
4574   [(match_operand:<V_widen> 0 "s_register_operand" "")
4575    (match_operand:VMDI 1 "s_register_operand" "")
4576    (match_operand:<V_elem> 2 "s_register_operand" "")]
4577   "TARGET_NEON"
4579   rtx tmp = gen_reg_rtx (<MODE>mode);
4580   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4581   emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4582                                          const0_rtx));
4583   DONE;
;; vmullu_n: places the scalar (operand 2) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits gen_neon_vmullu_lane<mode>
;; selecting lane 0.  The result (operand 0) is in <V_widen> mode.
4586 (define_expand "neon_vmullu_n<mode>"
4587   [(match_operand:<V_widen> 0 "s_register_operand" "")
4588    (match_operand:VMDI 1 "s_register_operand" "")
4589    (match_operand:<V_elem> 2 "s_register_operand" "")]
4590   "TARGET_NEON"
4592   rtx tmp = gen_reg_rtx (<MODE>mode);
4593   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4594   emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4595                                          const0_rtx));
4596   DONE;
;; vqdmull_n: places the scalar (operand 2) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits
;; gen_neon_vqdmull_lane<mode> selecting lane 0.  The result is <V_widen>.
4599 (define_expand "neon_vqdmull_n<mode>"
4600   [(match_operand:<V_widen> 0 "s_register_operand" "")
4601    (match_operand:VMDI 1 "s_register_operand" "")
4602    (match_operand:<V_elem> 2 "s_register_operand" "")]
4603   "TARGET_NEON"
4605   rtx tmp = gen_reg_rtx (<MODE>mode);
4606   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4607   emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4608                                           const0_rtx));
4609   DONE;
;; vqdmulh_n for the VMDI modes: places the scalar (operand 2) in lane 0 of a
;; fresh <MODE>mode pseudo with gen_neon_vset_lane<mode>, then emits
;; gen_neon_vqdmulh_lane<mode> selecting lane 0.
4612 (define_expand "neon_vqdmulh_n<mode>"
4613   [(match_operand:VMDI 0 "s_register_operand" "")
4614    (match_operand:VMDI 1 "s_register_operand" "")
4615    (match_operand:<V_elem> 2 "s_register_operand" "")]
4616   "TARGET_NEON"
4618   rtx tmp = gen_reg_rtx (<MODE>mode);
4619   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4620   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4621                                           const0_rtx));
4622   DONE;
;; vqrdmulh_n for the VMDI modes: places the scalar (operand 2) in lane 0 of a
;; fresh <MODE>mode pseudo with gen_neon_vset_lane<mode>, then emits
;; gen_neon_vqrdmulh_lane<mode> selecting lane 0.
4625 (define_expand "neon_vqrdmulh_n<mode>"
4626   [(match_operand:VMDI 0 "s_register_operand" "")
4627    (match_operand:VMDI 1 "s_register_operand" "")
4628    (match_operand:<V_elem> 2 "s_register_operand" "")]
4629   "TARGET_NEON"
4631   rtx tmp = gen_reg_rtx (<MODE>mode);
4632   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4633   emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4634                                           const0_rtx));
4635   DONE;
;; vqdmulh_n for the VMQI modes: places the scalar (operand 2) in lane 0 of a
;; fresh <V_HALF>mode pseudo with gen_neon_vset_lane<V_half>, then emits
;; gen_neon_vqdmulh_lane<mode> selecting lane 0.
4638 (define_expand "neon_vqdmulh_n<mode>"
4639   [(match_operand:VMQI 0 "s_register_operand" "")
4640    (match_operand:VMQI 1 "s_register_operand" "")
4641    (match_operand:<V_elem> 2 "s_register_operand" "")]
4642   "TARGET_NEON"
4644   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4645   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4646   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4647                                           const0_rtx));
4648   DONE;
;; vqrdmulh_n for the VMQI modes: places the scalar (operand 2) in lane 0 of a
;; fresh <V_HALF>mode pseudo with gen_neon_vset_lane<V_half>, then emits
;; gen_neon_vqrdmulh_lane<mode> selecting lane 0.
4651 (define_expand "neon_vqrdmulh_n<mode>"
4652   [(match_operand:VMQI 0 "s_register_operand" "")
4653    (match_operand:VMQI 1 "s_register_operand" "")
4654    (match_operand:<V_elem> 2 "s_register_operand" "")]
4655   "TARGET_NEON"
4657   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4658   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4659   emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4660                                            const0_rtx));
4661   DONE;
;; vmla_n for the VMD modes: places the scalar (operand 3) in lane 0 of a
;; fresh <MODE>mode pseudo with gen_neon_vset_lane<mode>, then emits
;; gen_neon_vmla_lane<mode> on operands 0, 1, 2 and that pseudo, lane 0.
4664 (define_expand "neon_vmla_n<mode>"
4665   [(match_operand:VMD 0 "s_register_operand" "")
4666    (match_operand:VMD 1 "s_register_operand" "")
4667    (match_operand:VMD 2 "s_register_operand" "")
4668    (match_operand:<V_elem> 3 "s_register_operand" "")]
4669   "TARGET_NEON"
4671   rtx tmp = gen_reg_rtx (<MODE>mode);
4672   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4673   emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4674                                        tmp, const0_rtx));
4675   DONE;
;; vmla_n for the VMQ modes: places the scalar (operand 3) in lane 0 of a
;; fresh <V_HALF>mode pseudo with gen_neon_vset_lane<V_half>, then emits
;; gen_neon_vmla_lane<mode> on operands 0, 1, 2 and that pseudo, lane 0.
4678 (define_expand "neon_vmla_n<mode>"
4679   [(match_operand:VMQ 0 "s_register_operand" "")
4680    (match_operand:VMQ 1 "s_register_operand" "")
4681    (match_operand:VMQ 2 "s_register_operand" "")
4682    (match_operand:<V_elem> 3 "s_register_operand" "")]
4683   "TARGET_NEON"
4685   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4686   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4687   emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4688                                        tmp, const0_rtx));
4689   DONE;
;; vmlals_n: places the scalar (operand 3) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits gen_neon_vmlals_lane<mode>
;; on operands 0, 1, 2 and that pseudo, lane 0.  Operands 0 and 1 are
;; <V_widen>; operand 2 is VMDI.
4692 (define_expand "neon_vmlals_n<mode>"
4693   [(match_operand:<V_widen> 0 "s_register_operand" "")
4694    (match_operand:<V_widen> 1 "s_register_operand" "")
4695    (match_operand:VMDI 2 "s_register_operand" "")
4696    (match_operand:<V_elem> 3 "s_register_operand" "")]
4697   "TARGET_NEON"
4699   rtx tmp = gen_reg_rtx (<MODE>mode);
4700   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4701   emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4702                                          tmp, const0_rtx));
4703   DONE;
;; vmlalu_n: places the scalar (operand 3) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits gen_neon_vmlalu_lane<mode>
;; on operands 0, 1, 2 and that pseudo, lane 0.  Operands 0 and 1 are
;; <V_widen>; operand 2 is VMDI.
4706 (define_expand "neon_vmlalu_n<mode>"
4707   [(match_operand:<V_widen> 0 "s_register_operand" "")
4708    (match_operand:<V_widen> 1 "s_register_operand" "")
4709    (match_operand:VMDI 2 "s_register_operand" "")
4710    (match_operand:<V_elem> 3 "s_register_operand" "")]
4711   "TARGET_NEON"
4713   rtx tmp = gen_reg_rtx (<MODE>mode);
4714   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4715   emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4716                                          tmp, const0_rtx));
4717   DONE;
;; vqdmlal_n: places the scalar (operand 3) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits
;; gen_neon_vqdmlal_lane<mode> on operands 0, 1, 2 and that pseudo, lane 0.
;; Operands 0 and 1 are <V_widen>; operand 2 is VMDI.
4720 (define_expand "neon_vqdmlal_n<mode>"
4721   [(match_operand:<V_widen> 0 "s_register_operand" "")
4722    (match_operand:<V_widen> 1 "s_register_operand" "")
4723    (match_operand:VMDI 2 "s_register_operand" "")
4724    (match_operand:<V_elem> 3 "s_register_operand" "")]
4725   "TARGET_NEON"
4727   rtx tmp = gen_reg_rtx (<MODE>mode);
4728   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4729   emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4730                                           tmp, const0_rtx));
4731   DONE;
;; vmls_n for the VMD modes: places the scalar (operand 3) in lane 0 of a
;; fresh <MODE>mode pseudo with gen_neon_vset_lane<mode>, then emits
;; gen_neon_vmls_lane<mode> on operands 0, 1, 2 and that pseudo, lane 0.
4734 (define_expand "neon_vmls_n<mode>"
4735   [(match_operand:VMD 0 "s_register_operand" "")
4736    (match_operand:VMD 1 "s_register_operand" "")
4737    (match_operand:VMD 2 "s_register_operand" "")
4738    (match_operand:<V_elem> 3 "s_register_operand" "")]
4739   "TARGET_NEON"
4741   rtx tmp = gen_reg_rtx (<MODE>mode);
4742   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4743   emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4744                                        tmp, const0_rtx));
4745   DONE;
;; vmls_n for the VMQ modes: places the scalar (operand 3) in lane 0 of a
;; fresh <V_HALF>mode pseudo with gen_neon_vset_lane<V_half>, then emits
;; gen_neon_vmls_lane<mode> on operands 0, 1, 2 and that pseudo, lane 0.
4748 (define_expand "neon_vmls_n<mode>"
4749   [(match_operand:VMQ 0 "s_register_operand" "")
4750    (match_operand:VMQ 1 "s_register_operand" "")
4751    (match_operand:VMQ 2 "s_register_operand" "")
4752    (match_operand:<V_elem> 3 "s_register_operand" "")]
4753   "TARGET_NEON"
4755   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4756   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4757   emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4758                                        tmp, const0_rtx));
4759   DONE;
;; vmlsls_n: places the scalar (operand 3) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits gen_neon_vmlsls_lane<mode>
;; on operands 0, 1, 2 and that pseudo, lane 0.  Operands 0 and 1 are
;; <V_widen>; operand 2 is VMDI.
4762 (define_expand "neon_vmlsls_n<mode>"
4763   [(match_operand:<V_widen> 0 "s_register_operand" "")
4764    (match_operand:<V_widen> 1 "s_register_operand" "")
4765    (match_operand:VMDI 2 "s_register_operand" "")
4766    (match_operand:<V_elem> 3 "s_register_operand" "")]
4767   "TARGET_NEON"
4769   rtx tmp = gen_reg_rtx (<MODE>mode);
4770   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4771   emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4772                                         tmp, const0_rtx));
4773   DONE;
;; vmlslu_n: places the scalar (operand 3) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits gen_neon_vmlslu_lane<mode>
;; on operands 0, 1, 2 and that pseudo, lane 0.  Operands 0 and 1 are
;; <V_widen>; operand 2 is VMDI.
4776 (define_expand "neon_vmlslu_n<mode>"
4777   [(match_operand:<V_widen> 0 "s_register_operand" "")
4778    (match_operand:<V_widen> 1 "s_register_operand" "")
4779    (match_operand:VMDI 2 "s_register_operand" "")
4780    (match_operand:<V_elem> 3 "s_register_operand" "")]
4781   "TARGET_NEON"
4783   rtx tmp = gen_reg_rtx (<MODE>mode);
4784   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4785   emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4786                                         tmp, const0_rtx));
4787   DONE;
;; vqdmlsl_n: places the scalar (operand 3) in lane 0 of a fresh <MODE>mode
;; pseudo with gen_neon_vset_lane<mode>, then emits
;; gen_neon_vqdmlsl_lane<mode> on operands 0, 1, 2 and that pseudo, lane 0.
;; Operands 0 and 1 are <V_widen>; operand 2 is VMDI.
4790 (define_expand "neon_vqdmlsl_n<mode>"
4791   [(match_operand:<V_widen> 0 "s_register_operand" "")
4792    (match_operand:<V_widen> 1 "s_register_operand" "")
4793    (match_operand:VMDI 2 "s_register_operand" "")
4794    (match_operand:<V_elem> 3 "s_register_operand" "")]
4795   "TARGET_NEON"
4797   rtx tmp = gen_reg_rtx (<MODE>mode);
4798   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4799   emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4800                                           tmp, const0_rtx));
4801   DONE;
;; vext over the VDQX modes: operand 0 is an UNSPEC_VEXT of the two vector
;; inputs (operands 1 and 2) and the SImode immediate operand 3, which is
;; printed as the last assembler operand.  Before the template is returned,
;; operand 3 is passed to arm_const_bounds with the limits 0 and
;; GET_MODE_NUNITS (<MODE>mode).
4804 (define_insn "neon_vext<mode>"
4805   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4806         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4807                       (match_operand:VDQX 2 "s_register_operand" "w")
4808                       (match_operand:SI 3 "immediate_operand" "i")]
4809                      UNSPEC_VEXT))]
4810   "TARGET_NEON"
4812   arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4813   return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4815   [(set_attr "type" "neon_ext<q>")]
;; vrev64 over the VDQ modes: a single-input UNSPEC_VREV64.  The element size
;; in the mnemonic comes from <V_sz_elem> and the register print modifier
;; from <V_reg>.
4818 (define_insn "neon_vrev64<mode>"
4819   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4820         (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4821                     UNSPEC_VREV64))]
4822   "TARGET_NEON"
4823   "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4824   [(set_attr "type" "neon_rev<q>")]
;; vrev32 over the VX modes: a single-input UNSPEC_VREV32.  The element size
;; in the mnemonic comes from <V_sz_elem> and the register print modifier
;; from <V_reg>.
4827 (define_insn "neon_vrev32<mode>"
4828   [(set (match_operand:VX 0 "s_register_operand" "=w")
4829         (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4830                    UNSPEC_VREV32))]
4831   "TARGET_NEON"
4832   "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4833   [(set_attr "type" "neon_rev<q>")]
;; vrev16 over the VE modes: a single-input UNSPEC_VREV16.  The element size
;; in the mnemonic comes from <V_sz_elem> and the register print modifier
;; from <V_reg>.
4836 (define_insn "neon_vrev16<mode>"
4837   [(set (match_operand:VE 0 "s_register_operand" "=w")
4838         (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4839                    UNSPEC_VREV16))]
4840   "TARGET_NEON"
4841   "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4842   [(set_attr "type" "neon_rev<q>")]
4845 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4846 ; allocation. For an intrinsic of form:
4847 ;   rD = vbsl_* (rS, rN, rM)
4848 ; We can use any of:
4849 ;   vbsl rS, rN, rM  (if D = S)
4850 ;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
4851 ;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
;
; In the pattern below operand 1 is rS, operand 2 is rN and operand 3 is rM.
; The three constraint alternatives tie operand 0 to operand 1, operand 3 and
; operand 2 respectively, and select the vbsl, vbit and vbif templates in
; that order.
4853 (define_insn "neon_vbsl<mode>_internal"
4854   [(set (match_operand:VDQX 0 "s_register_operand"               "=w,w,w")
4855         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4856                       (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4857                       (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4858                      UNSPEC_VBSL))]
4859   "TARGET_NEON"
4860   "@
4861   vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4862   vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4863   vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4864   [(set_attr "type" "neon_bsl<q>")]
;; Expander for neon_vbsl<mode>.  Operand 1 arrives in the <V_cmp_result>
;; mode while operands 0, 2 and 3 are in the VDQX mode itself; the preparation
;; code re-views operand 1 in <MODE>mode with gen_lowpart so that all unspec
;; inputs share one mode.  The visible body has no DONE, so the RTL template
;; above is what gets emitted (presumably matched by
;; neon_vbsl<mode>_internal).
4867 (define_expand "neon_vbsl<mode>"
4868   [(set (match_operand:VDQX 0 "s_register_operand" "")
4869         (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4870                       (match_operand:VDQX 2 "s_register_operand" "")
4871                       (match_operand:VDQX 3 "s_register_operand" "")]
4872                      UNSPEC_VBSL))]
4873   "TARGET_NEON"
4875   /* We can't alias operands together if they have different modes.  */
4876   operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4879 ;; vshl, vrshl
;; One pattern per member of the VSHL unspec iterator, over the VDQIX modes.
;; Both the value (operand 1) and the shift count (operand 2) are vector
;; registers.
;; NOTE(review): the type attribute is neon_shift_imm<q> although the shift
;; count is a register operand -- confirm this scheduling class is intended.
4880 (define_insn "neon_v<shift_op><sup><mode>"
4881   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4882         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4883                        (match_operand:VDQIX 2 "s_register_operand" "w")]
4884                       VSHL))]
4885   "TARGET_NEON"
4886   "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4887   [(set_attr "type" "neon_shift_imm<q>")]
4890 ;; vqshl, vqrshl
;; One pattern per member of the VQSHL unspec iterator, over the VDQIX modes.
;; Both the value (operand 1) and the shift count (operand 2) are vector
;; registers.
;; NOTE(review): the type attribute is neon_sat_shift_imm<q> although the
;; shift count is a register operand -- confirm this scheduling class is
;; intended.
4891 (define_insn "neon_v<shift_op><sup><mode>"
4892   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4893         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4894                        (match_operand:VDQIX 2 "s_register_operand" "w")]
4895                       VQSHL))]
4896   "TARGET_NEON"
4897   "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4898   [(set_attr "type" "neon_sat_shift_imm<q>")]
4901 ;; vshr_n, vrshr_n
;; One pattern per member of the VSHR_N unspec iterator, over the VDQIX
;; modes.  Operand 2 is the SImode immediate shift count; it is passed to
;; arm_const_bounds with the limits 1 and (element bits + 1) before the
;; template is returned.
4902 (define_insn "neon_v<shift_op><sup>_n<mode>"
4903   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4904         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4905                        (match_operand:SI 2 "immediate_operand" "i")]
4906                       VSHR_N))]
4907   "TARGET_NEON"
4909   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4910   return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4912   [(set_attr "type" "neon_shift_imm<q>")]
4915 ;; vshrn_n, vrshrn_n
;; One pattern per member of the VSHRN_N unspec iterator.  The input
;; (operand 1) is in a VN mode and printed with %q; the result is in the
;; <V_narrow> mode and printed with %P.  The immediate operand 2 is passed to
;; arm_const_bounds with the limits 1 and (element bits / 2 + 1), where the
;; element bits are those of the input mode.
4916 (define_insn "neon_v<shift_op>_n<mode>"
4917   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4918         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4919                             (match_operand:SI 2 "immediate_operand" "i")]
4920                            VSHRN_N))]
4921   "TARGET_NEON"
4923   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4924   return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4926   [(set_attr "type" "neon_shift_imm_narrow_q")]
4929 ;; vqshrn_n, vqrshrn_n
;; One pattern per member of the VQSHRN_N unspec iterator.  The input
;; (operand 1) is in a VN mode and printed with %q; the result is in the
;; <V_narrow> mode and printed with %P.  The immediate operand 2 is passed to
;; arm_const_bounds with the limits 1 and (element bits / 2 + 1), where the
;; element bits are those of the input mode.
4930 (define_insn "neon_v<shift_op><sup>_n<mode>"
4931   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4932         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4933                             (match_operand:SI 2 "immediate_operand" "i")]
4934                            VQSHRN_N))]
4935   "TARGET_NEON"
4937   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4938   return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4940   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4943 ;; vqshrun_n, vqrshrun_n
;; One pattern per member of the VQSHRUN_N unspec iterator.  The input
;; (operand 1) is in a VN mode and printed with %q; the result is in the
;; <V_narrow> mode and printed with %P.  The data-type suffix comes from
;; <V_s_elem>.  The immediate operand 2 is passed to arm_const_bounds with
;; the limits 1 and (element bits / 2 + 1), where the element bits are those
;; of the input mode.
4944 (define_insn "neon_v<shift_op>_n<mode>"
4945   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4946         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4947                             (match_operand:SI 2 "immediate_operand" "i")]
4948                            VQSHRUN_N))]
4949   "TARGET_NEON"
4951   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4952   return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4954   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate over the VDQIX modes (UNSPEC_VSHL_N).  The immediate
;; operand 2 is passed to arm_const_bounds with the limits 0 and the element
;; bit width before the template is returned.
4957 (define_insn "neon_vshl_n<mode>"
4958   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4959         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4960                        (match_operand:SI 2 "immediate_operand" "i")]
4961                       UNSPEC_VSHL_N))]
4962   "TARGET_NEON"
4964   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4965   return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4967   [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate over the VDQIX modes, one pattern per member of the
;; VQSHL_N unspec iterator (<sup> supplies the data-type prefix in the
;; mnemonic).  The immediate operand 2 is passed to arm_const_bounds with the
;; limits 0 and the element bit width before the template is returned.
4970 (define_insn "neon_vqshl_<sup>_n<mode>"
4971   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4972         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4973                        (match_operand:SI 2 "immediate_operand" "i")]
4974                       VQSHL_N))]
4975   "TARGET_NEON"
4977   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4978   return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4980   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate over the VDQIX modes (UNSPEC_VQSHLU_N).  The data-type
;; suffix comes from <V_s_elem>.  The immediate operand 2 is passed to
;; arm_const_bounds with the limits 0 and the element bit width before the
;; template is returned.
4983 (define_insn "neon_vqshlu_n<mode>"
4984   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4985         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4986                        (match_operand:SI 2 "immediate_operand" "i")]
4987                       UNSPEC_VQSHLU_N))]
4988   "TARGET_NEON"
4990   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4991   return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4993   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vshll by immediate, one pattern per member of the VSHLL_N unspec iterator.
;; The input (operand 1) is in a VW mode and printed with %P; the result is
;; in the <V_widen> mode and printed with %q.  The immediate operand 2 is
;; passed to arm_const_bounds with the limits 0 and (element bits + 1).
4996 (define_insn "neon_vshll<sup>_n<mode>"
4997   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4998         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4999                            (match_operand:SI 2 "immediate_operand" "i")]
5000                           VSHLL_N))]
5001   "TARGET_NEON"
5003   /* The boundaries are: 0 < imm <= size.  */
  /* NOTE(review): the lower limit passed below is 0, not 1.  If
     arm_const_bounds treats its lower limit as inclusive, an immediate of 0
     is accepted as well -- confirm which of the comment and the code is
     intended.  */
5004   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
5005   return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
5007   [(set_attr "type" "neon_shift_imm_long")]
5010 ;; vsra_n, vrsra_n
;; One pattern per member of the VSRA_N unspec iterator, over the VDQIX
;; modes.  Operand 1 is tied to the output register (constraint "0") and is
;; therefore not printed; operand 2 is the other vector input and operand 3
;; the immediate.  Operand 3 is passed to arm_const_bounds with the limits 1
;; and (element bits + 1).
5011 (define_insn "neon_v<shift_op><sup>_n<mode>"
5012   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5013         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5014                        (match_operand:VDQIX 2 "s_register_operand" "w")
5015                        (match_operand:SI 3 "immediate_operand" "i")]
5016                       VSRA_N))]
5017   "TARGET_NEON"
5019   arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5020   return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5022   [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate over the VDQIX modes (UNSPEC_VSRI).  Operand 1 is tied
;; to the output register (constraint "0") and is not printed; operand 2 is
;; the other vector input and operand 3 the immediate, which is passed to
;; arm_const_bounds with the limits 1 and (element bits + 1).
;; NOTE(review): the type attribute is neon_shift_reg<q> although the shift
;; count is an immediate -- confirm this scheduling class is intended.
5025 (define_insn "neon_vsri_n<mode>"
5026   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5027         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5028                        (match_operand:VDQIX 2 "s_register_operand" "w")
5029                        (match_operand:SI 3 "immediate_operand" "i")]
5030                       UNSPEC_VSRI))]
5031   "TARGET_NEON"
5033   arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5034   return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5036   [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate over the VDQIX modes (UNSPEC_VSLI).  Operand 1 is tied
;; to the output register (constraint "0") and is not printed; operand 2 is
;; the other vector input and operand 3 the immediate, which is passed to
;; arm_const_bounds with the limits 0 and the element bit width.
;; NOTE(review): the type attribute is neon_shift_reg<q> although the shift
;; count is an immediate -- confirm this scheduling class is intended.
5039 (define_insn "neon_vsli_n<mode>"
5040   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5041         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5042                        (match_operand:VDQIX 2 "s_register_operand" "w")
5043                        (match_operand:SI 3 "immediate_operand" "i")]
5044                       UNSPEC_VSLI))]
5045   "TARGET_NEON"
5047   arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5048   return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5050   [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table: operand 1 is the V8QI table (printed
;; inside the braces) and operand 2 the V8QI index vector.
5053 (define_insn "neon_vtbl1v8qi"
5054   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5055         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5056                       (match_operand:V8QI 2 "s_register_operand" "w")]
5057                      UNSPEC_VTBL))]
5058   "TARGET_NEON"
5059   "vtbl.8\t%P0, {%P1}, %P2"
5060   [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in the TImode operand 1; operand 2 is
;; the V8QI index vector.  The output code names the table registers
;; individually as V8QImode registers at REGNO (operands[1]) and that number
;; plus 2, prints the instruction itself with output_asm_insn, and returns an
;; empty template.
;; (Presumably each D register spans two hard register numbers, hence the
;; step of 2 -- TODO confirm.)
5063 (define_insn "neon_vtbl2v8qi"
5064   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5065         (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5066                       (match_operand:V8QI 2 "s_register_operand" "w")]
5067                      UNSPEC_VTBL))]
5068   "TARGET_NEON"
5070   rtx ops[4];
5071   int tabbase = REGNO (operands[1]);
5073   ops[0] = operands[0];
5074   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5075   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5076   ops[3] = operands[2];
5077   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5079   return "";
5081   [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in the EImode operand 1; operand 2
;; is the V8QI index vector.  The output code names the table registers
;; individually as V8QImode registers at REGNO (operands[1]) plus 0, 2 and 4,
;; prints the instruction itself with output_asm_insn, and returns an empty
;; template.
5084 (define_insn "neon_vtbl3v8qi"
5085   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5086         (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5087                       (match_operand:V8QI 2 "s_register_operand" "w")]
5088                      UNSPEC_VTBL))]
5089   "TARGET_NEON"
5091   rtx ops[5];
5092   int tabbase = REGNO (operands[1]);
5094   ops[0] = operands[0];
5095   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5096   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5097   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5098   ops[4] = operands[2];
5099   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5101   return "";
5103   [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in the OImode operand 1; operand 2 is
;; the V8QI index vector.  The output code names the table registers
;; individually as V8QImode registers at REGNO (operands[1]) plus 0, 2, 4
;; and 6, prints the instruction itself with output_asm_insn, and returns an
;; empty template.
5106 (define_insn "neon_vtbl4v8qi"
5107   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5108         (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5109                       (match_operand:V8QI 2 "s_register_operand" "w")]
5110                      UNSPEC_VTBL))]
5111   "TARGET_NEON"
5113   rtx ops[6];
5114   int tabbase = REGNO (operands[1]);
5116   ops[0] = operands[0];
5117   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5118   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5119   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5120   ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5121   ops[5] = operands[2];
5122   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5124   return "";
5126   [(set_attr "type" "neon_tbl4")]
5129 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; neon_vtbl1v16qi: V16QI table (operand 1) and V16QI index vector
;; (operand 2).  It has no assembler form of its own ("#"); after reload it is
;; split into two neon_vtbl2v8qi instructions, one for the low and one for the
;; high V8QI half of operands 0 and 2, both using operand 1 re-viewed as
;; TImode as the table.  The output is marked earlyclobber ("=&w"),
;; presumably because the first half is written while the inputs are still
;; needed by the second instruction.
5130 (define_insn_and_split "neon_vtbl1v16qi"
5131   [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5132         (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5133                        (match_operand:V16QI 2 "s_register_operand" "w")]
5134                       UNSPEC_VTBL))]
5135   "TARGET_NEON"
5136   "#"
5137   "&& reload_completed"
5138   [(const_int 0)]
5140   rtx op0, op1, op2, part0, part2;
5141   unsigned ofs;
5143   op0 = operands[0];
5144   op1 = gen_lowpart (TImode, operands[1]);
5145   op2 = operands[2];
  /* Low halves of the result and of the index vector.  */
5147   ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5148   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5149   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5150   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
  /* High halves of the result and of the index vector.  */
5152   ofs = subreg_highpart_offset (V8QImode, V16QImode);
5153   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5154   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5155   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5156   DONE;
5158   [(set_attr "type" "multiple")]
;; neon_vtbl2v16qi: OImode table (operand 1) and V16QI index vector
;; (operand 2).  It has no assembler form of its own ("#"); after reload it is
;; split into two instructions, one for the low and one for the high V8QI
;; half of operands 0 and 2, each given operand 1 unchanged as the table.
;; NOTE(review): both halves are emitted with gen_neon_vtbl2v8qi, whose table
;; operand is declared TImode elsewhere in this file, while operand 1 here is
;; OImode -- confirm this is the intended generator.
5161 (define_insn_and_split "neon_vtbl2v16qi"
5162   [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5163         (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5164                        (match_operand:V16QI 2 "s_register_operand" "w")]
5165                       UNSPEC_VTBL))]
5166   "TARGET_NEON"
5167   "#"
5168   "&& reload_completed"
5169   [(const_int 0)]
5171   rtx op0, op1, op2, part0, part2;
5172   unsigned ofs;
5174   op0 = operands[0];
5175   op1 = operands[1];
5176   op2 = operands[2];
  /* Low halves of the result and of the index vector.  */
5178   ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5179   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5180   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5181   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
  /* High halves of the result and of the index vector.  */
5183   ofs = subreg_highpart_offset (V8QImode, V16QImode);
5184   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5185   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5186   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5187   DONE;
5189   [(set_attr "type" "multiple")]
5192 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5193 ;; handle quad-word input modes, producing octa-word output modes.  But
5194 ;; that requires us to add support for octa-word vector modes in moves.
5195 ;; That seems overkill for this one use in vec_perm.
;; The pattern combines two V16QI inputs into one OImode result under
;; UNSPEC_VCONCAT.  It has no assembler form of its own ("#"); after reload
;; the split hands the operands array to neon_split_vcombine.
5196 (define_insn_and_split "neon_vcombinev16qi"
5197   [(set (match_operand:OI 0 "s_register_operand" "=w")
5198         (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5199                     (match_operand:V16QI 2 "s_register_operand" "w")]
5200                    UNSPEC_VCONCAT))]
5201   "TARGET_NEON"
5202   "#"
5203   "&& reload_completed"
5204   [(const_int 0)]
5206   neon_split_vcombine (operands);
5207   DONE;
5209 [(set_attr "type" "multiple")]
;; vtbx with a one-register table.  Operand 1 is tied to the output register
;; (constraint "0") and is not printed; operand 2 is the V8QI table (printed
;; inside the braces) and operand 3 the V8QI index vector.
5212 (define_insn "neon_vtbx1v8qi"
5213   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5214         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5215                       (match_operand:V8QI 2 "s_register_operand" "w")
5216                       (match_operand:V8QI 3 "s_register_operand" "w")]
5217                      UNSPEC_VTBX))]
5218   "TARGET_NEON"
5219   "vtbx.8\t%P0, {%P2}, %P3"
5220   [(set_attr "type" "neon_tbl1")]
;; vtbx with a two-register table held in the TImode operand 2.  Operand 1 is
;; tied to the output register (constraint "0"); operand 3 is the V8QI index
;; vector.  The output code names the table registers individually as
;; V8QImode registers at REGNO (operands[2]) plus 0 and 2, prints the
;; instruction itself with output_asm_insn, and returns an empty template.
5223 (define_insn "neon_vtbx2v8qi"
5224   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5225         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5226                       (match_operand:TI 2 "s_register_operand" "w")
5227                       (match_operand:V8QI 3 "s_register_operand" "w")]
5228                      UNSPEC_VTBX))]
5229   "TARGET_NEON"
5231   rtx ops[4];
5232   int tabbase = REGNO (operands[2]);
5234   ops[0] = operands[0];
5235   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5236   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5237   ops[3] = operands[3];
5238   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5240   return "";
5242   [(set_attr "type" "neon_tbl2")]
;; vtbx with a three-register table held in the EImode operand 2.  Operand 1
;; is tied to the output register (constraint "0"); operand 3 is the V8QI
;; index vector.  The output code names the table registers individually as
;; V8QImode registers at REGNO (operands[2]) plus 0, 2 and 4, prints the
;; instruction itself with output_asm_insn, and returns an empty template.
5245 (define_insn "neon_vtbx3v8qi"
5246   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5247         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5248                       (match_operand:EI 2 "s_register_operand" "w")
5249                       (match_operand:V8QI 3 "s_register_operand" "w")]
5250                      UNSPEC_VTBX))]
5251   "TARGET_NEON"
5253   rtx ops[5];
5254   int tabbase = REGNO (operands[2]);
5256   ops[0] = operands[0];
5257   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5258   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5259   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5260   ops[4] = operands[3];
5261   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5263   return "";
5265   [(set_attr "type" "neon_tbl3")]
;; vtbx with a four-register table held in the OImode operand 2.  Operand 1
;; is tied to the output register (constraint "0"); operand 3 is the V8QI
;; index vector.  The output code names the table registers individually as
;; V8QImode registers at REGNO (operands[2]) plus 0, 2, 4 and 6, prints the
;; instruction itself with output_asm_insn, and returns an empty template.
5268 (define_insn "neon_vtbx4v8qi"
5269   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5270         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5271                       (match_operand:OI 2 "s_register_operand" "w")
5272                       (match_operand:V8QI 3 "s_register_operand" "w")]
5273                      UNSPEC_VTBX))]
5274   "TARGET_NEON"
5276   rtx ops[6];
5277   int tabbase = REGNO (operands[2]);
5279   ops[0] = operands[0];
5280   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5281   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5282   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5283   ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5284   ops[5] = operands[3];
5285   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5287   return "";
5289   [(set_attr "type" "neon_tbl4")]
;; Expander for vtrn over the VDQWH modes.  It produces two results in
;; parallel from the same pair of inputs (operands 1 and 2): operand 0 under
;; UNSPEC_VTRN1 and operand 3 under UNSPEC_VTRN2.  There is no preparation
;; code; the template is emitted as written.
5292 (define_expand "neon_vtrn<mode>_internal"
5293   [(parallel
5294     [(set (match_operand:VDQWH 0 "s_register_operand")
5295           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5296                          (match_operand:VDQWH 2 "s_register_operand")]
5297            UNSPEC_VTRN1))
5298      (set (match_operand:VDQWH 3 "s_register_operand")
5299           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5300   "TARGET_NEON"
5301   ""
5304 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 and the inputs operands 1 and 3;
;; input 1 is tied to output 0 and input 3 to output 2, so the instruction is
;; printed with just the two output registers.  Both outputs are marked
;; earlyclobber.
5305 (define_insn "*neon_vtrn<mode>_insn"
5306   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5307         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5308                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5309          UNSPEC_VTRN1))
5310    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5311         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5312          UNSPEC_VTRN2))]
5313   "TARGET_NEON"
5314   "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5315   [(set_attr "type" "neon_permute<q>")]
;; Expander for vzip over the VDQWH modes.  It produces two results in
;; parallel from the same pair of inputs (operands 1 and 2): operand 0 under
;; UNSPEC_VZIP1 and operand 3 under UNSPEC_VZIP2.  There is no preparation
;; code; the template is emitted as written.
5318 (define_expand "neon_vzip<mode>_internal"
5319   [(parallel
5320     [(set (match_operand:VDQWH 0 "s_register_operand")
5321           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5322                          (match_operand:VDQWH 2 "s_register_operand")]
5323            UNSPEC_VZIP1))
5324     (set (match_operand:VDQWH 3 "s_register_operand")
5325          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5326   "TARGET_NEON"
5327   ""
5330 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 and the inputs operands 1 and 3;
;; input 1 is tied to output 0 and input 3 to output 2, so the instruction is
;; printed with just the two output registers.  Both outputs are marked
;; earlyclobber.
5331 (define_insn "*neon_vzip<mode>_insn"
5332   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5333         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5334                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5335          UNSPEC_VZIP1))
5336    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5337         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5338          UNSPEC_VZIP2))]
5339   "TARGET_NEON"
5340   "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5341   [(set_attr "type" "neon_zip<q>")]
;; Expander for vuzp over the VDQWH modes.  It produces two results in
;; parallel from the same pair of inputs (operands 1 and 2): operand 0 under
;; UNSPEC_VUZP1 and operand 3 under UNSPEC_VUZP2.  There is no preparation
;; code; the template is emitted as written.
5344 (define_expand "neon_vuzp<mode>_internal"
5345   [(parallel
5346     [(set (match_operand:VDQWH 0 "s_register_operand")
5347           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5348                         (match_operand:VDQWH 2 "s_register_operand")]
5349            UNSPEC_VUZP1))
5350      (set (match_operand:VDQWH 3 "s_register_operand" "")
5351           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5352   "TARGET_NEON"
5353   ""
5356 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 and the inputs operands 1 and 3;
;; input 1 is tied to output 0 and input 3 to output 2, so the instruction is
;; printed with just the two output registers.  Both outputs are marked
;; earlyclobber.
5357 (define_insn "*neon_vuzp<mode>_insn"
5358   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5359         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5360                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5361          UNSPEC_VUZP1))
5362    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5363         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5364          UNSPEC_VUZP2))]
5365   "TARGET_NEON"
5366   "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5367   [(set_attr "type" "neon_zip<q>")]
;; Expander named vec_load_lanes<mode><mode> over the VDQX modes.  It has no
;; preparation code and emits an UNSPEC_VLD1 load from a neon_struct_operand,
;; the same RTL shape as the neon_vld1<mode> insn.
5370 (define_expand "vec_load_lanes<mode><mode>"
5371   [(set (match_operand:VDQX 0 "s_register_operand")
5372         (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5373                      UNSPEC_VLD1))]
5374   "TARGET_NEON")
;; vld1 of a whole VDQX register from the memory operand 1 (constraint "Um").
;; The destination is printed with the %h modifier and the address with %A.
5376 (define_insn "neon_vld1<mode>"
5377   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5378         (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5379                     UNSPEC_VLD1))]
5380   "TARGET_NEON"
5381   "vld1.<V_sz_elem>\t%h0, %A1"
5382   [(set_attr "type" "neon_load1_1reg<q>")]
5385 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5386 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
5387 ;; lane order here.
;; Single-lane vld1 for the VDX modes.  Operand 1 is the <V_elem> memory
;; source, operand 2 the existing vector (tied to the output) and operand 3
;; the lane number, which is rewritten in place after conversion with
;; NEON_ENDIAN_LANE_N.  When the mode has only one element the whole register
;; is loaded and no lane suffix is printed.
5388 (define_insn "neon_vld1_lane<mode>"
5389   [(set (match_operand:VDX 0 "s_register_operand" "=w")
5390         (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5391                      (match_operand:VDX 2 "s_register_operand" "0")
5392                      (match_operand:SI 3 "immediate_operand" "i")]
5393                     UNSPEC_VLD1_LANE))]
5394   "TARGET_NEON"
5396   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5397   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5398   operands[3] = GEN_INT (lane);
5399   if (max == 1)
5400     return "vld1.<V_sz_elem>\t%P0, %A1";
5401   else
5402     return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5404   [(set_attr "type" "neon_load1_one_lane<q>")]
5407 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5408 ;; here on big endian targets.
;; Single-lane vld1 for the VQX modes.  The output code rewrites operand 0 as
;; a <V_HALF>mode register: lanes in the upper half of the vector select the
;; register at REGNO + 2 and are rebased by max / 2.  When the mode has only
;; two elements the selected half is loaded whole and no lane suffix is
;; printed.
5409 (define_insn "neon_vld1_lane<mode>"
5410   [(set (match_operand:VQX 0 "s_register_operand" "=w")
5411         (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5412                      (match_operand:VQX 2 "s_register_operand" "0")
5413                      (match_operand:SI 3 "immediate_operand" "i")]
5414                     UNSPEC_VLD1_LANE))]
5415   "TARGET_NEON"
5417   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5418   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  /* NOTE(review): operands[3] is stored here and again inside the branch
     below; this first store only matters when the branch is not taken.  */
5419   operands[3] = GEN_INT (lane);
5420   int regno = REGNO (operands[0]);
5421   if (lane >= max / 2)
5422     {
5423       lane -= max / 2;
5424       regno += 2;
5425       operands[3] = GEN_INT (lane);
5426     }
5427   operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5428   if (max == 2)
5429     return "vld1.<V_sz_elem>\t%P0, %A1";
5430   else
5431     return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5433   [(set_attr "type" "neon_load1_one_lane<q>")]
;; vld1 to all lanes for the VD_LANE modes: the result is a vec_duplicate of
;; the <V_elem> memory operand 1, printed with the "[]" all-lanes syntax.
5436 (define_insn "neon_vld1_dup<mode>"
5437   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5438         (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5439   "TARGET_NEON"
5440   "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5441   [(set_attr "type" "neon_load1_all_lanes<q>")]
5444 ;; Special case for DImode.  Treat it exactly like a simple load.
;; The expander has no preparation code and emits an UNSPEC_VLD1 load of a
;; DImode register from a neon_struct_operand.
5445 (define_expand "neon_vld1_dupdi"
5446   [(set (match_operand:DI 0 "s_register_operand" "")
5447         (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
5448                    UNSPEC_VLD1))]
5449   "TARGET_NEON"
5450   ""
;; vld1 to all lanes for the VQ2 modes: the result is a vec_duplicate of the
;; <V_elem> memory operand 1.  Operand 0 is printed twice, with the %e and %f
;; modifiers, each with the "[]" all-lanes syntax.
5453 (define_insn "neon_vld1_dup<mode>"
5454   [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5455         (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5456   "TARGET_NEON"
5458   return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5460   [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Duplicate a DImode memory value into both halves of a V2DI register.  The
;; pattern has no assembler form of its own ("#"); after reload it is split
;; into a neon_vld1_dupdi load into the low DImode part of operand 0 followed
;; by a move of that part into the high DImode part.  The length attribute
;; is 8.
5463 (define_insn_and_split "neon_vld1_dupv2di"
5464    [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5465     (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5466    "TARGET_NEON"
5467    "#"
5468    "&& reload_completed"
5469    [(const_int 0)]
5470    {
5471     rtx tmprtx = gen_lowpart (DImode, operands[0]);
5472     emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5473     emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5474     DONE;
5475     }
5476   [(set_attr "length" "8")
5477    (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander named vec_store_lanes<mode><mode> over the VDQX modes.  It has no
;; preparation code and emits an UNSPEC_VST1 store to a neon_struct_operand,
;; the same RTL shape as the neon_vst1<mode> insn.
5480 (define_expand "vec_store_lanes<mode><mode>"
5481   [(set (match_operand:VDQX 0 "neon_struct_operand")
5482         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5483                      UNSPEC_VST1))]
5484   "TARGET_NEON")
;; vst1 of a whole VDQX register (operand 1) to the memory operand 0
;; (constraint "=Um").  The source is printed with the %h modifier and the
;; address with %A.
5486 (define_insn "neon_vst1<mode>"
5487   [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5488         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5489                      UNSPEC_VST1))]
5490   "TARGET_NEON"
5491   "vst1.<V_sz_elem>\t%h1, %A0"
5492   [(set_attr "type" "neon_store1_1reg<q>")])
5494 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5495 ;; here on big endian targets.
;; Single-lane vst1 for the VDX modes.  Operand 0 is the <V_elem> memory
;; destination, operand 1 the source vector and operand 2 the lane number,
;; which is rewritten in place after conversion with NEON_ENDIAN_LANE_N.
;; When the mode has only one element the whole register is stored and no
;; lane suffix is printed.
5496 (define_insn "neon_vst1_lane<mode>"
5497   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5498         (unspec:<V_elem>
5499           [(match_operand:VDX 1 "s_register_operand" "w")
5500            (match_operand:SI 2 "immediate_operand" "i")]
5501           UNSPEC_VST1_LANE))]
5502   "TARGET_NEON"
5504   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5505   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5506   operands[2] = GEN_INT (lane);
5507   if (max == 1)
5508     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5509   else
5510     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5512   [(set_attr "type" "neon_store1_one_lane<q>")]
5515 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5516 ;; here on big endian targets.
;; Single-lane vst1 for the VQX modes.  The output code rewrites operand 1 as
;; a <V_HALF>mode register: lanes in the upper half of the vector select the
;; register at REGNO + 2 and are rebased by max / 2.  When the mode has only
;; two elements the selected half is stored whole and no lane suffix is
;; printed.
5517 (define_insn "neon_vst1_lane<mode>"
5518   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5519         (unspec:<V_elem>
5520           [(match_operand:VQX 1 "s_register_operand" "w")
5521            (match_operand:SI 2 "immediate_operand" "i")]
5522           UNSPEC_VST1_LANE))]
5523   "TARGET_NEON"
5525   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5526   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5527   int regno = REGNO (operands[1]);
5528   if (lane >= max / 2)
5529     {
5530       lane -= max / 2;
5531       regno += 2;
5532     }
5533   operands[2] = GEN_INT (lane);
5534   operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5535   if (max == 2)
5536     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5537   else
5538     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5540   [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander named vec_load_lanesti<mode> over the VDX modes.  It has no
;; preparation code and emits an UNSPEC_VLD2 load of a TImode register, the
;; same RTL shape as the TImode neon_vld2<mode> insn.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode, since neither
;; real operand has it.
5543 (define_expand "vec_load_lanesti<mode>"
5544   [(set (match_operand:TI 0 "s_register_operand")
5545         (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5546                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5547                    UNSPEC_VLD2))]
5548   "TARGET_NEON")
;; Two-register structure load into a TImode register for the VDX modes.
;; For 64-bit elements the output code emits vld1.64 instead of vld2, and the
;; type attribute makes the same distinction (neon_load1_2reg<q> versus
;; neon_load2_2reg<q>).
5550 (define_insn "neon_vld2<mode>"
5551   [(set (match_operand:TI 0 "s_register_operand" "=w")
5552         (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5553                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5554                    UNSPEC_VLD2))]
5555   "TARGET_NEON"
5557   if (<V_sz_elem> == 64)
5558     return "vld1.64\t%h0, %A1";
5559   else
5560     return "vld2.<V_sz_elem>\t%h0, %A1";
5562   [(set (attr "type")
5563       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5564                     (const_string "neon_load1_2reg<q>")
5565                     (const_string "neon_load2_2reg<q>")))]
;; Expander named vec_load_lanesoi<mode> over the VQ2 modes.  It has no
;; preparation code and emits an UNSPEC_VLD2 load of an OImode register, the
;; same RTL shape as the OImode neon_vld2<mode> insn.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode.
5568 (define_expand "vec_load_lanesoi<mode>"
5569   [(set (match_operand:OI 0 "s_register_operand")
5570         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5571                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5572                    UNSPEC_VLD2))]
5573   "TARGET_NEON")
;; vld2 structure load into an OImode register for the VQ2 modes.  The
;; destination is printed with the %h modifier and the address with %A.
5575 (define_insn "neon_vld2<mode>"
5576   [(set (match_operand:OI 0 "s_register_operand" "=w")
5577         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5578                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5579                    UNSPEC_VLD2))]
5580   "TARGET_NEON"
5581   "vld2.<V_sz_elem>\t%h0, %A1"
5582   [(set_attr "type" "neon_load2_2reg_q")])
5584 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5585 ;; here on big endian targets.
;; Single-lane vld2 into a TImode register for the VD_LANE modes.  Operand 1
;; is the <V_two_elem> memory source, operand 2 the existing register value
;; (tied to the output) and operand 3 the lane number.  The output code names
;; the two destination registers as DImode registers at REGNO (operands[0])
;; plus 0 and 2, converts the lane with NEON_ENDIAN_LANE_N, prints the
;; instruction with output_asm_insn and returns an empty template.
5586 (define_insn "neon_vld2_lane<mode>"
5587   [(set (match_operand:TI 0 "s_register_operand" "=w")
5588         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5589                     (match_operand:TI 2 "s_register_operand" "0")
5590                     (match_operand:SI 3 "immediate_operand" "i")
5591                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5592                    UNSPEC_VLD2_LANE))]
5593   "TARGET_NEON"
5595   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5596   int regno = REGNO (operands[0]);
5597   rtx ops[4];
5598   ops[0] = gen_rtx_REG (DImode, regno);
5599   ops[1] = gen_rtx_REG (DImode, regno + 2);
5600   ops[2] = operands[1];
5601   ops[3] = GEN_INT (lane);
5602   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5603   return "";
5605   [(set_attr "type" "neon_load2_one_lane<q>")]
5608 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5609 ;; here on big endian targets.
;; Single-lane vld2 into an OImode register for the VQ_HS modes.  Lanes in
;; the upper half of the vector are rebased by max / 2 and move the base
;; register number up by 2.  The two registers printed are the DImode
;; registers at the resulting base and at base + 4.  The instruction is
;; printed with output_asm_insn and an empty template is returned.
5610 (define_insn "neon_vld2_lane<mode>"
5611   [(set (match_operand:OI 0 "s_register_operand" "=w")
5612         (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5613                     (match_operand:OI 2 "s_register_operand" "0")
5614                     (match_operand:SI 3 "immediate_operand" "i")
5615                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5616                    UNSPEC_VLD2_LANE))]
5617   "TARGET_NEON"
5619   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5620   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5621   int regno = REGNO (operands[0]);
5622   rtx ops[4];
5623   if (lane >= max / 2)
5624     {
5625       lane -= max / 2;
5626       regno += 2;
5627     }
5628   ops[0] = gen_rtx_REG (DImode, regno);
5629   ops[1] = gen_rtx_REG (DImode, regno + 4);
5630   ops[2] = operands[1];
5631   ops[3] = GEN_INT (lane);
5632   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5633   return "";
5635   [(set_attr "type" "neon_load2_one_lane<q>")]
;; vld2 "all lanes" load into a TImode register group (VDX modes).
;; Modes with a single element are emitted as a plain vld1 instead, and
;; the "type" attribute makes the same distinction.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load2_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
;; Expander for a TImode structure store of VDX vectors.  It has no
;; preparation code; it simply generates the UNSPEC_VST2 set below, which
;; has the same shape as the neon_vst2<mode> insn pattern for VDX.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI [(match_operand:TI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
;; Store a TImode register group to memory (VDX modes).  64-bit elements
;; are emitted as vst1.64; every other element size uses vst2.
;; The vst2 form stores the whole register list, so it is typed as a
;; two-register store rather than as a single-lane store.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_2reg<q>")))]
)
;; Expander for an OImode structure store of VQ2 vectors.  It has no
;; preparation code; it simply generates the UNSPEC_VST2 set below, which
;; has the same shape as the neon_vst2<mode> insn pattern for VQ2.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
;; Store an OImode register group to memory with a single vst2 (VQ2 modes).
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
;; Single-lane vst2 from a TImode register group (VD_LANE modes).
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  /* The two structure members are the DImode registers at REGNO and
     REGNO + 2.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
;; Single-lane vst2 from an OImode register group (VQ_HS modes).
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[4];
  /* A lane in the upper half of the vector is reached through the DImode
     register two further on, with the lane index rebased to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  /* The two structure members are four register numbers apart.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
;; Expander for an EImode structure load of VDX vectors.  It has no
;; preparation code; it simply generates the UNSPEC_VLD3 set below, which
;; has the same shape as the neon_vld3<mode> insn pattern for VDX.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")
;; Load an EImode register group from memory (VDX modes).  64-bit elements
;; are emitted as vld1.64; every other element size uses vld3.  The "type"
;; attribute makes the same distinction.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vld1.64\t%h0, %A1";
  else
    return "vld3.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)
;; Expander for a CImode structure load of VQ2 vectors.  It forwards both
;; operands unchanged to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
  DONE;
})
;; CImode vld3 for VQ2 vectors, split into two instructions that each read
;; one EImode-sized piece of the source memory: neon_vld3qa reads the piece
;; at offset 0 and neon_vld3qb the piece immediately following it.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  /* The second half takes the partially loaded destination as an input.  */
  emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
  DONE;
})
;; First half of the CImode vld3 (VQ2 modes): loads an EImode piece of
;; memory into the DImode registers at REGNO, REGNO + 4 and REGNO + 8 of
;; the destination group.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
;; Second half of the CImode vld3 (VQ2 modes): loads an EImode piece of
;; memory into the DImode registers at REGNO + 2, REGNO + 6 and REGNO + 10.
;; Operand 2 is tied to the output, so the registers written by the first
;; half are carried through.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno + 2);
  ops[1] = gen_rtx_REG (DImode, regno + 6);
  ops[2] = gen_rtx_REG (DImode, regno + 10);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
;; Single-lane vld3 into an EImode register group (VD_LANE modes).
;; Operand 1 is the memory source, operand 2 is tied to the output and
;; operand 3 is the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[5];
  /* The three structure members are the DImode registers at REGNO,
     REGNO + 2 and REGNO + 4.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  /* NOTE(review): the memory operand is printed with plain %3 here, while
     the vld2/vld4 lane patterns use %A -- presumably deliberate; confirm.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
;; Single-lane vld3 into a CImode register group (VQ_HS modes).
;; Operand 1 is the memory source, operand 2 is tied to the output and
;; operand 3 is the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[5];
  /* A lane in the upper half of the vector is reached through the DImode
     register two further on, with the lane index rebased to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  /* The three structure members are four register numbers apart.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  /* NOTE(review): the memory operand is printed with plain %3 here, while
     the vld2/vld4 lane patterns use %A -- presumably deliberate; confirm.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
;; vld3 "all lanes" load into an EImode register group (VDX modes).
;; Modes with a single element are emitted as a plain vld1 instead, and
;; the "type" attribute makes the same distinction.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[4];
      /* The three structure members are the DImode registers at REGNO,
         REGNO + 2 and REGNO + 4.  */
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])
;; Expander for an EImode structure store of VDX vectors.  It has no
;; preparation code; it simply generates the UNSPEC_VST3 set below, which
;; has the same shape as the neon_vst3<mode> insn pattern for VDX.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")
;; Store an EImode register group to memory (VDX modes).  64-bit elements
;; are emitted as vst1.64; every other element size uses vst3.
;; The vst3 form stores the whole register list, so it is typed as a
;; three-register store (mirroring neon_load3_3reg<q> on neon_vld3<mode>)
;; rather than as a single-lane store.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])
;; Expander for a CImode structure store of VQ2 vectors.  It forwards both
;; operands unchanged to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
  DONE;
})
;; CImode vst3 for VQ2 vectors, split into two instructions that each write
;; one EImode-sized piece of the destination memory: neon_vst3qa writes the
;; piece at offset 0 and neon_vst3qb the piece immediately following it.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
  DONE;
})
;; First half of the CImode vst3 (VQ2 modes): stores the DImode registers
;; at REGNO, REGNO + 4 and REGNO + 8 of the source group to an EImode piece
;; of memory.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
;; Second half of the CImode vst3 (VQ2 modes): stores the DImode registers
;; at REGNO + 2, REGNO + 6 and REGNO + 10 of the source group to an EImode
;; piece of memory.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
;; Single-lane vst3 from an EImode register group (VD_LANE modes).
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:EI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  /* The three structure members are the DImode registers at REGNO,
     REGNO + 2 and REGNO + 4.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = gen_rtx_REG (DImode, regno + 4);
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
;; Single-lane vst3 from a CImode register group (VQ_HS modes).
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:CI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[5];
  /* A lane in the upper half of the vector is reached through the DImode
     register two further on, with the lane index rebased to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  /* The three structure members are four register numbers apart.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
;; Expander for an OImode structure load of VDX vectors.  It has no
;; preparation code; it simply generates the UNSPEC_VLD4 set below, which
;; has the same shape as the neon_vld4<mode> insn pattern for VDX.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON")
;; Load an OImode register group from memory (VDX modes).  64-bit elements
;; are emitted as vld1.64; every other element size uses vld4.  The "type"
;; attribute makes the same distinction.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vld1.64\t%h0, %A1";
  else
    return "vld4.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)
;; Expander for an XImode structure load of VQ2 vectors.  It forwards both
;; operands unchanged to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
  DONE;
})
;; XImode vld4 for VQ2 vectors, split into two instructions that each read
;; one OImode-sized piece of the source memory: neon_vld4qa reads the piece
;; at offset 0 and neon_vld4qb the piece immediately following it.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
  /* The second half takes the partially loaded destination as an input.  */
  emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
  DONE;
})
;; First half of the XImode vld4 (VQ2 modes): loads an OImode piece of
;; memory into the DImode registers at REGNO, REGNO + 4, REGNO + 8 and
;; REGNO + 12 of the destination group.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[5];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = gen_rtx_REG (DImode, regno + 12);
  ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
;; Second half of the XImode vld4 (VQ2 modes): loads an OImode piece of
;; memory into the DImode registers at REGNO + 2, REGNO + 6, REGNO + 10 and
;; REGNO + 14.  Operand 2 is tied to the output, so the registers written
;; by the first half are carried through.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[5];
  ops[0] = gen_rtx_REG (DImode, regno + 2);
  ops[1] = gen_rtx_REG (DImode, regno + 6);
  ops[2] = gen_rtx_REG (DImode, regno + 10);
  ops[3] = gen_rtx_REG (DImode, regno + 14);
  ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
;; Single-lane vld4 into an OImode register group (VD_LANE modes).
;; Operand 1 is the memory source, operand 2 is tied to the output and
;; operand 3 is the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[6];
  /* The four structure members are the DImode registers at REGNO,
     REGNO + 2, REGNO + 4 and REGNO + 6.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 6);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
;; Single-lane vld4 into an XImode register group (VQ_HS modes).
;; Operand 1 is the memory source, operand 2 is tied to the output and
;; operand 3 is the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[6];
  /* A lane in the upper half of the vector is reached through the DImode
     register two further on, with the lane index rebased to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  /* The four structure members are four register numbers apart.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = gen_rtx_REG (DImode, regno + 12);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
;; vld4 "all lanes" load into an OImode register group (VDX modes).
;; Modes with a single element are emitted as a plain vld1 instead, and
;; the "type" attribute makes the same distinction.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[5];
      /* The four structure members are the DImode registers at REGNO,
         REGNO + 2, REGNO + 4 and REGNO + 6.  */
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = gen_rtx_REG (DImode, regno + 6);
      ops[4] = operands[1];
      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                       ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
;; Expander for an OImode structure store of VDX vectors.  It has no
;; preparation code; it simply generates the UNSPEC_VST4 set below, which
;; has the same shape as the neon_vst4<mode> insn pattern for VDX.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON")
;; Store an OImode register group to memory (VDX modes).  64-bit elements
;; are emitted as vst1.64; every other element size uses vst4.  The "type"
;; attribute makes the same distinction.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst4.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)
;; Expander for an XImode structure store of VQ2 vectors.  It forwards both
;; operands unchanged to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
  DONE;
})
;; XImode vst4 for VQ2 vectors, split into two instructions that each write
;; one OImode-sized piece of the destination memory: neon_vst4qa writes the
;; piece at offset 0 and neon_vst4qb the piece immediately following it.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
  DONE;
})
;; First half of the XImode vst4 (VQ2 modes): stores the DImode registers
;; at REGNO, REGNO + 4, REGNO + 8 and REGNO + 12 of the source group to an
;; OImode piece of memory.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
;; Second half of the XImode vst4 (VQ2 modes): stores the DImode registers
;; at REGNO + 2, REGNO + 6, REGNO + 10 and REGNO + 14 of the source group
;; to an OImode piece of memory.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  ops[4] = gen_rtx_REG (DImode, regno + 14);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
;; Single-lane vst4 from an OImode register group (VD_LANE modes).
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane index.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[6];
  ops[0] = operands[0];
  /* The four structure members are the DImode registers at REGNO,
     REGNO + 2, REGNO + 4 and REGNO + 6.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = gen_rtx_REG (DImode, regno + 4);
  ops[4] = gen_rtx_REG (DImode, regno + 6);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
;; Single-lane vst4 from an XImode register group (VQ_HS modes).
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane index.  This is a lane store, so it carries the
;; one-lane "type" like the VD_LANE variant of this pattern.
;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:XI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* A lane in the upper half of the vector is reached through the DImode
     register two further on, with the lane index rebased to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  /* The four structure members are four register numbers apart.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
;; Widen the half of operand 1 selected by operand 2 (which must satisfy
;; vect_par_constant_low) with a single vmovl.  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                          (match_operand:VU 1 "register_operand" "w")
                          (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
;; Widen the half of operand 1 selected by operand 2 (which must satisfy
;; vect_par_constant_high) with a single vmovl.  Little-endian only.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                          (match_operand:VU 1 "register_operand" "w")
                          (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
;; Expander that builds the lane selector for the upper half of operand 1
;; (indices N/2 .. N-1, where N is <V_mode_nunits>) and passes it to
;; neon_vec_unpack<US>_hi_<mode>.  Little-endian only.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2);
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2); i++)
     RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);

   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
   emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 t1));
   DONE;
  }
)
;; Expander that builds the lane selector for the lower half of operand 1
;; (indices 0 .. N/2-1, where N is <V_mode_nunits>) and passes it to
;; neon_vec_unpack<US>_lo_<mode>.  Little-endian only.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2);
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2); i++)
     RTVEC_ELT (v, i) = GEN_INT (i);
   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
   emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 t1));
   DONE;
  }
)
;; Widening multiply of the halves of operands 1 and 3 selected by
;; operand 2 (which must satisfy vect_par_constant_low; the same selector
;; is applied to both inputs via match_dup).  Emitted as a single vmull.
;; Little-endian only.
(define_insn "neon_vec_<US>mult_lo_<mode>"
 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 1 "register_operand" "w")
                           (match_operand:VU 2 "vect_par_constant_low" "")))
                        (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 3 "register_operand" "w")
                           (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
;; Expander for a widening multiply of the low halves of two VU-mode
;; vectors.  It builds the selector PARALLEL 0 .. <V_mode_nunits>/2 - 1
;; and emits neon_vec_<US>mult_lo_<mode>.  Note the operand renumbering:
;; this pattern's operands[2] is passed as the insn's operand 3, because
;; the insn's operand 2 is the lane selector.  Little-endian only.
6470 (define_expand "vec_widen_<US>mult_lo_<mode>"
6471   [(match_operand:<V_unpack> 0 "register_operand" "")
6472    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6473    (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6474  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6476    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6477    rtx t1;
6478    int i;
6479    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6480      RTVEC_ELT (v, i) = GEN_INT (i);
6481    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6483    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6484                                                operands[1],
6485                                                t1,
6486                                                operands[2]));
6487    DONE;
;; Widening multiply of the halves of operands 1 and 3 selected by
;; operand 2.  Both vec_selects share the same selector (match_dup 2),
;; which must satisfy vect_par_constant_high; each selected half is
;; extended with SE before the multiply.  Emits a single vmull using the
;; %f modifier on both sources.  Little-endian only.
6491 (define_insn "neon_vec_<US>mult_hi_<mode>"
6492  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6493       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6494                             (match_operand:VU 1 "register_operand" "w") 
6495                             (match_operand:VU 2 "vect_par_constant_high" "")))
6496                        (SE:<V_unpack> (vec_select:<V_HALF>
6497                             (match_operand:VU 3 "register_operand" "w") 
6498                             (match_dup 2)))))]
6499   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6500   "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6501   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the high halves of two VU-mode
;; vectors.  It builds the selector PARALLEL
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_<US>mult_hi_<mode>.  This pattern's operands[2] is passed as
;; the insn's operand 3, because the insn's operand 2 is the lane
;; selector.  Little-endian only.
6504 (define_expand "vec_widen_<US>mult_hi_<mode>"
6505   [(match_operand:<V_unpack> 0 "register_operand" "")
6506    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6507    (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6508  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6510    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6511    rtx t1;
6512    int i;
6513    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6514      RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6515    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6517    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6518                                                operands[1],
6519                                                t1,
6520                                                operands[2]));
6521    DONE;
;; Widening shift left: operand 1 (VW mode) is shifted left by operand 2
;; and the result is extended with SE to <V_widen>.  Operand 2 must
;; satisfy const_neon_scalar_shift_amount_operand.  Emits vshll.
;; Unlike the neighbouring patterns, this one has no endianness
;; restriction in its condition.
6526 (define_insn "neon_vec_<US>shiftl_<mode>"
6527  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6528        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6529        (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6530   "TARGET_NEON"
6532   return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6534   [(set_attr "type" "neon_shift_imm_long")]
;; Expander for a widening shift left of the low half of a VU-mode
;; vector.  It takes the <V_HALF>-mode subreg of operands[1] at byte
;; offset 0 and passes it, together with the immediate shift count in
;; operands[2], to neon_vec_<US>shiftl_<V_half>.  Little-endian only.
6537 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6538   [(match_operand:<V_unpack> 0 "register_operand" "")
6539    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6540    (match_operand:SI 2 "immediate_operand" "i")]
6541  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6543   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6544                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6545                 operands[2]));
6546    DONE;
;; Expander for a widening shift left of the high half of a VU-mode
;; vector.  It takes the <V_HALF>-mode subreg of operands[1] at byte
;; offset GET_MODE_SIZE (<V_HALF>mode) and passes it, together with the
;; immediate shift count in operands[2], to
;; neon_vec_<US>shiftl_<V_half>.  Little-endian only.
6550 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6551   [(match_operand:<V_unpack> 0 "register_operand" "")
6552    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6553    (match_operand:SI 2 "immediate_operand" "i")]
6554  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6556   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6557                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6558                                      GET_MODE_SIZE (<V_HALF>mode)),
6559                 operands[2]));
6560    DONE;
6564 ;; Vectorize for non-neon-quad case
;; Extends (SE) the whole VDI-mode operand 1 into the <V_widen>-mode
;; operand 0 with a single vmovl.
6565 (define_insn "neon_unpack<US>_<mode>"
6566  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6567        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6568  "TARGET_NEON"
6569  "vmovl.<US><V_sz_elem> %q0, %P1"
6570   [(set_attr "type" "neon_move")]
;; VDI-mode variant of vec_unpack<US>_lo: widens all of operands[1] into
;; a fresh <V_widen>-mode pseudo via neon_unpack<US>_<mode>, then copies
;; the low part of that pseudo into operands[0] with
;; neon_vget_low<V_widen_l>.
6573 (define_expand "vec_unpack<US>_lo_<mode>"
6574  [(match_operand:<V_double_width> 0 "register_operand" "")
6575   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6576  "TARGET_NEON"
6578   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6579   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6580   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6582   DONE;
;; VDI-mode variant of vec_unpack<US>_hi: widens all of operands[1] into
;; a fresh <V_widen>-mode pseudo via neon_unpack<US>_<mode>, then copies
;; the high part of that pseudo into operands[0] with
;; neon_vget_high<V_widen_l>.
6586 (define_expand "vec_unpack<US>_hi_<mode>"
6587  [(match_operand:<V_double_width> 0 "register_operand" "")
6588   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6589  "TARGET_NEON"
6591   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6592   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6593   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6595   DONE;
;; Widening multiply of two whole VDI-mode vectors: both operands are
;; extended with SE to <V_widen> and multiplied.  Emits a single vmull.
6599 (define_insn "neon_vec_<US>mult_<mode>"
6600  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6601        (mult:<V_widen> (SE:<V_widen> 
6602                            (match_operand:VDI 1 "register_operand" "w"))
6603                        (SE:<V_widen> 
6604                            (match_operand:VDI 2 "register_operand" "w"))))]
6605   "TARGET_NEON"
6606   "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6607   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; VDI-mode variant of vec_widen_<US>mult_hi: performs the full widening
;; multiply of operands[1] and operands[2] into a fresh <V_widen>-mode
;; pseudo via neon_vec_<US>mult_<mode>, then copies the high part of the
;; product into operands[0] with neon_vget_high<V_widen_l>.
6610 (define_expand "vec_widen_<US>mult_hi_<mode>"
6611   [(match_operand:<V_double_width> 0 "register_operand" "")
6612    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6613    (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6614  "TARGET_NEON"
6616    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6617    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6618    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6619                                             
6620    DONE;
;; VDI-mode variant of vec_widen_<US>mult_lo: performs the full widening
;; multiply of operands[1] and operands[2] into a fresh <V_widen>-mode
;; pseudo via neon_vec_<US>mult_<mode>, then copies the low part of the
;; product into operands[0] with neon_vget_low<V_widen_l>.
6625 (define_expand "vec_widen_<US>mult_lo_<mode>"
6626   [(match_operand:<V_double_width> 0 "register_operand" "")
6627    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6628    (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6629  "TARGET_NEON"
6631    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6632    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6633    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6634                                             
6635    DONE;
;; VDI-mode variant of vec_widen_<US>shiftl_hi: performs the widening
;; shift of operands[1] by the immediate operands[2] into a fresh
;; <V_widen>-mode pseudo via neon_vec_<US>shiftl_<mode>, then copies the
;; high part of the result into operands[0] with
;; neon_vget_high<V_widen_l>.
6640 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6641  [(match_operand:<V_double_width> 0 "register_operand" "")
6642    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6643    (match_operand:SI 2 "immediate_operand" "i")]
6644  "TARGET_NEON"
6646    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6647    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6648    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6650    DONE;
;; VDI-mode variant of vec_widen_<US>shiftl_lo: performs the widening
;; shift of operands[1] by the immediate operands[2] into a fresh
;; <V_widen>-mode pseudo via neon_vec_<US>shiftl_<mode>, then copies the
;; low part of the result into operands[0] with
;; neon_vget_low<V_widen_l>.
6654 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6655   [(match_operand:<V_double_width> 0 "register_operand" "")
6656    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6657    (match_operand:SI 2 "immediate_operand" "i")]
6658  "TARGET_NEON"
6660    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6661    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6662    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6664    DONE;
6668 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6669 ; because the ordering of vector elements in Q registers is different from what
6670 ; the semantics of the instructions require.
;; Packs two VN-mode vectors into one <V_narrow_pack> vector: the
;; truncation of operand 1 is concatenated with the truncation of
;; operand 2.  The template is two vmovn instructions, the first writing
;; %e0 from operand 1 and the second writing %f0 from operand 2, hence
;; type "multiple" and length 8.  The output constraint is earlyclobber
;; ("=&w") because part of operand 0 is written before operand 2 is read.
6672 (define_insn "vec_pack_trunc_<mode>"
6673  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6674        (vec_concat:<V_narrow_pack> 
6675                 (truncate:<V_narrow> 
6676                         (match_operand:VN 1 "register_operand" "w"))
6677                 (truncate:<V_narrow>
6678                         (match_operand:VN 2 "register_operand" "w"))))]
6679  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6680  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6681  [(set_attr "type" "multiple")
6682   (set_attr "length" "8")]
6685 ;; For the non-quad case.
;; Truncates the VN-mode operand 1 to <V_narrow> with a single vmovn.
;; Little-endian only.
6686 (define_insn "neon_vec_pack_trunc_<mode>"
6687  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6688        (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6689  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6690  "vmovn.i<V_sz_elem>\t%P0, %q1"
6691  [(set_attr "type" "neon_move_narrow_q")]
;; Expander for packing two VSHFT-mode vectors.  It allocates a
;; <V_DOUBLE>-mode pseudo, moves operands[1] into it with
;; move_lo_quad_<V_double> and operands[2] with move_hi_quad_<V_double>,
;; then truncates the combined register into operands[0] with
;; neon_vec_pack_trunc_<V_double>.  Little-endian only.
6694 (define_expand "vec_pack_trunc_<mode>"
6695  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6696   (match_operand:VSHFT 1 "register_operand" "")
6697   (match_operand:VSHFT 2 "register_operand")]
6698  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6700   rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6701   
6702   emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1])); 
6703   emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2])); 
6704   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6705   DONE;
;; Matches abs (operand 1 - operand 2) on VF-mode vectors written with
;; the generic minus code, and emits a single vabd.  Only enabled under
;; flag_unsafe_math_optimizations.
6708 (define_insn "neon_vabd<mode>_2"
6709  [(set (match_operand:VF 0 "s_register_operand" "=w")
6710        (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6711                          (match_operand:VF 2 "s_register_operand" "w"))))]
6712  "TARGET_NEON && flag_unsafe_math_optimizations"
6713  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6714  [(set_attr "type" "neon_fp_abd_s<q>")]
;; Same as neon_vabd<mode>_2, but matches the subtraction when it is
;; expressed as an UNSPEC_VSUB unspec rather than a minus.  The element
;; suffix here comes from <V_if_elem> instead of <V_s_elem>.  Only
;; enabled under flag_unsafe_math_optimizations.
6717 (define_insn "neon_vabd<mode>_3"
6718  [(set (match_operand:VF 0 "s_register_operand" "=w")
6719        (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6720                             (match_operand:VF 2 "s_register_operand" "w")]
6721                 UNSPEC_VSUB)))]
6722  "TARGET_NEON && flag_unsafe_math_optimizations"
6723  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6724  [(set_attr "type" "neon_fp_abd_s<q>")]
6727 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; After reload, when the DI destination is a VFP register, split
;; sign_extend:DI of an SI register into two steps: duplicate the source
;; across the destination viewed as V2SI (operand 2 is the same hard
;; register in V2SImode), then arithmetic-shift the DI value right by 32.
6729 (define_split
6730   [(set (match_operand:DI 0 "s_register_operand" "")
6731         (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6732   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6733   [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6734    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6735   {
6736     operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6737   })
;; After reload, when the DI destination is a VFP register, split
;; sign_extend:DI of an HI register into two steps: duplicate the source
;; across the destination viewed as V4HI (operand 2 is the same hard
;; register in V4HImode), then arithmetic-shift the DI value right by 48.
6739 (define_split
6740   [(set (match_operand:DI 0 "s_register_operand" "")
6741         (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6742   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6743   [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6744    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6745   {
6746     operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6747   })
;; After reload, when the DI destination is a VFP register, split
;; sign_extend:DI of a QI register into two steps: duplicate the source
;; across the destination viewed as V8QI (operand 2 is the same hard
;; register in V8QImode), then arithmetic-shift the DI value right by 56.
6749 (define_split
6750   [(set (match_operand:DI 0 "s_register_operand" "")
6751         (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6752   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6753   [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6754    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6755   {
6756     operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6757   })
;; After reload, when the DI destination is a VFP register, split
;; zero_extend:DI of an SI register into two steps: duplicate the source
;; across the destination viewed as V2SI (operand 2 is the same hard
;; register in V2SImode), then logical-shift the DI value right by 32.
6759 (define_split
6760   [(set (match_operand:DI 0 "s_register_operand" "")
6761         (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6762   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6763   [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6764    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6765   {
6766     operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6767   })
;; After reload, when the DI destination is a VFP register, split
;; zero_extend:DI of an HI register into two steps: duplicate the source
;; across the destination viewed as V4HI (operand 2 is the same hard
;; register in V4HImode), then logical-shift the DI value right by 48.
6769 (define_split
6770   [(set (match_operand:DI 0 "s_register_operand" "")
6771         (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6772   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6773   [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6774    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6775   {
6776     operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6777   })
;; After reload, when the DI destination is a VFP register, split
;; zero_extend:DI of a QI register into two steps: duplicate the source
;; across the destination viewed as V8QI (operand 2 is the same hard
;; register in V8QImode), then logical-shift the DI value right by 56.
6779 (define_split
6780   [(set (match_operand:DI 0 "s_register_operand" "")
6781         (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6782   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6783   [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6784    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6785   {
6786     operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6787   })