Add assembler CFI directives to millicode division and remainder routines.
[official-gcc.git] / gcc / config / arm / neon.md
blobd213369ffc38fb88ad0357d848cc7da5af73bab7
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3.  If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Permitted values are "vadd", "vmin" and "vmax"; the default is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Store the V8QI register in operand 1 to the memory in operand 0, with the
;; source wrapped in UNSPEC_UNALIGNED_STORE.  Enabled under TARGET_NEON; the
;; assembly text is produced by output_move_neon.
26 (define_insn "unaligned_storev8qi"
27   [(set (match_operand:V8QI 0 "memory_operand" "=Un")
28         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
29                      UNSPEC_UNALIGNED_STORE))]
30   "TARGET_NEON"
31   "*
32   return output_move_neon (operands);
33   "
34   [(set_attr "type" "neon_store1_1reg")])
;; Move pattern for the VDXMOV modes, with ten alternatives.  The insn
;; condition requires TARGET_NEON and that at least one operand is a register.
;;
;; Output selection, by alternative number:
;;   2, 3 (source constraints "Dm"/"Dn"): operand 1 is validated and rewritten
;;         by simd_immediate_valid_for_move, which also reports a width.  A
;;         width of 0 selects the vmov.f32 template; otherwise a vmov.i<width>
;;         template is formatted into a function-local static buffer and
;;         returned.
;;   0:    vmov between two %P registers.
;;   1, 4: delegated to output_move_neon.
;;   5, 6: three-operand vmov using the %Q/%R halves of one side.
;;   9:    returns "#".
;;   others (7, 8): delegated to output_move_double.
;; The "length" and pool-range attributes are given per alternative.
36 (define_insn "*neon_mov<mode>"
37   [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
38           "=w,Un,w, w, w,  ?r,?w,?r, ?Us,*r")
39         (match_operand:VDXMOV 1 "general_operand"
40           " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
41   "TARGET_NEON
42    && (register_operand (operands[0], <MODE>mode)
43        || register_operand (operands[1], <MODE>mode))"
45   if (which_alternative == 2 || which_alternative == 3)
46     {
47       int width, is_valid;
48       static char templ[40];
50       is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
51         &operands[1], &width);
53       gcc_assert (is_valid != 0);
55       if (width == 0)
56         return "vmov.f32\t%P0, %1  @ <mode>";
57       else
58         sprintf (templ, "vmov.i%d\t%%P0, %%x1  @ <mode>", width);
60       return templ;
61     }
63   switch (which_alternative)
64     {
65     case 0: return "vmov\t%P0, %P1  @ <mode>";
66     case 1: case 4: return output_move_neon (operands);
67     case 2: case 3: gcc_unreachable ();
68     case 5: return "vmov\t%Q0, %R0, %P1  @ <mode>";
69     case 6: return "vmov\t%P0, %Q1, %R1  @ <mode>";
70     case 9: return "#";
71     default: return output_move_double (operands, true, NULL);
72     }
74  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
75                     neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
76                     neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
77                     multiple")
78   (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
79   (set_attr "arm_pool_range"     "*,*,*,*,1020,*,*,1020,*,*")
80   (set_attr "thumb2_pool_range"     "*,*,*,*,1018,*,*,1018,*,*")
81   (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
;; Move pattern for the VQXMOV modes, with ten alternatives.  The insn
;; condition requires TARGET_NEON and that at least one operand is a register.
;;
;; Output selection, by alternative number:
;;   2, 3 (source constraints "Dm"/"DN"): operand 1 is validated and rewritten
;;         by simd_immediate_valid_for_move; a reported width of 0 selects the
;;         vmov.f32 template, otherwise a vmov.i<width> template is formatted
;;         into a function-local static buffer and returned.
;;   0:    vmov between two %q registers.
;;   1, 4: delegated to output_move_neon.
;;   5, 6: two vmov instructions, one per %e/%f half of the vector operand.
;;   others (7, 8, 9): delegated to output_move_quad.
83 (define_insn "*neon_mov<mode>"
84   [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
85           "=w,Un,w, w, w,  ?r,?w,?r,?r,  ?Us")
86         (match_operand:VQXMOV 1 "general_operand"
87           " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
88   "TARGET_NEON
89    && (register_operand (operands[0], <MODE>mode)
90        || register_operand (operands[1], <MODE>mode))"
92   if (which_alternative == 2 || which_alternative == 3)
93     {
94       int width, is_valid;
95       static char templ[40];
97       is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
98         &operands[1], &width);
100       gcc_assert (is_valid != 0);
102       if (width == 0)
103         return "vmov.f32\t%q0, %1  @ <mode>";
104       else
105         sprintf (templ, "vmov.i%d\t%%q0, %%1  @ <mode>", width);
107       return templ;
108     }
110   switch (which_alternative)
111     {
112     case 0: return "vmov\t%q0, %q1  @ <mode>";
113     case 1: case 4: return output_move_neon (operands);
114     case 2: case 3: gcc_unreachable ();
115     case 5: return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";
116     case 6: return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";
117     default: return output_move_quad (operands);
118     }
120   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
121                      neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
122                      neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
123    (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
124    (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
125    (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
126    (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
128 /* We define these mov expanders to match the standard mov$a optab to prevent
129    the mid-end from trying to do a subreg for these modes which is the most
130    inefficient way to expand the move.  Also big-endian subreg's aren't
131    allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
132    Without these RTL generation patterns the mid-end would attempt to take a
133    sub-reg and may ICE if it can't.  */
;; TImode move expander, enabled under TARGET_NEON.  In checking builds it
;; asserts that both operands satisfy aligned_operand.  While new pseudos can
;; still be created, a non-REG destination causes the source to be forced
;; into a register.
135 (define_expand "movti"
136   [(set (match_operand:TI 0 "nonimmediate_operand")
137         (match_operand:TI 1 "general_operand"))]
138   "TARGET_NEON"
140   gcc_checking_assert (aligned_operand (operands[0], TImode));
141   gcc_checking_assert (aligned_operand (operands[1], TImode));
142   if (can_create_pseudo_p ())
143     {
144       if (!REG_P (operands[0]))
145         operands[1] = force_reg (TImode, operands[1]);
146     }
;; Move expander for the VSTRUCT modes, enabled under TARGET_NEON or
;; TARGET_HAVE_MVE.  In checking builds it asserts that both operands satisfy
;; aligned_operand.  While new pseudos can still be created, a non-REG
;; destination causes the source to be forced into a register.
149 (define_expand "mov<mode>"
150   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
151         (match_operand:VSTRUCT 1 "general_operand"))]
152   "TARGET_NEON || TARGET_HAVE_MVE"
154   gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
155   gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
156   if (can_create_pseudo_p ())
157     {
158       if (!REG_P (operands[0]))
159         operands[1] = force_reg (<MODE>mode, operands[1]);
160     }
163 ;; The mov<mode> patterns for the v8hf, v4hf, v4bf and v8bf modes are split
164 ;; into two groups.  The pattern movv8hf is common to MVE and NEON, so it
165 ;; lives in vec-common.md.  The remaining mov expand patterns for half-float
166 ;; and bfloat modes are implemented below.
;; Both operands use the s_register_operand predicate.  The body mirrors the
;; other mov expanders here: checking-build alignment assertions, then
;; force_reg on the source when pseudos can be created and the destination is
;; not a REG.
167 (define_expand "mov<mode>"
168   [(set (match_operand:VHFBF_split 0 "s_register_operand")
169         (match_operand:VHFBF_split 1 "s_register_operand"))]
170   "TARGET_NEON"
172   gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
173   gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
174   if (can_create_pseudo_p ())
175     {
176       if (!REG_P (operands[0]))
177         operands[1] = force_reg (<MODE>mode, operands[1]);
178     }
;; Move insn for the VSTRUCT modes, enabled under TARGET_NEON or
;; TARGET_HAVE_MVE when at least one operand is a register.
;;   Alternative 0 ("w" <- "w"):            returns "#".
;;   Alternatives 1, 2 (store / load "Ut"): delegated to output_move_neon.
;; The "length" attribute is computed at run time by
;; arm_attr_length_move_neon (insn).
181 (define_insn "*neon_mov<mode>"
182   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
183         (match_operand:VSTRUCT 1 "general_operand"      " w,w, Ut"))]
184   "(TARGET_NEON || TARGET_HAVE_MVE)
185    && (register_operand (operands[0], <MODE>mode)
186        || register_operand (operands[1], <MODE>mode))"
188   switch (which_alternative)
189     {
190     case 0: return "#";
191     case 1: case 2: return output_move_neon (operands);
192     default: gcc_unreachable ();
193     }
195   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
196    (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload (TARGET_NEON only), split an EI-mode register-to-register copy
;; into two sets: a TImode piece starting at the operand's register number and
;; a DImode piece starting four register numbers later.  The piece arrays are
;; passed, together with the operands array and the piece count, to
;; neon_disambiguate_copy (presumably it fills operands 0-3 in an order that
;; is safe when source and destination overlap -- TODO confirm against its
;; definition).
198 (define_split
199   [(set (match_operand:EI 0 "s_register_operand" "")
200         (match_operand:EI 1 "s_register_operand" ""))]
201   "TARGET_NEON && reload_completed"
202   [(set (match_dup 0) (match_dup 1))
203    (set (match_dup 2) (match_dup 3))]
205   int rdest = REGNO (operands[0]);
206   int rsrc = REGNO (operands[1]);
207   rtx dest[2], src[2];
209   dest[0] = gen_rtx_REG (TImode, rdest);
210   src[0] = gen_rtx_REG (TImode, rsrc);
211   dest[1] = gen_rtx_REG (DImode, rdest + 4);
212   src[1] = gen_rtx_REG (DImode, rsrc + 4);
214   neon_disambiguate_copy (operands, dest, src, 2);
;; After reload (TARGET_NEON or TARGET_HAVE_MVE), split an OI-mode
;; register-to-register copy into two TImode sets, at register offsets 0 and 4
;; from each operand's register number.  The piece arrays are passed to
;; neon_disambiguate_copy with a count of 2 (presumably to order the copies
;; safely when the ranges overlap -- TODO confirm against its definition).
217 (define_split
218   [(set (match_operand:OI 0 "s_register_operand" "")
219         (match_operand:OI 1 "s_register_operand" ""))]
220   "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
221   [(set (match_dup 0) (match_dup 1))
222    (set (match_dup 2) (match_dup 3))]
224   int rdest = REGNO (operands[0]);
225   int rsrc = REGNO (operands[1]);
226   rtx dest[2], src[2];
228   dest[0] = gen_rtx_REG (TImode, rdest);
229   src[0] = gen_rtx_REG (TImode, rsrc);
230   dest[1] = gen_rtx_REG (TImode, rdest + 4);
231   src[1] = gen_rtx_REG (TImode, rsrc + 4);
233   neon_disambiguate_copy (operands, dest, src, 2);
;; After reload (TARGET_NEON only), split a CI-mode register-to-register copy
;; into three TImode sets, at register offsets 0, 4 and 8 from each operand's
;; register number.  The piece arrays are passed to neon_disambiguate_copy
;; with a count of 3 (presumably to order the copies safely when the ranges
;; overlap -- TODO confirm against its definition).
236 (define_split
237   [(set (match_operand:CI 0 "s_register_operand" "")
238         (match_operand:CI 1 "s_register_operand" ""))]
239   "TARGET_NEON && reload_completed"
240   [(set (match_dup 0) (match_dup 1))
241    (set (match_dup 2) (match_dup 3))
242    (set (match_dup 4) (match_dup 5))]
244   int rdest = REGNO (operands[0]);
245   int rsrc = REGNO (operands[1]);
246   rtx dest[3], src[3];
248   dest[0] = gen_rtx_REG (TImode, rdest);
249   src[0] = gen_rtx_REG (TImode, rsrc);
250   dest[1] = gen_rtx_REG (TImode, rdest + 4);
251   src[1] = gen_rtx_REG (TImode, rsrc + 4);
252   dest[2] = gen_rtx_REG (TImode, rdest + 8);
253   src[2] = gen_rtx_REG (TImode, rsrc + 8);
255   neon_disambiguate_copy (operands, dest, src, 3);
;; After reload (TARGET_NEON or TARGET_HAVE_MVE), split an XI-mode
;; register-to-register copy into four TImode sets, at register offsets 0, 4,
;; 8 and 12 from each operand's register number.  The piece arrays are passed
;; to neon_disambiguate_copy with a count of 4 (presumably to order the copies
;; safely when the ranges overlap -- TODO confirm against its definition).
258 (define_split
259   [(set (match_operand:XI 0 "s_register_operand" "")
260         (match_operand:XI 1 "s_register_operand" ""))]
261   "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
262   [(set (match_dup 0) (match_dup 1))
263    (set (match_dup 2) (match_dup 3))
264    (set (match_dup 4) (match_dup 5))
265    (set (match_dup 6) (match_dup 7))]
267   int rdest = REGNO (operands[0]);
268   int rsrc = REGNO (operands[1]);
269   rtx dest[4], src[4];
271   dest[0] = gen_rtx_REG (TImode, rdest);
272   src[0] = gen_rtx_REG (TImode, rsrc);
273   dest[1] = gen_rtx_REG (TImode, rdest + 4);
274   src[1] = gen_rtx_REG (TImode, rsrc + 4);
275   dest[2] = gen_rtx_REG (TImode, rdest + 8);
276   src[2] = gen_rtx_REG (TImode, rsrc + 8);
277   dest[3] = gen_rtx_REG (TImode, rdest + 12);
278   src[3] = gen_rtx_REG (TImode, rsrc + 12);
280   neon_disambiguate_copy (operands, dest, src, 4);
;; Misaligned store for the VDX modes: the register in operand 1, wrapped in
;; UNSPEC_MISALIGNED_ACCESS, is written to operand 0 with a vst1 of a single
;; %P register.  Only enabled for TARGET_NEON, little-endian byte order and
;; unaligned_access.
283 (define_insn "*movmisalign<mode>_neon_store"
284   [(set (match_operand:VDX 0 "neon_permissive_struct_operand"   "=Um")
285         (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
286                     UNSPEC_MISALIGNED_ACCESS))]
287   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
288   "vst1.<V_sz_elem>\t{%P1}, %A0"
289   [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load for the VDX modes: operand 1, wrapped in
;; UNSPEC_MISALIGNED_ACCESS, is read into the register in operand 0 with a
;; vld1 of a single %P register.  Only enabled for TARGET_NEON, little-endian
;; byte order and unaligned_access.
291 (define_insn "*movmisalign<mode>_neon_load"
292   [(set (match_operand:VDX 0 "s_register_operand"                       "=w")
293         (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
294                                                                         " Um")]
295                     UNSPEC_MISALIGNED_ACCESS))]
296   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
297   "vld1.<V_sz_elem>\t{%P0}, %A1"
298   [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store for the VQX modes: same shape as the VDX store pattern,
;; but the vst1 template prints operand 1 with the %q modifier.  Only enabled
;; for TARGET_NEON, little-endian byte order and unaligned_access.
300 (define_insn "*movmisalign<mode>_neon_store"
301   [(set (match_operand:VQX 0 "neon_permissive_struct_operand"  "=Um")
302         (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
303                     UNSPEC_MISALIGNED_ACCESS))]
304   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
305   "vst1.<V_sz_elem>\t{%q1}, %A0"
306   [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load for the VQX modes: same shape as the VDX load pattern, but
;; the vld1 template prints operand 0 with the %q modifier.  Only enabled for
;; TARGET_NEON, little-endian byte order and unaligned_access.
308 (define_insn "*movmisalign<mode>_neon_load"
309   [(set (match_operand:VQX 0 "s_register_operand"                       "=w")
310         (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
311                                                                         " Um")]
312                     UNSPEC_MISALIGNED_ACCESS))]
313   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
314   "vld1.<V_sz_elem>\t{%q0}, %A1"
315   [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert the scalar in operand 1 into one lane of a VD_LANE vector; operand 3
;; (tied to operand 0) supplies the remaining lanes.  Operand 2 is the
;; vec_merge mask: the lane number is the index of its lowest set bit
;; (ffs - 1), mirrored within the vector when BYTES_BIG_ENDIAN.  Operand 2 is
;; then overwritten with that lane number for printing.
;;   Alternative 0: vld1 into the selected lane from operand 1.
;;   Alternative 1: vmov into the selected lane from operand 1.
317 (define_insn "@vec_set<mode>_internal"
318   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
319         (vec_merge:VD_LANE
320           (vec_duplicate:VD_LANE
321             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
322           (match_operand:VD_LANE 3 "s_register_operand" "0,0")
323           (match_operand:SI 2 "immediate_operand" "i,i")))]
324   "TARGET_NEON"
326   int elt = ffs ((int) INTVAL (operands[2])) - 1;
327   if (BYTES_BIG_ENDIAN)
328     elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
329   operands[2] = GEN_INT (elt);
331   if (which_alternative == 0)
332     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
333   else
334     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
336   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert the scalar in operand 1 into one lane of a VQ2 vector; operand 3
;; (tied to operand 0) supplies the remaining lanes.  The lane number is the
;; index of the lowest set bit of the mask in operand 2 (ffs - 1).  The vector
;; is treated as two halves of GET_MODE_NUNITS / 2 lanes each:
;;   elt = lane number within the half (mirrored when BYTES_BIG_ENDIAN);
;;   hi  = 0 or 2, the register-number offset of the half that holds the lane.
;; Operand 0 is rewritten to the <V_HALF>mode register at REGNO + hi and
;; operand 2 to elt before the vld1 (alternative 0) or vmov (alternative 1)
;; template is returned.
338 (define_insn "@vec_set<mode>_internal"
339   [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
340         (vec_merge:VQ2
341           (vec_duplicate:VQ2
342             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
343           (match_operand:VQ2 3 "s_register_operand" "0,0")
344           (match_operand:SI 2 "immediate_operand" "i,i")))]
345   "TARGET_NEON"
347   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
348   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
349   int elt = elem % half_elts;
350   int hi = (elem / half_elts) * 2;
351   int regno = REGNO (operands[0]);
353   if (BYTES_BIG_ENDIAN)
354     elt = half_elts - 1 - elt;
356   operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
357   operands[2] = GEN_INT (elt);
359   if (which_alternative == 0)
360     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
361   else
362     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
364   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert the DImode scalar in operand 1 into one lane of a V2DI_ONLY vector;
;; operand 3 (tied to operand 0) supplies the other lane.  The lane number is
;; the index of the lowest set bit of the mask in operand 2 (ffs - 1).
;; Operand 0 is rewritten to the DImode register at REGNO + 2 * lane, so the
;; templates address that register as a whole:
;;   Alternative 0: vld1.64 from operand 1.
;;   Alternative 1: vmov from the %Q/%R halves of operand 1.
;; No BYTES_BIG_ENDIAN adjustment is made in this pattern.
367 (define_insn "@vec_set<mode>_internal"
368   [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
369         (vec_merge:V2DI_ONLY
370           (vec_duplicate:V2DI_ONLY
371             (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
372           (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
373           (match_operand:SI 2 "immediate_operand" "i,i")))]
374   "TARGET_NEON"
376   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
377   int regno = REGNO (operands[0]) + 2 * elem;
379   operands[0] = gen_rtx_REG (DImode, regno);
381   if (which_alternative == 0)
382     return "vld1.64\t%P0, %A1";
383   else
384     return "vmov\t%P0, %Q1, %R1";
386   [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Extract one lane of the VD_LANE vector in operand 1 into operand 0.
;; Operand 2 is the lane number itself (not a mask); when BYTES_BIG_ENDIAN it
;; is mirrored within the vector before printing.
;;   Alternative 0: vst1 of the selected lane to operand 0.
;;   Alternative 1: vmov.<V_uf_sclr> of the selected lane to operand 0.
389 (define_insn "vec_extract<mode><V_elem_l>"
390   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
391         (vec_select:<V_elem>
392           (match_operand:VD_LANE 1 "s_register_operand" "w,w")
393           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
394   "TARGET_NEON"
396   if (BYTES_BIG_ENDIAN)
397     {
398       int elt = INTVAL (operands[2]);
399       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
400       operands[2] = GEN_INT (elt);
401     }
403   if (which_alternative == 0)
404     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
405   else
406     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
408   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
411 ;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
412 ;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
413 ;; by define_expand in vec-common.md file.
;; Extract one lane of the VQ2 vector in operand 1 into operand 0.  Operand 2
;; is the lane number.  The vector is treated as two halves of
;; GET_MODE_NUNITS / 2 lanes each:
;;   elt = lane number within the half (mirrored when BYTES_BIG_ENDIAN);
;;   hi  = 0 or 2, the register-number offset of the half that holds the lane.
;; Operand 1 is rewritten to the <V_HALF>mode register at REGNO + hi and
;; operand 2 to elt before the vst1 (alternative 0) or vmov (alternative 1)
;; template is returned.
414 (define_insn "neon_vec_extract<mode><V_elem_l>"
415   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
416         (vec_select:<V_elem>
417           (match_operand:VQ2 1 "s_register_operand" "w,w")
418           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
419   "TARGET_NEON"
421   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
422   int elt = INTVAL (operands[2]) % half_elts;
423   int hi = (INTVAL (operands[2]) / half_elts) * 2;
424   int regno = REGNO (operands[1]);
426   if (BYTES_BIG_ENDIAN)
427     elt = half_elts - 1 - elt;
429   operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
430   operands[2] = GEN_INT (elt);
432   if (which_alternative == 0)
433     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
434   else
435     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
437   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
440 ;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
441 ;; and this pattern is called by define_expand in vec-common.md file.
;; Extract one DImode lane of the V2DI vector in operand 1 into operand 0.
;; Operand 2 is the lane number; operand 1 is rewritten to the DImode register
;; at REGNO + 2 * lane, so the templates address that register as a whole:
;;   Alternative 0: vst1.64 to operand 0.
;;   Alternative 1: vmov into the %Q/%R halves of operand 0.
;; No BYTES_BIG_ENDIAN adjustment is made in this pattern.
442 (define_insn "neon_vec_extractv2didi"
443   [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
444         (vec_select:DI
445           (match_operand:V2DI 1 "s_register_operand" "w,w")
446           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
447   "TARGET_NEON"
449   int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
451   operands[1] = gen_rtx_REG (DImode, regno);
453   if (which_alternative == 0)
454     return "vst1.64\t{%P1}, %A0  @ v2di";
455   else
456     return "vmov\t%Q0, %R0, %P1  @ v2di";
458   [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition for the VDQ modes: operand 0 = operand 1 + operand 2,
;; emitted as vadd.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The "type"
;; attribute depends on <Is_float_mode>: neon_fp_addsub_s<q> when it tests
;; true, neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468         (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469                   (match_operand:VDQ 2 "s_register_operand" "w")))]
470   "ARM_HAVE_NEON_<MODE>_ARITH"
471   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
472   [(set (attr "type")
473       (if_then_else (match_test "<Is_float_mode>")
474                     (const_string "neon_fp_addsub_s<q>")
475                     (const_string "neon_add<q>")))]
;; Vector subtraction for the VDQ modes: operand 0 = operand 1 - operand 2,
;; emitted as vsub.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The "type"
;; attribute depends on <Is_float_mode>: neon_fp_addsub_s<q> when it tests
;; true, neon_sub<q> otherwise.
478 (define_insn "*sub<mode>3_neon"
479   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
480         (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
481                    (match_operand:VDQ 2 "s_register_operand" "w")))]
482   "ARM_HAVE_NEON_<MODE>_ARITH"
483   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
484   [(set (attr "type")
485       (if_then_else (match_test "<Is_float_mode>")
486                     (const_string "neon_fp_addsub_s<q>")
487                     (const_string "neon_sub<q>")))]
;; Vector multiplication for the VDQW modes: operand 0 = operand 1 * operand 2,
;; emitted as vmul.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The "type"
;; attribute depends on <Is_float_mode>: neon_fp_mul_s<q> when it tests true,
;; neon_mul_<V_elem_ch><q> otherwise.
490 (define_insn "*mul<mode>3_neon"
491   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
492         (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
493                    (match_operand:VDQW 2 "s_register_operand" "w")))]
494   "ARM_HAVE_NEON_<MODE>_ARITH"
495   "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
496   [(set (attr "type")
497       (if_then_else (match_test "<Is_float_mode>")
498                     (const_string "neon_fp_mul_s<q>")
499                     (const_string "neon_mul_<V_elem_ch><q>")))]
502 /* Perform division using multiply-by-reciprocal.
503    Reciprocal is calculated using Newton-Raphson method.
504    Enabled with -funsafe-math-optimizations -freciprocal-math
505    and disabled for -Os since it increases code size.  */
;; The expander condition itself tests TARGET_NEON, !optimize_size and
;; flag_reciprocal_math.  Two fresh pseudos are used: REC holds the running
;; reciprocal of operand 2 and VRECPS_TEMP holds the result of each vrecps
;; step.  Operand 0 receives operand 1 * REC.
507 (define_expand "div<mode>3"
508   [(set (match_operand:VCVTF 0 "s_register_operand")
509         (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
510                   (match_operand:VCVTF 2 "s_register_operand")))]
511   "TARGET_NEON && !optimize_size
512    && flag_reciprocal_math"
513   {
514     rtx rec = gen_reg_rtx (<MODE>mode);
515     rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
517     /* Reciprocal estimate.  */
518     emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
520     /* Perform 2 iterations of the Newton-Raphson method: each iteration
        computes a vrecps step from REC and the divisor, then multiplies REC
        by that result.  */
521     for (int i = 0; i < 2; i++)
522       {
523         emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
524         emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
525       }
527     /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
528     emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
529     DONE;
530   }
;; Multiply-accumulate for the VDQW modes:
;; operand 0 = operand 2 * operand 3 + operand 1, with operand 1 tied to
;; operand 0 (constraint "0").  Emitted as vmla; enabled by
;; ARM_HAVE_NEON_<MODE>_ARITH.  The "type" attribute depends on
;; <Is_float_mode>.
534 (define_insn "mul<mode>3add<mode>_neon"
535   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
536         (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
537                             (match_operand:VDQW 3 "s_register_operand" "w"))
538                   (match_operand:VDQW 1 "s_register_operand" "0")))]
539   "ARM_HAVE_NEON_<MODE>_ARITH"
540   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
541   [(set (attr "type")
542       (if_then_else (match_test "<Is_float_mode>")
543                     (const_string "neon_fp_mla_s<q>")
544                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VH modes:
;; operand 0 = operand 2 * operand 3 + operand 1, with operand 1 tied to
;; operand 0.  Emitted as vmla.f16; enabled by ARM_HAVE_NEON_<MODE>_ARITH.
547 (define_insn "mul<mode>3add<mode>_neon"
548   [(set (match_operand:VH 0 "s_register_operand" "=w")
549         (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
550                           (match_operand:VH 3 "s_register_operand" "w"))
551                   (match_operand:VH 1 "s_register_operand" "0")))]
552   "ARM_HAVE_NEON_<MODE>_ARITH"
553   "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
554   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract for the VDQW modes:
;; operand 0 = operand 1 - operand 2 * operand 3, with operand 1 tied to
;; operand 0 (constraint "0").  Emitted as vmls; enabled by
;; ARM_HAVE_NEON_<MODE>_ARITH.  The "type" attribute depends on
;; <Is_float_mode>.
557 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
558   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
559         (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
560                     (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
561                                (match_operand:VDQW 3 "s_register_operand" "w"))))]
562   "ARM_HAVE_NEON_<MODE>_ARITH"
563   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
564   [(set (attr "type")
565       (if_then_else (match_test "<Is_float_mode>")
566                     (const_string "neon_fp_mla_s<q>")
567                     (const_string "neon_mla_<V_elem_ch><q>")))]
570 ;; Fused multiply-accumulate
571 ;; We define each insn twice here:
572 ;;    1: with flag_unsafe_math_optimizations for the widening multiply phase
573 ;;       to be able to use when converting to FMA.
574 ;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; NOTE(review): the conditions visible in this file do not test
;; flag_unsafe_math_optimizations directly; variant 1 tests
;; ARM_HAVE_NEON_<MODE>_ARITH (presumably where that flag is folded in --
;; confirm against its definition) and variant 2 tests TARGET_NEON.
;;
;; fma<mode>4 for the VCVTF modes: operand 0 = operand 1 * operand 2 +
;; operand 3, with operand 3 tied to operand 0.  Emitted as vfma; requires
;; ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA.
575 (define_insn "fma<VCVTF:mode>4"
576   [(set (match_operand:VCVTF 0 "register_operand" "=w")
577         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
578                  (match_operand:VCVTF 2 "register_operand" "w")
579                  (match_operand:VCVTF 3 "register_operand" "0")))]
580   "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
581   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
582   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Intrinsic variant of the VCVTF fused multiply-accumulate: identical RTL and
;; vfma template to fma<VCVTF:mode>4, but the condition is
;; TARGET_NEON && TARGET_FMA.
585 (define_insn "fma<VCVTF:mode>4_intrinsic"
586   [(set (match_operand:VCVTF 0 "register_operand" "=w")
587         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
588                  (match_operand:VCVTF 2 "register_operand" "w")
589                  (match_operand:VCVTF 3 "register_operand" "0")))]
590   "TARGET_NEON && TARGET_FMA"
591   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
592   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-accumulate for the VH modes: operand 0 = operand 1 *
;; operand 2 + operand 3, with operand 3 tied to operand 0.  Emitted as vfma;
;; enabled by ARM_HAVE_NEON_<MODE>_ARITH.
595 (define_insn "fma<VH:mode>4"
596  [(set (match_operand:VH 0 "register_operand" "=w")
597    (fma:VH
598     (match_operand:VH 1 "register_operand" "w")
599     (match_operand:VH 2 "register_operand" "w")
600     (match_operand:VH 3 "register_operand" "0")))]
601  "ARM_HAVE_NEON_<MODE>_ARITH"
602  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
603  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, with operand 3 tied to operand 0.  Emitted as vfms;
;; requires ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA.
606 (define_insn "*fmsub<VCVTF:mode>4"
607   [(set (match_operand:VCVTF 0 "register_operand" "=w")
608         (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
609                    (match_operand:VCVTF 2 "register_operand" "w")
610                    (match_operand:VCVTF 3 "register_operand" "0")))]
611   "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
612   "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
613   [(set_attr "type" "neon_fp_mla_s<q>")]
;; Intrinsic variant of the VCVTF fused multiply-subtract: identical RTL and
;; vfms template to *fmsub<VCVTF:mode>4, but the condition is
;; TARGET_NEON && TARGET_FMA.
616 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
617  [(set (match_operand:VCVTF 0 "register_operand" "=w")
618    (fma:VCVTF
619     (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
620     (match_operand:VCVTF 2 "register_operand" "w")
621     (match_operand:VCVTF 3 "register_operand" "0")))]
622  "TARGET_NEON && TARGET_FMA"
623  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
624  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VH modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, with operand 3 tied to operand 0.  Emitted as vfms;
;; enabled by TARGET_NEON_FP16INST.
627 (define_insn "fmsub<VH:mode>4_intrinsic"
628  [(set (match_operand:VH 0 "register_operand" "=w")
629    (fma:VH
630     (neg:VH (match_operand:VH 1 "register_operand" "w"))
631     (match_operand:VH 2 "register_operand" "w")
632     (match_operand:VH 3 "register_operand" "0")))]
633  "TARGET_NEON_FP16INST"
634  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
635  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Named expander for the NEON_VRINT unspecs on the VCVTF modes.  It carries
;; only the RTL template and a condition; unlike the neon_vrint insn that
;; follows it in the file, the condition here additionally requires
;; flag_unsafe_math_optimizations.
638 (define_expand "<NEON_VRINT:nvrint_pattern><VCVTF:mode>2"
639   [(set (match_operand:VCVTF 0 "s_register_operand")
640         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand")]
641                       NEON_VRINT))]
642   "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations"
;; vrint<variant> on the VCVTF modes, expressed as a NEON_VRINT unspec of
;; operand 1.  Requires TARGET_NEON and TARGET_VFP5.  The element suffix in
;; the template is fixed to .f32.
645 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
646   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
647         (unspec:VCVTF [(match_operand:VCVTF 1
648                          "s_register_operand" "w")]
649                 NEON_VRINT))]
650   "TARGET_NEON && TARGET_VFP5"
651   "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
652   [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Named expander for a FIXUORS conversion of a NEON_VCVT unspec: the VCVTF
;; source in operand 1 is converted into a <V_cmp_result> destination.  It
;; carries only the RTL template and a condition; unlike the neon_vcvt insn
;; that follows it in the file, the condition here additionally requires
;; flag_unsafe_math_optimizations.
655 (define_expand "l<NEON_VCVT:nvrint_pattern><su_optab><VCVTF:mode><v_cmp_result>2"
656   [(set (match_operand:<V_cmp_result> 0 "register_operand")
657         (FIXUORS:<V_cmp_result>
658           (unspec:VCVTF [(match_operand:VCVTF 1 "register_operand")]
659                         NEON_VCVT)))]
660   "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations"
;; vcvt<variant>.<su>32.f32: FIXUORS conversion of a NEON_VCVT unspec of the
;; VCVTF operand 1 into the <V_cmp_result> operand 0.  Requires TARGET_NEON
;; and TARGET_VFP5.  The "predicable" attribute is explicitly set to "no".
663 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
664   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
665         (FIXUORS:<V_cmp_result> (unspec:VCVTF
666                                [(match_operand:VCVTF 1 "register_operand" "w")]
667                                NEON_VCVT)))]
668   "TARGET_NEON && TARGET_VFP5"
669   "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
670   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
671    (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes, with two alternatives.
;;   Alternative 0: register form, emitted as a three-operand vorr.
;;   Alternative 1: operand 2 matches constraint "Dl" and operand 1 is tied to
;;                  operand 0; the text is produced by
;;                  neon_output_logic_immediate with mnemonic "vorr" and a
;;                  fourth argument of 0 (and<mode>3_neon passes 1 there).
674 (define_insn "ior<mode>3_neon"
675   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
676         (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
677                  (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
678   "TARGET_NEON"
680   switch (which_alternative)
681     {
682     case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
683     case 1: return neon_output_logic_immediate ("vorr", &operands[2],
684                      <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
685     default: gcc_unreachable ();
686     }
688   [(set_attr "type" "neon_logic<q>")]
691 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
692 ;; vorr. We support the pseudo-instruction vand instead, because that
693 ;; corresponds to the canonical form the middle-end expects to use for
694 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes, with two alternatives.
;;   Alternative 0: register form, emitted as a three-operand vand.
;;   Alternative 1: operand 2 matches constraint "DL" and operand 1 is tied to
;;                  operand 0; the text is produced by
;;                  neon_output_logic_immediate with mnemonic "vand" and a
;;                  fourth argument of 1 (ior<mode>3_neon passes 0 there).
696 (define_insn "and<mode>3_neon"
697   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
698         (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
699                  (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
700   "TARGET_NEON"
702   switch (which_alternative)
703     {
704     case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
705     case 1: return neon_output_logic_immediate ("vand", &operands[2],
706                      <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
707     default: gcc_unreachable ();
708     }
710   [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 0 = (~operand 2) | operand 1.  Note that
;; the complemented input is operand 2, so the vorn template still lists the
;; operands as %0, %1, %2.
713 (define_insn "orn<mode>3_neon"
714   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
715         (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
716                  (match_operand:VDQ 1 "s_register_operand" "w")))]
717   "TARGET_NEON"
718   "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
719   [(set_attr "type" "neon_logic<q>")]
;; Bit-clear for the VDQ modes: operand 0 = (~operand 2) & operand 1.  Note
;; that the complemented input is operand 2, so the vbic template still lists
;; the operands as %0, %1, %2.
722 (define_insn "bic<mode>3_neon"
723   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
724         (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
725                  (match_operand:VDQ 1 "s_register_operand" "w")))]
726   "TARGET_NEON"
727   "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
728   [(set_attr "type" "neon_logic<q>")]
;; Bitwise exclusive-OR for the VDQ modes: operand 0 = operand 1 ^ operand 2,
;; emitted as veor.  Register operands only.
731 (define_insn "xor<mode>3_neon"
732   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733         (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
734                  (match_operand:VDQ 2 "s_register_operand" "w")))]
735   "TARGET_NEON"
736   "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737   [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes: operand 0 = ~operand 1, emitted as
;; vmvn.  The "type" attribute is neon_move<q>.
740 (define_insn "one_cmpl<mode>2_neon"
741   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
742         (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
743   "TARGET_NEON"
744   "vmvn\t%<V_reg>0, %<V_reg>1"
745   [(set_attr "type" "neon_move<q>")]
;; Absolute value for the VDQW modes: operand 0 = abs (operand 1), emitted as
;; vabs.<V_s_elem>.  The "type" attribute depends on <Is_float_mode>:
;; neon_fp_abs_s<q> when it tests true, neon_abs<q> otherwise.
748 (define_insn "neon_abs<mode>2"
749   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
750         (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
751   "TARGET_NEON"
752   "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
753   [(set (attr "type")
754       (if_then_else (match_test "<Is_float_mode>")
755                     (const_string "neon_fp_abs_s<q>")
756                     (const_string "neon_abs<q>")))]
;; Negation for the VDQW modes: operand 0 = -operand 1, emitted as
;; vneg.<V_s_elem>.  The "type" attribute depends on <Is_float_mode>:
;; neon_fp_neg_s<q> when it tests true, neon_neg<q> otherwise.
759 (define_insn "neon_neg<mode>2"
760   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
761         (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
762   "TARGET_NEON"
763   "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
764   [(set (attr "type")
765       (if_then_else (match_test "<Is_float_mode>")
766                     (const_string "neon_fp_neg_s<q>")
767                     (const_string "neon_neg<q>")))]
;; ABSNEG operation on the VH modes, emitted as v<absneg_str>.<V_s_elem>.
;; Enabled by TARGET_NEON_FP16INST.  Every instance of this pattern uses the
;; same "type" attribute, neon_abs<q>.
770 (define_insn "neon_<absneg_str><mode>2"
771   [(set (match_operand:VH 0 "s_register_operand" "=w")
772     (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
773  "TARGET_NEON_FP16INST"
774  "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
775  [(set_attr "type" "neon_abs<q>")]
;; Expander named neon_v<absneg_str><mode> for the VH modes.  It simply emits
;; the neon_<absneg_str><mode>2 insn on the same two operands and finishes
;; with DONE.  Enabled by TARGET_NEON_FP16INST.
778 (define_expand "neon_v<absneg_str><mode>"
779  [(set
780    (match_operand:VH 0 "s_register_operand")
781    (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
782  "TARGET_NEON_FP16INST"
784   emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
785   DONE;
;; FP16_RND unspec operations on the VH modes: operand 0 is the unspec applied
;; to operand 1, emitted as <fp16_rnd_insn>.<V_s_elem>.  Enabled by
;; TARGET_NEON_FP16INST.
788 (define_insn "neon_v<fp16_rnd_str><mode>"
789   [(set (match_operand:VH 0 "s_register_operand" "=w")
790     (unspec:VH
791      [(match_operand:VH 1 "s_register_operand" "w")]
792      FP16_RND))]
793  "TARGET_NEON_FP16INST"
794  "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
795  [(set_attr "type" "neon_fp_round_s<q>")]
;; vrsqrte.f16 on the VH modes, expressed as an UNSPEC_VRSQRTE of operand 1.
;; Enabled by TARGET_NEON_FP16INST.
798 (define_insn "neon_vrsqrte<mode>"
799   [(set (match_operand:VH 0 "s_register_operand" "=w")
800     (unspec:VH
801      [(match_operand:VH 1 "s_register_operand" "w")]
802      UNSPEC_VRSQRTE))]
803   "TARGET_NEON_FP16INST"
804   "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
805  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned minimum for the VDQIW modes: operand 0 = umin (operand 1,
;; operand 2), emitted as vmin with the <V_u_elem> suffix.
808 (define_insn "*umin<mode>3_neon"
809   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
810         (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
811                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
812   "TARGET_NEON"
813   "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
814   [(set_attr "type" "neon_minmax<q>")]
;; Unsigned maximum for the VDQIW modes: operand 0 = umax (operand 1,
;; operand 2), emitted as vmax with the <V_u_elem> suffix.
817 (define_insn "*umax<mode>3_neon"
818   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
819         (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
820                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
821   "TARGET_NEON"
822   "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
823   [(set_attr "type" "neon_minmax<q>")]
;; Signed (or floating-point) element-wise minimum over VDQW modes, emitted
;; as vmin.<V_s_elem>.  The scheduling type is chosen per mode: the FP
;; min/max class when <Is_float_mode> holds, the integer class otherwise.
826 (define_insn "*smin<mode>3_neon"
827   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
828         (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
829                    (match_operand:VDQW 2 "s_register_operand" "w")))]
830   "TARGET_NEON"
831   "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
832   [(set (attr "type")
833       (if_then_else (match_test "<Is_float_mode>")
834                     (const_string "neon_fp_minmax_s<q>")
835                     (const_string "neon_minmax<q>")))]
;; Signed (or floating-point) element-wise maximum over VDQW modes, emitted
;; as vmax.<V_s_elem>.  The scheduling type is chosen per mode: the FP
;; min/max class when <Is_float_mode> holds, the integer class otherwise.
838 (define_insn "*smax<mode>3_neon"
839   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
840         (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
841                    (match_operand:VDQW 2 "s_register_operand" "w")))]
842   "TARGET_NEON"
843   "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
844   [(set (attr "type")
845       (if_then_else (match_test "<Is_float_mode>")
846                     (const_string "neon_fp_minmax_s<q>")
847                     (const_string "neon_minmax<q>")))]
850 ; TODO: V2DI shifts are currently disabled because there are bugs in the
851 ; generic vectorizer code.  It ends up creating a V2DI constructor with
852 ; SImode elements.
;; Arithmetic shift right of a VDQIW vector by an immediate accepted by
;; imm_for_neon_rshift_operand.  The assembly text is built by
;; neon_output_shift_immediate using the "vshr" mnemonic and the signed ('s')
;; element prefix.
;; NOTE(review): the meaning of the trailing `false' argument is defined by
;; neon_output_shift_immediate, which is not visible here.
854 (define_insn "vashr<mode>3_imm"
855   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
856         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
857                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
858   "TARGET_NEON"
859   {
860     return neon_output_shift_immediate ("vshr", 's', &operands[2],
861                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
862                                         false);
863   }
864   [(set_attr "type" "neon_shift_imm<q>")]
;; Logical shift right of a VDQIW vector by an immediate accepted by
;; imm_for_neon_rshift_operand.  Same shape as the arithmetic variant, but
;; the output helper is called with the unsigned ('u') element prefix.
;; NOTE(review): the meaning of the trailing `false' argument is defined by
;; neon_output_shift_immediate, which is not visible here.
867 (define_insn "vlshr<mode>3_imm"
868   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
869         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
870                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
871   "TARGET_NEON"
872   {
873     return neon_output_shift_immediate ("vshr", 'u', &operands[2],
874                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
875                                         false);
876   }              
877   [(set_attr "type" "neon_shift_imm<q>")]
880 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
881 ; amount, with signed operands. This is essentially the same as ashl<mode>3
882 ; above, but using an unspec in case GCC tries anything tricky with negative
883 ; shift amounts.
;; Register-count vshl with the signed element suffix <V_s_elem>, wrapped in
;; UNSPEC_ASHIFT_SIGNED instead of an (ashift ...) rtx.  Operand 2 holds the
;; per-element shift counts.
885 (define_insn "ashl<mode>3_signed"
886   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
887         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
888                       (match_operand:VDQI 2 "s_register_operand" "w")]
889                      UNSPEC_ASHIFT_SIGNED))]
890   "TARGET_NEON"
891   "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
892   [(set_attr "type" "neon_shift_reg<q>")]
895 ; Used for implementing logical shift-right, which is a left-shift by a negative
896 ; amount, with unsigned operands.
;; Register-count vshl with the unsigned element suffix <V_u_elem>, wrapped
;; in UNSPEC_ASHIFT_UNSIGNED.  Operand 2 holds the per-element shift counts.
898 (define_insn "ashl<mode>3_unsigned"
899   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
900         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
901                       (match_operand:VDQI 2 "s_register_operand" "w")]
902                      UNSPEC_ASHIFT_UNSIGNED))]
903   "TARGET_NEON"
904   "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
905   [(set_attr "type" "neon_shift_reg<q>")]
908 ;; 64-bit shifts
910 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
911 ;; leaving the upper half uninitialized.  This is OK since the shift
912 ;; instruction only looks at the low 8 bits anyway.  To avoid confusing
913 ;; data flow analysis however, we pretend the full register is set
914 ;; using an unspec.
;; Two alternatives, both writing lane 0 of the destination:
;;   0: SImode source matching "Um" -> vld1.32 (type neon_load1_1reg)
;;   1: SImode source in a core register ("r") -> vmov.32 (type neon_from_gp)
;; The DImode result is expressed as UNSPEC_LOAD_COUNT of the SImode source.
915 (define_insn "neon_load_count"
916   [(set (match_operand:DI 0 "s_register_operand" "=w,w")
917         (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
918                    UNSPEC_LOAD_COUNT))]
919   "TARGET_NEON"
920   "@
921    vld1.32\t{%P0[0]}, %A1
922    vmov.32\t%P0[0], %1"
923   [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
926 ;; Widening operations
;; Signed widening sum for VQI inputs: operand 0 = sign_extend (operand 1)
;; accumulated into operand 2, with the result in <V_double_width>.
;; The expansion seeds operand 0 with the accumulator (operand 2) if they are
;; not already the same rtx, then adds the two halves of operand 1 in turn
;; using the vec_sel_widen_ssum_lo/hi insns.  The half selectors come from
;; arm_simd_vect_par_cnst_half (mode, false) and (mode, true) respectively.
928 (define_expand "widen_ssum<mode>3"
929   [(set (match_operand:<V_double_width> 0 "s_register_operand")
930         (plus:<V_double_width>
931          (sign_extend:<V_double_width>
932           (match_operand:VQI 1 "s_register_operand"))
933          (match_operand:<V_double_width> 2 "s_register_operand")))]
934   "TARGET_NEON"
935   {
936     machine_mode mode = GET_MODE (operands[1]);
937     rtx p1, p2;
939     p1  = arm_simd_vect_par_cnst_half (mode, false);
940     p2  = arm_simd_vect_par_cnst_half (mode, true);
942     if (operands[0] != operands[2])
943       emit_move_insn (operands[0], operands[2]);
945     emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
946                                                          operands[1],
947                                                          p1,
948                                                          operands[0]));
949     emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
950                                                          operands[1],
951                                                          p2,
952                                                          operands[0]));
953     DONE;
954   }
;; vaddw of the half of operand 1 selected by a vect_par_constant_low
;; parallel, sign-extended and added to the accumulator.  Operand 3 is tied
;; to the output ("0").  The half of operand 1 named in the assembly depends
;; on endianness: %e1 for little-endian, %f1 for big-endian.
957 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
958   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
959         (plus:<V_double_width>
960          (sign_extend:<V_double_width>
961           (vec_select:<V_HALF>
962            (match_operand:VQI 1 "s_register_operand" "%w")
963            (match_operand:VQI 2 "vect_par_constant_low" "")))
964          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
965   "TARGET_NEON"
967   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
968     "vaddw.<V_s_elem>\t%q0, %q3, %e1";
970   [(set_attr "type" "neon_add_widen")])
;; vaddw of the half of operand 1 selected by a vect_par_constant_high
;; parallel, sign-extended and added to the accumulator.  Operand 3 is tied
;; to the output ("0").  The half of operand 1 named in the assembly depends
;; on endianness: %f1 for little-endian, %e1 for big-endian.
972 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
973   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
974         (plus:<V_double_width>
975          (sign_extend:<V_double_width>
976           (vec_select:<V_HALF>
977                          (match_operand:VQI 1 "s_register_operand" "%w")
978                          (match_operand:VQI 2 "vect_par_constant_high" "")))
979          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
980   "TARGET_NEON"
982   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
983     "vaddw.<V_s_elem>\t%q0, %q3, %f1";
985   [(set_attr "type" "neon_add_widen")])
;; Signed widening sum for VW inputs: a single vaddw adds the sign-extended
;; operand 1 to the <V_widen> accumulator in operand 2.
987 (define_insn "widen_ssum<mode>3"
988   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
989         (plus:<V_widen>
990          (sign_extend:<V_widen>
991           (match_operand:VW 1 "s_register_operand" "%w"))
992          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
993   "TARGET_NEON"
994   "vaddw.<V_s_elem>\t%q0, %q2, %P1"
995   [(set_attr "type" "neon_add_widen")]
;; Unsigned widening sum for VQI inputs: operand 0 = zero_extend (operand 1)
;; accumulated into operand 2, with the result in <V_double_width>.
;; The expansion seeds operand 0 with the accumulator (operand 2) if they are
;; not already the same rtx, then adds the two halves of operand 1 in turn
;; using the vec_sel_widen_usum_lo/hi insns.  The half selectors come from
;; arm_simd_vect_par_cnst_half (mode, false) and (mode, true) respectively.
998 (define_expand "widen_usum<mode>3"
999   [(set (match_operand:<V_double_width> 0 "s_register_operand")
1000         (plus:<V_double_width>
1001          (zero_extend:<V_double_width>
1002           (match_operand:VQI 1 "s_register_operand"))
1003          (match_operand:<V_double_width> 2 "s_register_operand")))]
1004   "TARGET_NEON"
1005   {
1006     machine_mode mode = GET_MODE (operands[1]);
1007     rtx p1, p2;
1009     p1  = arm_simd_vect_par_cnst_half (mode, false);
1010     p2  = arm_simd_vect_par_cnst_half (mode, true);
1012     if (operands[0] != operands[2])
1013       emit_move_insn (operands[0], operands[2]);
1015     emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1016                                                          operands[1],
1017                                                          p1,
1018                                                          operands[0]));
1019     emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1020                                                          operands[1],
1021                                                          p2,
1022                                                          operands[0]));
1023     DONE;
1024   }
;; vaddw of the half of operand 1 selected by a vect_par_constant_low
;; parallel, zero-extended and added to the accumulator.  Operand 3 is tied
;; to the output ("0").  The half of operand 1 named in the assembly depends
;; on endianness: %e1 for little-endian, %f1 for big-endian.
1027 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1028   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1029         (plus:<V_double_width>
1030          (zero_extend:<V_double_width>
1031           (vec_select:<V_HALF>
1032            (match_operand:VQI 1 "s_register_operand" "%w")
1033            (match_operand:VQI 2 "vect_par_constant_low" "")))
1034          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1035   "TARGET_NEON"
1037   return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1038     "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1040   [(set_attr "type" "neon_add_widen")])
;; vaddw of the half of operand 1 selected by a vect_par_constant_high
;; parallel, zero-extended and added to the accumulator.  Operand 3 is tied
;; to the output ("0").  The half of operand 1 named in the assembly depends
;; on endianness: %f1 for little-endian, %e1 for big-endian.
1042 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1043   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1044         (plus:<V_double_width>
1045          (zero_extend:<V_double_width>
1046           (vec_select:<V_HALF>
1047                          (match_operand:VQI 1 "s_register_operand" "%w")
1048                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1049          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1050   "TARGET_NEON"
1052  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1053     "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1055   [(set_attr "type" "neon_add_widen")])
;; Unsigned widening sum for VW inputs: a single vaddw adds the zero-extended
;; operand 1 to the <V_widen> accumulator in operand 2.
1057 (define_insn "widen_usum<mode>3"
1058   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1059         (plus:<V_widen> (zero_extend:<V_widen>
1060                           (match_operand:VW 1 "s_register_operand" "%w"))
1061                         (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1062   "TARGET_NEON"
1063   "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1064   [(set_attr "type" "neon_add_widen")]
1067 ;; Helpers for quad-word reduction operations
1069 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1070 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1071 ; N/2-element vector.
;; V4SI -> V2SI: applies each VQH_OPS code to elements {0,1} and {2,3} of
;; operand 1.  The mnemonic and signedness prefix come from <VQH_mnem> and
;; <VQH_sign>; the "vqh_mnem" attribute records which mnemonic was used.
1073 (define_insn "quad_halves_<code>v4si"
1074   [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1075         (VQH_OPS:V2SI
1076           (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1077                            (parallel [(const_int 0) (const_int 1)]))
1078           (vec_select:V2SI (match_dup 1)
1079                            (parallel [(const_int 2) (const_int 3)]))))]
1080   "TARGET_NEON"
1081   "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1082   [(set_attr "vqh_mnem" "<VQH_mnem>")
1083    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF -> V2SF: applies each VQHS_OPS code to elements {0,1} and {2,3} of
;; operand 1, using the .f32 form of <VQH_mnem>.  Unlike the integer
;; variants, this one is gated on ARM_HAVE_NEON_V4SF_ARITH.
1086 (define_insn "quad_halves_<code>v4sf"
1087   [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1088         (VQHS_OPS:V2SF
1089           (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1090                            (parallel [(const_int 0) (const_int 1)]))
1091           (vec_select:V2SF (match_dup 1)
1092                            (parallel [(const_int 2) (const_int 3)]))))]
1093   "ARM_HAVE_NEON_V4SF_ARITH"
1094   "<VQH_mnem>.f32\t%P0, %e1, %f1"
1095   [(set_attr "vqh_mnem" "<VQH_mnem>")
1096    (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI -> V4HI: applies each VQH_OPS code to elements {0..3} and {4..7} of
;; operand 1.  The mnemonic and signedness prefix come from <VQH_mnem> and
;; <VQH_sign>; the "vqh_mnem" attribute records which mnemonic was used.
;; Operand 0 is only ever written by this pattern, so it uses the
;; output-only "=w" constraint, matching the V4SI and V4SF variants.
1099 (define_insn "quad_halves_<code>v8hi"
1100   [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1101         (VQH_OPS:V4HI
1102           (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1103                            (parallel [(const_int 0) (const_int 1)
1104                                       (const_int 2) (const_int 3)]))
1105           (vec_select:V4HI (match_dup 1)
1106                            (parallel [(const_int 4) (const_int 5)
1107                                       (const_int 6) (const_int 7)]))))]
1108   "TARGET_NEON"
1109   "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1110   [(set_attr "vqh_mnem" "<VQH_mnem>")
1111    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI -> V8QI: applies each VQH_OPS code to elements {0..7} and {8..15}
;; of operand 1.  The mnemonic and signedness prefix come from <VQH_mnem> and
;; <VQH_sign>; the "vqh_mnem" attribute records which mnemonic was used.
;; Operand 0 is only ever written by this pattern, so it uses the
;; output-only "=w" constraint, matching the V4SI and V4SF variants.
1114 (define_insn "quad_halves_<code>v16qi"
1115   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1116         (VQH_OPS:V8QI
1117           (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1118                            (parallel [(const_int 0) (const_int 1)
1119                                       (const_int 2) (const_int 3)
1120                                       (const_int 4) (const_int 5)
1121                                       (const_int 6) (const_int 7)]))
1122           (vec_select:V8QI (match_dup 1)
1123                            (parallel [(const_int 8) (const_int 9)
1124                                       (const_int 10) (const_int 11)
1125                                       (const_int 12) (const_int 13)
1126                                       (const_int 14) (const_int 15)]))))]
1127   "TARGET_NEON"
1128   "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1129   [(set_attr "vqh_mnem" "<VQH_mnem>")
1130    (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Store operand 1 (a <V_HALF> value) into the part of the ANY128 operand 0
;; that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), via a subreg move.
1133 (define_expand "move_hi_quad_<mode>"
1134  [(match_operand:ANY128 0 "s_register_operand")
1135   (match_operand:<V_HALF> 1 "s_register_operand")]
1136  "TARGET_NEON"
1138   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1139                                        GET_MODE_SIZE (<V_HALF>mode)),
1140                   operands[1]);
1141   DONE;
;; Store operand 1 (a <V_HALF> value) into the part of the ANY128 operand 0
;; that starts at byte offset 0, via a subreg move.
1144 (define_expand "move_lo_quad_<mode>"
1145  [(match_operand:ANY128 0 "s_register_operand")
1146   (match_operand:<V_HALF> 1 "s_register_operand")]
1147  "TARGET_NEON"
1149   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1150                                        <MODE>mode, 0),
1151                   operands[1]);
1152   DONE;
1155 ;; Reduction operations
;; Sum reduction of a VD vector to a scalar.  neon_pairwise_reduce is driven
;; with the neon_vpadd_internal<mode> generator into a fresh register, and
;; element 0 of that register is extracted into operand 0.
1157 (define_expand "reduc_plus_scal_<mode>"
1158   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1159    (match_operand:VD 1 "s_register_operand")]
1160   "ARM_HAVE_NEON_<MODE>_ARITH"
1162   rtx vec = gen_reg_rtx (<MODE>mode);
1163   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1164                         &gen_neon_vpadd_internal<mode>);
1165   /* The same result is actually computed into every element.  */
1166   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1167   DONE;
;; Sum reduction of a V2DI vector to a DImode scalar.  Little-endian only.
;; arm_reduc_plus_internal_v2di computes the sum into a temporary V2DI
;; register, and element 0 of that register is extracted into operand 0.
1170 (define_expand "reduc_plus_scal_v2di"
1171   [(match_operand:DI 0 "nonimmediate_operand")
1172    (match_operand:V2DI 1 "s_register_operand")]
1173   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1175   rtx vec = gen_reg_rtx (V2DImode);
1177   emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1178   emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1180   DONE;
;; Helper for the V2DI sum reduction: a single vadd.i64 of the two halves of
;; operand 1 (%e1 and %f1), written to %e0.  Represented as UNSPEC_VPADD and
;; restricted to little-endian.
1183 (define_insn "arm_reduc_plus_internal_v2di"
1184   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1185         (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1186                      UNSPEC_VPADD))]
1187   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1188   "vadd.i64\t%e0, %e1, %f1"
1189   [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a VD vector to a scalar.
;; neon_pairwise_reduce is driven with the neon_vpsmin<mode> generator, and
;; element 0 of the result is extracted into operand 0.
1192 (define_expand "reduc_smin_scal_<mode>"
1193   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1194    (match_operand:VD 1 "s_register_operand")]
1195   "ARM_HAVE_NEON_<MODE>_ARITH"
1197   rtx vec = gen_reg_rtx (<MODE>mode);
1199   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1200                         &gen_neon_vpsmin<mode>);
1201   /* The result is computed into every element of the vector.  */
1202   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1203   DONE;
;; Signed-minimum reduction of a VQ vector to a scalar.  Little-endian only.
;; quad_halves_smin<mode> first folds the vector to a <V_HALF> temporary,
;; which is then reduced by the half-width reduc_smin_scal expander.
1206 (define_expand "reduc_smin_scal_<mode>"
1207   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1208    (match_operand:VQ 1 "s_register_operand")]
1209   "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1211   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1213   emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1214   emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1216   DONE;
;; Signed-maximum reduction of a VD vector to a scalar.
;; neon_pairwise_reduce is driven with the neon_vpsmax<mode> generator, and
;; element 0 of the result is extracted into operand 0.
1219 (define_expand "reduc_smax_scal_<mode>"
1220   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1221    (match_operand:VD 1 "s_register_operand")]
1222   "ARM_HAVE_NEON_<MODE>_ARITH"
1224   rtx vec = gen_reg_rtx (<MODE>mode);
1225   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1226                         &gen_neon_vpsmax<mode>);
1227   /* The result is computed into every element of the vector.  */
1228   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1229   DONE;
;; Signed-maximum reduction of a VQ vector to a scalar.  Little-endian only.
;; quad_halves_smax<mode> first folds the vector to a <V_HALF> temporary,
;; which is then reduced by the half-width reduc_smax_scal expander.
1232 (define_expand "reduc_smax_scal_<mode>"
1233   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1234    (match_operand:VQ 1 "s_register_operand")]
1235   "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1237   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1239   emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1240   emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1242   DONE;
;; Unsigned-minimum reduction of a VDI vector to a scalar.
;; neon_pairwise_reduce is driven with the neon_vpumin<mode> generator, and
;; element 0 of the result is extracted into operand 0.
1245 (define_expand "reduc_umin_scal_<mode>"
1246   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1247    (match_operand:VDI 1 "s_register_operand")]
1248   "TARGET_NEON"
1250   rtx vec = gen_reg_rtx (<MODE>mode);
1251   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1252                         &gen_neon_vpumin<mode>);
1253   /* The result is computed into every element of the vector.  */
1254   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1255   DONE;
;; Unsigned-minimum reduction of a VQI vector to a scalar.  Little-endian
;; only.  quad_halves_umin<mode> first folds the vector to a <V_HALF>
;; temporary, which is then reduced by the half-width reduc_umin_scal expander.
1258 (define_expand "reduc_umin_scal_<mode>"
1259   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1260    (match_operand:VQI 1 "s_register_operand")]
1261   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1263   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1265   emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1266   emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1268   DONE;
;; Unsigned-maximum reduction of a VDI vector to a scalar.
;; neon_pairwise_reduce is driven with the neon_vpumax<mode> generator, and
;; element 0 of the result is extracted into operand 0.
1271 (define_expand "reduc_umax_scal_<mode>"
1272   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1273    (match_operand:VDI 1 "s_register_operand")]
1274   "TARGET_NEON"
1276   rtx vec = gen_reg_rtx (<MODE>mode);
1277   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1278                         &gen_neon_vpumax<mode>);
1279   /* The result is computed into every element of the vector.  */
1280   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1281   DONE;
;; Unsigned-maximum reduction of a VQI vector to a scalar.  Little-endian
;; only.  quad_halves_umax<mode> first folds the vector to a <V_HALF>
;; temporary, which is then reduced by the half-width reduc_umax_scal expander.
1284 (define_expand "reduc_umax_scal_<mode>"
1285   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1286    (match_operand:VQI 1 "s_register_operand")]
1287   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1289   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1291   emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1292   emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1294   DONE;
;; Pairwise add of two VD registers (vpadd.<V_if_elem>), represented as
;; UNSPEC_VPADD.  The scheduling type is the FP reduction class when
;; <Is_float_mode> holds and the integer reduction class otherwise.
1297 (define_insn "neon_vpadd_internal<mode>"
1298   [(set (match_operand:VD 0 "s_register_operand" "=w")
1299         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1300                     (match_operand:VD 2 "s_register_operand" "w")]
1301                    UNSPEC_VPADD))]
1302   "TARGET_NEON"
1303   "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1304   ;; Assume this schedules like vadd.
1305   [(set (attr "type")
1306       (if_then_else (match_test "<Is_float_mode>")
1307                     (const_string "neon_fp_reduc_add_s<q>")
1308                     (const_string "neon_reduc_add<q>")))]
;; Pairwise add of two V4HF registers (vpadd.f16), represented as
;; UNSPEC_VPADD.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the type is the integer "neon_reduc_add" class, whereas the
;; generic vpadd pattern picks an FP class for float modes -- confirm that
;; this is intended.
1311 (define_insn "neon_vpaddv4hf"
1312  [(set
1313    (match_operand:V4HF 0 "s_register_operand" "=w")
1314    (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1315                  (match_operand:V4HF 2 "s_register_operand" "w")]
1316     UNSPEC_VPADD))]
1317  "TARGET_NEON_FP16INST"
1318  "vpadd.f16\t%P0, %P1, %P2"
1319  [(set_attr "type" "neon_reduc_add")]
;; Signed (or floating-point) pairwise minimum of two VD registers
;; (vpmin.<V_s_elem>), represented as UNSPEC_VPSMIN.  The scheduling type is
;; chosen per mode via <Is_float_mode>.
1322 (define_insn "neon_vpsmin<mode>"
1323   [(set (match_operand:VD 0 "s_register_operand" "=w")
1324         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1325                     (match_operand:VD 2 "s_register_operand" "w")]
1326                    UNSPEC_VPSMIN))]
1327   "TARGET_NEON"
1328   "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1329   [(set (attr "type")
1330       (if_then_else (match_test "<Is_float_mode>")
1331                     (const_string "neon_fp_reduc_minmax_s<q>")
1332                     (const_string "neon_reduc_minmax<q>")))]
;; Signed (or floating-point) pairwise maximum of two VD registers
;; (vpmax.<V_s_elem>), represented as UNSPEC_VPSMAX.  The scheduling type is
;; chosen per mode via <Is_float_mode>.
1335 (define_insn "neon_vpsmax<mode>"
1336   [(set (match_operand:VD 0 "s_register_operand" "=w")
1337         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1338                     (match_operand:VD 2 "s_register_operand" "w")]
1339                    UNSPEC_VPSMAX))]
1340   "TARGET_NEON"
1341   "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1342   [(set (attr "type")
1343       (if_then_else (match_test "<Is_float_mode>")
1344                     (const_string "neon_fp_reduc_minmax_s<q>")
1345                     (const_string "neon_reduc_minmax<q>")))]
;; Unsigned pairwise minimum of two VDI registers (vpmin.<V_u_elem>),
;; represented as UNSPEC_VPUMIN.
1348 (define_insn "neon_vpumin<mode>"
1349   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1350         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1351                      (match_operand:VDI 2 "s_register_operand" "w")]
1352                    UNSPEC_VPUMIN))]
1353   "TARGET_NEON"
1354   "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1355   [(set_attr "type" "neon_reduc_minmax<q>")]
;; Unsigned pairwise maximum of two VDI registers (vpmax.<V_u_elem>),
;; represented as UNSPEC_VPUMAX.
1358 (define_insn "neon_vpumax<mode>"
1359   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1360         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1361                      (match_operand:VDI 2 "s_register_operand" "w")]
1362                    UNSPEC_VPUMAX))]
1363   "TARGET_NEON"
1364   "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1365   [(set_attr "type" "neon_reduc_minmax<q>")]
1368 ;; Saturating arithmetic
1370 ; NOTE: Neon supports many more saturating variants of instructions than the
1371 ; following, but these are all GCC currently understands.
1372 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1373 ; yet either, although these patterns may be used by intrinsics when they're
1374 ; added.
;; Signed saturating add (ss_plus) of two VD registers, emitted as
;; vqadd.<V_s_elem>.
1376 (define_insn "*ss_add<mode>_neon"
1377   [(set (match_operand:VD 0 "s_register_operand" "=w")
1378        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1379                    (match_operand:VD 2 "s_register_operand" "w")))]
1380   "TARGET_NEON"
1381   "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1382   [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) of two VD registers, emitted as
;; vqadd.<V_u_elem>.
1385 (define_insn "*us_add<mode>_neon"
1386   [(set (match_operand:VD 0 "s_register_operand" "=w")
1387        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1388                    (match_operand:VD 2 "s_register_operand" "w")))]
1389   "TARGET_NEON"
1390   "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1391   [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) of two VD registers, emitted as
;; vqsub.<V_s_elem>.
1394 (define_insn "*ss_sub<mode>_neon"
1395   [(set (match_operand:VD 0 "s_register_operand" "=w")
1396        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1397                     (match_operand:VD 2 "s_register_operand" "w")))]
1398   "TARGET_NEON"
1399   "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1400   [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) of two VD registers, emitted as
;; vqsub.<V_u_elem>.
1403 (define_insn "*us_sub<mode>_neon"
1404   [(set (match_operand:VD 0 "s_register_operand" "=w")
1405        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1406                     (match_operand:VD 2 "s_register_operand" "w")))]
1407   "TARGET_NEON"
1408   "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1409   [(set_attr "type" "neon_qsub<q>")]
;; Vector comparison producing a <V_cmp_result> mask.  Operand 1 supplies the
;; comparison code; operand 3 may be a register or zero.  Float modes are
;; only handled when flag_unsafe_math_optimizations is set.  All the work is
;; delegated to arm_expand_vector_compare.
;; NOTE(review): the meaning of the final `false' argument is defined by
;; arm_expand_vector_compare, which is not visible here.
1412 (define_expand "vec_cmp<mode><v_cmp_result>"
1413   [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
1414         (match_operator:<V_cmp_result> 1 "comparison_operator"
1415           [(match_operand:VDQWH 2 "s_register_operand")
1416            (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
1417   "TARGET_NEON
1418    && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1420   arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1421                              operands[2], operands[3], false);
1422   DONE;
;; Vector comparison on VDQIW integer modes whose result has the same mode as
;; the inputs.  Operand 1 supplies the comparison code, and the expansion is
;; delegated to arm_expand_vector_compare with the same argument shape as the
;; vec_cmp expander.
1425 (define_expand "vec_cmpu<mode><mode>"
1426   [(set (match_operand:VDQIW 0 "s_register_operand")
1427         (match_operator:VDQIW 1 "comparison_operator"
1428           [(match_operand:VDQIW 2 "s_register_operand")
1429            (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
1430   "TARGET_NEON"
1432   arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1433                              operands[2], operands[3], false);
1434   DONE;
;; Select between operands 1 and 2 under the mask in operand 3.  Implemented
;; with neon_vbsl<mode>; note the argument order passed to it is
;; (dest, mask, operand 1, operand 2).  Float modes are only handled when
;; flag_unsafe_math_optimizations is set.
1437 (define_expand "vcond_mask_<mode><v_cmp_result>"
1438   [(set (match_operand:VDQWH 0 "s_register_operand")
1439         (if_then_else:VDQWH
1440           (match_operand:<V_cmp_result> 3 "s_register_operand")
1441           (match_operand:VDQWH 1 "s_register_operand")
1442           (match_operand:VDQWH 2 "s_register_operand")))]
1443   "TARGET_NEON
1444    && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1446   emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
1447                                   operands[2]));
1448   DONE;
1451 ;; Patterns for builtins.
1453 ; good for plain vadd, vaddq.
;; vadd builtin for VCVTF modes.  It emits the canonical add<mode>3 pattern
;; when ARM_HAVE_NEON_<MODE>_ARITH holds, and otherwise falls back to the
;; neon_vadd<mode>_unspec insn.
1455 (define_expand "neon_vadd<mode>"
1456   [(match_operand:VCVTF 0 "s_register_operand")
1457    (match_operand:VCVTF 1 "s_register_operand")
1458    (match_operand:VCVTF 2 "s_register_operand")]
1459   "TARGET_NEON"
1461   if (ARM_HAVE_NEON_<MODE>_ARITH)
1462     emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1463   else
1464     emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1465                                            operands[2]));
1466   DONE;
;; vadd builtin for VH modes.  It always emits the canonical add<mode>3
;; pattern; there is no unspec fallback here.  Requires TARGET_NEON_FP16INST.
1469 (define_expand "neon_vadd<mode>"
1470   [(match_operand:VH 0 "s_register_operand")
1471    (match_operand:VH 1 "s_register_operand")
1472    (match_operand:VH 2 "s_register_operand")]
1473   "TARGET_NEON_FP16INST"
1475   emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1476   DONE;
;; vsub builtin for VH modes.  It always emits the canonical sub<mode>3
;; pattern.  Requires TARGET_NEON_FP16INST.
1479 (define_expand "neon_vsub<mode>"
1480   [(match_operand:VH 0 "s_register_operand")
1481    (match_operand:VH 1 "s_register_operand")
1482    (match_operand:VH 2 "s_register_operand")]
1483   "TARGET_NEON_FP16INST"
1485   emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
1486   DONE;
1489 ; Note that NEON operations don't support the full IEEE 754 standard: in
1490 ; particular, denormal values are flushed to zero.  This means that GCC cannot
1491 ; use those instructions for autovectorization, etc. unless
1492 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1493 ; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
1494 ; header) must work in either case: if -funsafe-math-optimizations is given,
1495 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1496 ; expand to unspecs (which may potentially limit the extent to which they might
1497 ; be optimized by generic code).
1499 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; vadd.<V_if_elem> on VCVTF modes, represented as UNSPEC_VADD rather than a
;; (plus ...) rtx.  The scheduling type is chosen per mode via
;; <Is_float_mode>.
1501 (define_insn "neon_vadd<mode>_unspec"
1502   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1503         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1504                       (match_operand:VCVTF 2 "s_register_operand" "w")]
1505                      UNSPEC_VADD))]
1506   "TARGET_NEON"
1507   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1508   [(set (attr "type")
1509       (if_then_else (match_test "<Is_float_mode>")
1510                     (const_string "neon_fp_addsub_s<q>")
1511                     (const_string "neon_add<q>")))]
;; vaddl: adds two VDI registers into a <V_widen> result.  One insn per
;; VADDL unspec; <sup> supplies the signedness prefix of the element suffix.
1514 (define_insn "neon_vaddl<sup><mode>"
1515   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1516         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1517                            (match_operand:VDI 2 "s_register_operand" "w")]
1518                           VADDL))]
1519   "TARGET_NEON"
1520   "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1521   [(set_attr "type" "neon_add_long")]
;; vaddw: adds a VDI register (operand 2) to a <V_widen> register
;; (operand 1), giving a <V_widen> result.  One insn per VADDW unspec.
1524 (define_insn "neon_vaddw<sup><mode>"
1525   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1526         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1527                            (match_operand:VDI 2 "s_register_operand" "w")]
1528                           VADDW))]
1529   "TARGET_NEON"
1530   "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1531   [(set_attr "type" "neon_add_widen")]
1534 ; vhadd and vrhadd.
;; One insn per VHADD unspec on VDQIW modes; <r> and <sup> select the
;; mnemonic variant and signedness prefix.
;; NOTE(review): the type is "neon_add_halve_q" for every mode, without the
;; <q> suffix most sibling patterns use -- confirm that this is intended.
1536 (define_insn "@neon_v<r>hadd<sup><mode>"
1537   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1538         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1539                        (match_operand:VDQIW 2 "s_register_operand" "w")]
1540                       VHADD))]
1541   "TARGET_NEON"
1542   "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1543   [(set_attr "type" "neon_add_halve_q")]
;; vqadd builtin on VDQIX modes, one insn per VQADD unspec.  Unlike the
;; ss_plus/us_plus patterns, the operation is expressed as an unspec.
1546 (define_insn "neon_vqadd<sup><mode>"
1547   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1548         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1549                        (match_operand:VDQIX 2 "s_register_operand" "w")]
1550                      VQADD))]
1551   "TARGET_NEON"
1552   "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1553   [(set_attr "type" "neon_qadd<q>")]
;; v<r>addhn: takes two VN registers and produces a <V_narrow> result.  One
;; insn per VADDHN unspec; <r> selects the mnemonic variant.
1556 (define_insn "neon_v<r>addhn<mode>"
1557   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1558         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1559                             (match_operand:VN 2 "s_register_operand" "w")]
1560                            VADDHN))]
1561   "TARGET_NEON"
1562   "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1563   [(set_attr "type" "neon_add_halve_narrow_q")]
1566 ;; Polynomial and Float multiplication.
;; vmul on VPF modes, represented as UNSPEC_VMUL.  <pf> supplies the
;; element-type prefix in the suffix; the scheduling type is the FP multiply
;; class when <Is_float_mode> holds and the integer multiply class otherwise.
1567 (define_insn "neon_vmul<pf><mode>"
1568   [(set (match_operand:VPF 0 "s_register_operand" "=w")
1569         (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1570                       (match_operand:VPF 2 "s_register_operand" "w")]
1571                      UNSPEC_VMUL))]
1572   "TARGET_NEON"
1573   "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1574   [(set (attr "type")
1575       (if_then_else (match_test "<Is_float_mode>")
1576                     (const_string "neon_fp_mul_s<q>")
1577                     (const_string "neon_mul_<V_elem_ch><q>")))]
;; vmul.f16 on VH modes, expressed as a canonical (mult ...) rtx rather than
;; an unspec.  Requires TARGET_NEON_FP16INST.
1580 (define_insn "neon_vmulf<mode>"
1581  [(set
1582    (match_operand:VH 0 "s_register_operand" "=w")
1583    (mult:VH
1584     (match_operand:VH 1 "s_register_operand" "w")
1585     (match_operand:VH 2 "s_register_operand" "w")))]
1586   "TARGET_NEON_FP16INST"
1587   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1588  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; vmla builtin for VDQW modes.  It emits the canonical
;; mul<mode>3add<mode>_neon pattern when ARM_HAVE_NEON_<MODE>_ARITH holds,
;; and otherwise the neon_vmla<mode>_unspec insn.  Operands are passed through
;; in order.
1591 (define_expand "neon_vmla<mode>"
1592   [(match_operand:VDQW 0 "s_register_operand")
1593    (match_operand:VDQW 1 "s_register_operand")
1594    (match_operand:VDQW 2 "s_register_operand")
1595    (match_operand:VDQW 3 "s_register_operand")]
1596   "TARGET_NEON"
1598   if (ARM_HAVE_NEON_<MODE>_ARITH)
1599     emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1600                                              operands[2], operands[3]));
1601   else
1602     emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1603                                            operands[2], operands[3]));
1604   DONE;
;; vfma builtin for VCVTF modes.  Requires TARGET_FMA.  The operands are
;; reordered for fma<mode>4_intrinsic: builtin operand 1 is passed last, after
;; operands 2 and 3.
1607 (define_expand "neon_vfma<VCVTF:mode>"
1608   [(match_operand:VCVTF 0 "s_register_operand")
1609    (match_operand:VCVTF 1 "s_register_operand")
1610    (match_operand:VCVTF 2 "s_register_operand")
1611    (match_operand:VCVTF 3 "s_register_operand")]
1612   "TARGET_NEON && TARGET_FMA"
1614   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1615                                        operands[1]));
1616   DONE;
;; VH-mode counterpart of the expander above, gated on TARGET_NEON_FP16INST.
;; It emits fma<mode>4 with the same reordering: operands 2 and 3 are passed
;; second and third, and operand 1 is passed last.
1619 (define_expand "neon_vfma<VH:mode>"
1620   [(match_operand:VH 0 "s_register_operand")
1621    (match_operand:VH 1 "s_register_operand")
1622    (match_operand:VH 2 "s_register_operand")
1623    (match_operand:VH 3 "s_register_operand")]
1624   "TARGET_NEON_FP16INST"
1626   emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
1627                              operands[1]));
1628   DONE;
;; Expands, under TARGET_NEON && TARGET_FMA, to fmsub<mode>4_intrinsic.
;; Operands 2 and 3 are passed second and third, and operand 1 is passed
;; last.
1631 (define_expand "neon_vfms<VCVTF:mode>"
1632   [(match_operand:VCVTF 0 "s_register_operand")
1633    (match_operand:VCVTF 1 "s_register_operand")
1634    (match_operand:VCVTF 2 "s_register_operand")
1635    (match_operand:VCVTF 3 "s_register_operand")]
1636   "TARGET_NEON && TARGET_FMA"
1638   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1639                                          operands[1]));
1640   DONE;
;; VH-mode expander gated on TARGET_NEON_FP16INST.  It emits
;; fmsub<mode>4_intrinsic with operands 2 and 3 passed second and third and
;; operand 1 passed last.
1643 (define_expand "neon_vfms<VH:mode>"
1644   [(match_operand:VH 0 "s_register_operand")
1645    (match_operand:VH 1 "s_register_operand")
1646    (match_operand:VH 2 "s_register_operand")
1647    (match_operand:VH 3 "s_register_operand")]
1648   "TARGET_NEON_FP16INST"
1650   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1651                                          operands[1]));
1652   DONE;
1655 ;; The expand RTL structure here is not important.
1656 ;; We use the gen_* functions anyway.
1657 ;; We just need something to wrap the iterators around.
;; The body asks arm_simd_vect_par_cnst_half for the <vfml_half_selector>
;; half of <VFML>mode and passes that one rtx as both trailing arguments
;; (operands 4 and 5) of vfm<vfml_op>l_<vfml_half><mode>_intrinsic.
1659 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
1660   [(set (match_operand:VCVTF 0 "s_register_operand")
1661      (unspec:VCVTF
1662         [(match_operand:VCVTF 1 "s_register_operand")
1663            (PLUSMINUS:<VFML>
1664              (match_operand:<VFML> 2 "s_register_operand")
1665              (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
1666   "TARGET_FP16FML"
1668   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1669   emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
1670                                                              operands[1],
1671                                                              operands[2],
1672                                                              operands[3],
1673                                                              half, half));
1674   DONE;
;; fma whose two multiplicands are float-extended vec_selects of operands 2
;; and 3 (the selectors, operands 4 and 5, must satisfy vect_par_constant_low)
;; and whose addend is operand 1.  Operand 1 carries a matching constraint on
;; operand 0.  Emits vfmal.f16 using the <V_lo> form of operands 2 and 3.
1677 (define_insn "vfmal_low<mode>_intrinsic"
1678  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1679         (fma:VCVTF
1680          (float_extend:VCVTF
1681           (vec_select:<VFMLSEL>
1682            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1683            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1684          (float_extend:VCVTF
1685           (vec_select:<VFMLSEL>
1686            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1687            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1688          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1689  "TARGET_FP16FML"
1690  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1691  [(set_attr "type" "neon_fp_mla_s<q>")]
;; fma in which the vec_select of operand 2 is negated before being
;; float-extended; the second multiplicand is the float-extended vec_select
;; of operand 3 and the addend is operand 1 (matching constraint on operand
;; 0).  Selector operands 4 and 5 must satisfy vect_par_constant_high.
;; Emits vfmsl.f16 using the <V_hi> form of operands 2 and 3.
1694 (define_insn "vfmsl_high<mode>_intrinsic"
1695  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1696         (fma:VCVTF
1697          (float_extend:VCVTF
1698           (neg:<VFMLSEL>
1699             (vec_select:<VFMLSEL>
1700               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1701               (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
1702          (float_extend:VCVTF
1703           (vec_select:<VFMLSEL>
1704            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1705            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1706          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1707  "TARGET_FP16FML"
1708  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1709  [(set_attr "type" "neon_fp_mla_s<q>")]
;; fma whose two multiplicands are float-extended vec_selects of operands 2
;; and 3 (selector operands 4 and 5 must satisfy vect_par_constant_high) and
;; whose addend is operand 1, which carries a matching constraint on operand
;; 0.  Emits vfmal.f16 using the <V_hi> form of operands 2 and 3.
1712 (define_insn "vfmal_high<mode>_intrinsic"
1713  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1714         (fma:VCVTF
1715          (float_extend:VCVTF
1716           (vec_select:<VFMLSEL>
1717            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1718            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1719          (float_extend:VCVTF
1720           (vec_select:<VFMLSEL>
1721            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1722            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1723          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1724  "TARGET_FP16FML"
1725  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1726  [(set_attr "type" "neon_fp_mla_s<q>")]
;; fma in which the vec_select of operand 2 is negated before being
;; float-extended; the second multiplicand is the float-extended vec_select
;; of operand 3 and the addend is operand 1 (matching constraint on operand
;; 0).  Selector operands 4 and 5 must satisfy vect_par_constant_low.
;; Emits vfmsl.f16 using the <V_lo> form of operands 2 and 3.
1729 (define_insn "vfmsl_low<mode>_intrinsic"
1730  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1731         (fma:VCVTF
1732          (float_extend:VCVTF
1733           (neg:<VFMLSEL>
1734             (vec_select:<VFMLSEL>
1735               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1736               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1737          (float_extend:VCVTF
1738           (vec_select:<VFMLSEL>
1739            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1740            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1741          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1742  "TARGET_FP16FML"
1743  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1744  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane expander in which operands 2 and 3 share <VFML> mode.  The constant
;; lane in operand 4 is remapped with NEON_ENDIAN_LANE_N for <VFML>mode, the
;; half selector comes from arm_simd_vect_par_cnst_half, and both are passed
;; (half, then lane) as the trailing arguments of
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic.
1747 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
1748   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1749      (unspec:VCVTF
1750         [(match_operand:VCVTF 1 "s_register_operand")
1751          (PLUSMINUS:<VFML>
1752            (match_operand:<VFML> 2 "s_register_operand")
1753            (match_operand:<VFML> 3 "s_register_operand"))
1754          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1755   "TARGET_FP16FML"
1757   rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
1758   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1759   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
1760                                                (operands[0], operands[1],
1761                                                 operands[2], operands[3],
1762                                                 half, lane));
1763   DONE;
;; Lane form of vfmal on the low half of operand 2: the second multiplicand
;; is a single HF element of operand 3, selected by constant operand 5 and
;; duplicated across <VFMLSEL>.  At output time the lane is remapped with
;; NEON_ENDIAN_LANE_N; if it lies beyond the last element of <VFMLSEL>mode
;; the index is rebased and the <V_hi> form of operand 3 is printed,
;; otherwise the <V_lo> form.
1766 (define_insn "vfmal_lane_low<mode>_intrinsic"
1767  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1768         (fma:VCVTF
1769          (float_extend:VCVTF
1770           (vec_select:<VFMLSEL>
1771            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1772            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1773          (float_extend:VCVTF
1774            (vec_duplicate:<VFMLSEL>
1775              (vec_select:HF
1776                (match_operand:<VFML> 3 "s_register_operand" "x")
1777                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1778          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1779  "TARGET_FP16FML"
1781     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1782     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1783       {
1784         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1785         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
1786       }
1787     else
1788       {
1789         operands[5] = GEN_INT (lane);
1790         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
1791       }
1792   }
1793  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane expander in which operand 3 has <VFMLSEL2> mode while operand 2 has
;; <VFML> mode.  The lane in operand 4 is remapped with NEON_ENDIAN_LANE_N
;; for <VFMLSEL2>mode, whereas the half selector is still built for
;; <VFML>mode.  Both are passed (half, then lane) to
;; vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic.
1796 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
1797   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1798      (unspec:VCVTF
1799         [(match_operand:VCVTF 1 "s_register_operand")
1800          (PLUSMINUS:<VFML>
1801            (match_operand:<VFML> 2 "s_register_operand")
1802            (match_operand:<VFMLSEL2> 3 "s_register_operand"))
1803          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1804   "TARGET_FP16FML"
1806   rtx lane
1807     = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
1808   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1809   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
1810                 (operands[0], operands[1], operands[2], operands[3],
1811                  half, lane));
1812   DONE;
1815 ;; Used to implement the intrinsics:
1816 ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1817 ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1818 ;; Needs a bit of care to get the modes of the different sub-expressions right
1819 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1820 ;; S or D subregister to select the appropriate lane from.
;; At output time new_lane is the lane index within one <VFMLSEL>-sized
;; register, and regdiff is added to REGNO (operands[3]) to pick the register
;; that holds that lane.  Operands 2 and 3 are then printed with
;; <V_lane_reg>.
1822 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
1823  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1824         (fma:VCVTF
1825          (float_extend:VCVTF
1826           (vec_select:<VFMLSEL>
1827            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1828            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1829          (float_extend:VCVTF
1830            (vec_duplicate:<VFMLSEL>
1831              (vec_select:HF
1832                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1833                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1834          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1835  "TARGET_FP16FML"
1837    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1838    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1839    int new_lane = lane % elts_per_reg;
1840    int regdiff = lane / elts_per_reg;
1841    operands[5] = GEN_INT (new_lane);
1842    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
1843       because we want the print_operand code to print the appropriate
1844       S or D register prefix.  */
1845    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1846    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
1847    return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
1849  [(set_attr "type" "neon_fp_mla_s<q>")]
1852 ;; Used to implement the intrinsics:
1853 ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1854 ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1855 ;; Needs a bit of care to get the modes of the different sub-expressions right
1856 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1857 ;; S or D subregister to select the appropriate lane from.
;; At output time new_lane is the lane index within one <VFMLSEL>-sized
;; register, and regdiff is added to REGNO (operands[3]) to pick the register
;; that holds that lane.  Operand 2 is left as matched and printed with
;; <V_hi>; only operand 3 is re-created and printed with <V_lane_reg>.
1859 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
1860  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1861         (fma:VCVTF
1862          (float_extend:VCVTF
1863           (vec_select:<VFMLSEL>
1864            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1865            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1866          (float_extend:VCVTF
1867            (vec_duplicate:<VFMLSEL>
1868              (vec_select:HF
1869                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1870                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1871          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1872  "TARGET_FP16FML"
1874    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1875    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1876    int new_lane = lane % elts_per_reg;
1877    int regdiff = lane / elts_per_reg;
1878    operands[5] = GEN_INT (new_lane);
1879    /* We re-create operands[3] in the halved VFMLSEL mode
1880       because we've calculated the correct half-width subreg to extract
1881       the lane from and we want to print *that* subreg instead.  */
1882    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1883    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
1885  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane form of vfmal on the high half of operand 2: the second multiplicand
;; is a single HF element of operand 3, selected by constant operand 5 and
;; duplicated across <VFMLSEL>.  At output time the lane is remapped with
;; NEON_ENDIAN_LANE_N; if it lies beyond the last element of <VFMLSEL>mode
;; the index is rebased and the <V_hi> form of operand 3 is printed,
;; otherwise the <V_lo> form.
1888 (define_insn "vfmal_lane_high<mode>_intrinsic"
1889  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1890         (fma:VCVTF
1891          (float_extend:VCVTF
1892           (vec_select:<VFMLSEL>
1893            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1894            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1895          (float_extend:VCVTF
1896            (vec_duplicate:<VFMLSEL>
1897              (vec_select:HF
1898                (match_operand:<VFML> 3 "s_register_operand" "x")
1899                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1900          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1901  "TARGET_FP16FML"
1902   {
1903     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1904     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1905       {
1906         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1907         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
1908       }
1909     else
1910       {
1911         operands[5] = GEN_INT (lane);
1912         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
1913       }
1914   }
1915  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane form of vfmsl on the low half of operand 2, which is negated before
;; being float-extended.  The second multiplicand is a single HF element of
;; operand 3, selected by constant operand 5 and duplicated across
;; <VFMLSEL>.  At output time the lane is remapped with NEON_ENDIAN_LANE_N;
;; if it lies beyond the last element of <VFMLSEL>mode the index is rebased
;; and the <V_hi> form of operand 3 is printed, otherwise the <V_lo> form.
1918 (define_insn "vfmsl_lane_low<mode>_intrinsic"
1919  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1920         (fma:VCVTF
1921          (float_extend:VCVTF
1922           (neg:<VFMLSEL>
1923             (vec_select:<VFMLSEL>
1924               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1925               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1926          (float_extend:VCVTF
1927            (vec_duplicate:<VFMLSEL>
1928              (vec_select:HF
1929                (match_operand:<VFML> 3 "s_register_operand" "x")
1930                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1931          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1932  "TARGET_FP16FML"
1934     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1935     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1936       {
1937         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1938         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
1939       }
1940     else
1941       {
1942         operands[5] = GEN_INT (lane);
1943         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
1944       }
1945   }
1946  [(set_attr "type" "neon_fp_mla_s<q>")]
1949 ;; Used to implement the intrinsics:
1950 ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1951 ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1952 ;; Needs a bit of care to get the modes of the different sub-expressions right
1953 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1954 ;; S or D subregister to select the appropriate lane from.
;; At output time new_lane is the lane index within one <VFMLSEL>-sized
;; register, and regdiff is added to REGNO (operands[3]) to pick the register
;; that holds that lane.  Operands 2 and 3 are then printed with
;; <V_lane_reg>.
1956 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
1957  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1958         (fma:VCVTF
1959          (float_extend:VCVTF
1960           (neg:<VFMLSEL>
1961             (vec_select:<VFMLSEL>
1962               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1963               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1964          (float_extend:VCVTF
1965            (vec_duplicate:<VFMLSEL>
1966              (vec_select:HF
1967                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1968                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1969          (match_operand:VCVTF 1 "s_register_operand" "0")))]
1970  "TARGET_FP16FML"
1972    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1973    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1974    int new_lane = lane % elts_per_reg;
1975    int regdiff = lane / elts_per_reg;
1976    operands[5] = GEN_INT (new_lane);
1977    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
1978       because we want the print_operand code to print the appropriate
1979       S or D register prefix.  */
1980    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1981    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
1982    return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
1984  [(set_attr "type" "neon_fp_mla_s<q>")]
1987 ;; Used to implement the intrinsics:
1988 ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1989 ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1990 ;; Needs a bit of care to get the modes of the different sub-expressions right
1991 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1992 ;; S or D subregister to select the appropriate lane from.
;; At output time new_lane is the lane index within one <VFMLSEL>-sized
;; register, and regdiff is added to REGNO (operands[3]) to pick the register
;; that holds that lane.  Operand 2 is left as matched and printed with
;; <V_hi>; only operand 3 is re-created and printed with <V_lane_reg>.
1994 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
1995  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1996         (fma:VCVTF
1997          (float_extend:VCVTF
1998           (neg:<VFMLSEL>
1999             (vec_select:<VFMLSEL>
2000              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2001              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2002          (float_extend:VCVTF
2003            (vec_duplicate:<VFMLSEL>
2004              (vec_select:HF
2005                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2006                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2007          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2008  "TARGET_FP16FML"
2010    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2011    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2012    int new_lane = lane % elts_per_reg;
2013    int regdiff = lane / elts_per_reg;
2014    operands[5] = GEN_INT (new_lane);
2015    /* We re-create operands[3] in the halved VFMLSEL mode
2016       because we've calculated the correct half-width subreg to extract
2017       the lane from and we want to print *that* subreg instead.  */
2018    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2019    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2021  [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane form of vfmsl on the high half of operand 2, which is negated before
;; being float-extended.  The second multiplicand is a single HF element of
;; operand 3, selected by constant operand 5 and duplicated across
;; <VFMLSEL>.  At output time the lane is remapped with NEON_ENDIAN_LANE_N;
;; if it lies beyond the last element of <VFMLSEL>mode the index is rebased
;; and the <V_hi> form of operand 3 is printed, otherwise the <V_lo> form.
2024 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2025  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2026         (fma:VCVTF
2027          (float_extend:VCVTF
2028           (neg:<VFMLSEL>
2029             (vec_select:<VFMLSEL>
2030              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2031              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2032          (float_extend:VCVTF
2033            (vec_duplicate:<VFMLSEL>
2034              (vec_select:HF
2035                (match_operand:<VFML> 3 "s_register_operand" "x")
2036                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2037          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2038  "TARGET_FP16FML"
2039   {
2040     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2041     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2042       {
2043         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2044         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2045       }
2046     else
2047       {
2048         operands[5] = GEN_INT (lane);
2049         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2050       }
2051   }
2052  [(set_attr "type" "neon_fp_mla_s<q>")]
2055 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; The operation is kept opaque as UNSPEC_VMLA.  Operand 1 carries a matching
;; constraint on operand 0, so the template prints only operands 0, 2 and 3.
;; The type attribute is neon_fp_mla_s<q> when <Is_float_mode> tests true and
;; neon_mla_<V_elem_ch><q> otherwise.
2057 (define_insn "neon_vmla<mode>_unspec"
2058   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2059         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2060                       (match_operand:VDQW 2 "s_register_operand" "w")
2061                       (match_operand:VDQW 3 "s_register_operand" "w")]
2062                     UNSPEC_VMLA))]
2063   "TARGET_NEON"
2064   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2065   [(set (attr "type")
2066       (if_then_else (match_test "<Is_float_mode>")
2067                     (const_string "neon_fp_mla_s<q>")
2068                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal over the VW modes, expressed through the VMLAL unspec iterator.
;; The result and operand 1 have <V_widen> mode and are tied by a matching
;; constraint; the template prints operand 0 with %q and operands 2 and 3
;; with %P.
2071 (define_insn "neon_vmlal<sup><mode>"
2072   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2073         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2074                            (match_operand:VW 2 "s_register_operand" "w")
2075                            (match_operand:VW 3 "s_register_operand" "w")]
2076                           VMLAL))]
2077   "TARGET_NEON"
2078   "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2079   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Expander over the VDQW modes.  When ARM_HAVE_NEON_<MODE>_ARITH holds it
;; emits mul<mode>3neg<mode>add<mode>_neon; otherwise it emits
;; neon_vmls<mode>_unspec.  Either way the four operands are forwarded in
;; their original order.
2082 (define_expand "neon_vmls<mode>"
2083   [(match_operand:VDQW 0 "s_register_operand")
2084    (match_operand:VDQW 1 "s_register_operand")
2085    (match_operand:VDQW 2 "s_register_operand")
2086    (match_operand:VDQW 3 "s_register_operand")]
2087   "TARGET_NEON"
2089   if (ARM_HAVE_NEON_<MODE>_ARITH)
2090     emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2091                  operands[1], operands[2], operands[3]));
2092   else
2093     emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2094                                            operands[2], operands[3]));
2095   DONE;
2098 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; The operation is kept opaque as UNSPEC_VMLS.  Operand 1 carries a matching
;; constraint on operand 0, so the template prints only operands 0, 2 and 3.
;; The type attribute is neon_fp_mla_s<q> when <Is_float_mode> tests true and
;; neon_mla_<V_elem_ch><q> otherwise.
2100 (define_insn "neon_vmls<mode>_unspec"
2101   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2102         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2103                       (match_operand:VDQW 2 "s_register_operand" "w")
2104                       (match_operand:VDQW 3 "s_register_operand" "w")]
2105                     UNSPEC_VMLS))]
2106   "TARGET_NEON"
2107   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2108   [(set (attr "type")
2109       (if_then_else (match_test "<Is_float_mode>")
2110                     (const_string "neon_fp_mla_s<q>")
2111                     (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl over the VW modes, expressed through the VMLSL unspec iterator.
;; The result and operand 1 have <V_widen> mode and are tied by a matching
;; constraint; the template prints operand 0 with %q and operands 2 and 3
;; with %P.
2114 (define_insn "neon_vmlsl<sup><mode>"
2115   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2116         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2117                            (match_operand:VW 2 "s_register_operand" "w")
2118                            (match_operand:VW 3 "s_register_operand" "w")]
2119                           VMLSL))]
2120   "TARGET_NEON"
2121   "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2122   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2125 ;; vqdmulh, vqrdmulh
;; One pattern covers both mnemonics: the <r> substitution in the name and
;; template comes from the VQDMULH unspec iterator.  Result and both inputs
;; share the VMDQI mode.
2126 (define_insn "neon_vq<r>dmulh<mode>"
2127   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2128         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2129                        (match_operand:VMDQI 2 "s_register_operand" "w")]
2130                       VQDMULH))]
2131   "TARGET_NEON"
2132   "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2133   [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2136 ;; vqrdmlah, vqrdmlsh
;; Gated on TARGET_NEON_RDMA.  The mnemonic is completed by the
;; <VQRDMLH_AS:neon_rdma_as> substitution.  Operand 1 carries a matching
;; constraint on operand 0, so only operands 0, 2 and 3 are printed.
;; NOTE(review): the type attribute uses the _long suffix although the result
;; mode equals the input mode (VMDQI) -- confirm this is intended.
2137 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2138   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2139         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2140                        (match_operand:VMDQI 2 "s_register_operand" "w")
2141                        (match_operand:VMDQI 3 "s_register_operand" "w")]
2142                       VQRDMLH_AS))]
2143   "TARGET_NEON_RDMA"
2144   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2145   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal over the VMDI modes, kept opaque as UNSPEC_VQDMLAL.  The result
;; and operand 1 have <V_widen> mode and are tied by a matching constraint;
;; the template prints operand 0 with %q and operands 2 and 3 with %P.
2148 (define_insn "neon_vqdmlal<mode>"
2149   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2150         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2151                            (match_operand:VMDI 2 "s_register_operand" "w")
2152                            (match_operand:VMDI 3 "s_register_operand" "w")]
2153                           UNSPEC_VQDMLAL))]
2154   "TARGET_NEON"
2155   "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2156   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl over the VMDI modes, kept opaque as UNSPEC_VQDMLSL.  The result
;; and operand 1 have <V_widen> mode and are tied by a matching constraint;
;; the template prints operand 0 with %q and operands 2 and 3 with %P.
2159 (define_insn "neon_vqdmlsl<mode>"
2160   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2161         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2162                            (match_operand:VMDI 2 "s_register_operand" "w")
2163                            (match_operand:VMDI 3 "s_register_operand" "w")]
2164                           UNSPEC_VQDMLSL))]
2165   "TARGET_NEON"
2166   "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2167   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull over the VW modes via the VMULL unspec iterator.  The two VW inputs
;; produce a <V_widen> result; operand 0 is printed with %q and the inputs
;; with %P.
2170 (define_insn "neon_vmull<sup><mode>"
2171   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2172         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2173                            (match_operand:VW 2 "s_register_operand" "w")]
2174                           VMULL))]
2175   "TARGET_NEON"
2176   "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2177   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull over the VMDI modes, kept opaque as UNSPEC_VQDMULL.  The two VMDI
;; inputs produce a <V_widen> result; operand 0 is printed with %q and the
;; inputs with %P.
2180 (define_insn "neon_vqdmull<mode>"
2181   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2182         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2183                            (match_operand:VMDI 2 "s_register_operand" "w")]
2184                           UNSPEC_VQDMULL))]
2185   "TARGET_NEON"
2186   "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2187   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander over the VCVTF modes.  When ARM_HAVE_NEON_<MODE>_ARITH holds it
;; emits the generic sub<mode>3 pattern; otherwise it emits
;; neon_vsub<mode>_unspec.  The three operands are forwarded unchanged.
2190 (define_expand "neon_vsub<mode>"
2191   [(match_operand:VCVTF 0 "s_register_operand")
2192    (match_operand:VCVTF 1 "s_register_operand")
2193    (match_operand:VCVTF 2 "s_register_operand")]
2194   "TARGET_NEON"
2196   if (ARM_HAVE_NEON_<MODE>_ARITH)
2197     emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2198   else
2199     emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2200                                            operands[2]));
2201   DONE;
2204 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; The subtraction is kept opaque as UNSPEC_VSUB over the VCVTF modes.  The
;; type attribute is neon_fp_addsub_s<q> when <Is_float_mode> tests true and
;; neon_sub<q> otherwise.
2206 (define_insn "neon_vsub<mode>_unspec"
2207   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2208         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2209                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2210                      UNSPEC_VSUB))]
2211   "TARGET_NEON"
2212   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2213   [(set (attr "type")
2214       (if_then_else (match_test "<Is_float_mode>")
2215                     (const_string "neon_fp_addsub_s<q>")
2216                     (const_string "neon_sub<q>")))]
;; vsubl over the VDI modes via the VSUBL unspec iterator.  Both inputs have
;; the VDI mode and the result has <V_widen> mode; operand 0 is printed with
;; %q and the inputs with %P.
2219 (define_insn "neon_vsubl<sup><mode>"
2220   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2221         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2222                            (match_operand:VDI 2 "s_register_operand" "w")]
2223                           VSUBL))]
2224   "TARGET_NEON"
2225   "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2226   [(set_attr "type" "neon_sub_long")]
;; vsubw over the VDI modes via the VSUBW unspec iterator.  Operand 1 already
;; has the <V_widen> result mode while operand 2 has the VDI mode, hence %q
;; for operands 0 and 1 and %P for operand 2 in the template.
2229 (define_insn "neon_vsubw<sup><mode>"
2230   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2231         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2232                            (match_operand:VDI 2 "s_register_operand" "w")]
2233                           VSUBW))]
2234   "TARGET_NEON"
2235   "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2236   [(set_attr "type" "neon_sub_widen")]
;; vqsub over the VDQIX modes via the VQSUB unspec iterator; result and both
;; inputs share the same mode.
2239 (define_insn "neon_vqsub<sup><mode>"
2240   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2241         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2242                        (match_operand:VDQIX 2 "s_register_operand" "w")]
2243                       VQSUB))]
2244   "TARGET_NEON"
2245   "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2246   [(set_attr "type" "neon_qsub<q>")]
;; vhsub over the VDQIW modes via the VHSUB unspec iterator; result and both
;; inputs share the same mode.
2249 (define_insn "neon_vhsub<sup><mode>"
2250   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2251         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2252                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2253                       VHSUB))]
2254   "TARGET_NEON"
2255   "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2256   [(set_attr "type" "neon_sub_halve<q>")]
;; vsubhn / vrsubhn: the <r> substitution comes from the VSUBHN unspec
;; iterator.  Both inputs have the VN mode and the result has <V_narrow>
;; mode, so operand 0 is printed with %P and the inputs with %q.
2259 (define_insn "neon_v<r>subhn<mode>"
2260   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2261         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2262                             (match_operand:VN 2 "s_register_operand" "w")]
2263                            VSUBHN))]
2264   "TARGET_NEON"
2265   "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2266   [(set_attr "type" "neon_sub_halve_narrow_q")]
2269 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2270 ;; without unsafe math optimizations.
;; Operand 2 may be a register or zero (reg_or_zero_operand).  For float
;; vector modes without flag_unsafe_math_optimizations the body dispatches on
;; the mode to the v2sf or v4sf *_insn_unspec generator and treats any other
;; mode as unreachable; in every other case it emits
;; neon_vc<cmp_op><mode>_insn.
2271 (define_expand "@neon_vc<cmp_op><mode>"
2272   [(match_operand:<V_cmp_result> 0 "s_register_operand")
2273      (neg:<V_cmp_result>
2274        (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2275                          (match_operand:VDQW 2 "reg_or_zero_operand")))]
2276   "TARGET_NEON"
2277   {
2278     /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2279        are enabled.  */
2280     if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2281         && !flag_unsafe_math_optimizations)
2282       {
2283         /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2284            we define gen_neon_vceq<mode>_insn_unspec only for float modes
2285            whereas this expander iterates over the integer modes as well,
2286            but we will never expand to UNSPECs for the integer comparisons.  */
2287         switch (<MODE>mode)
2288           {
2289             case E_V2SFmode:
2290               emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2291                                                               operands[1],
2292                                                               operands[2]));
2293               break;
2294             case E_V4SFmode:
2295               emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2296                                                               operands[1],
2297                                                               operands[2]));
2298               break;
2299             default:
2300               gcc_unreachable ();
2301           }
2302       }
2303     else
2304       emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2305                                                  operands[1],
2306                                                  operands[2]));
2307     DONE;
2308   }
;; Comparison insn with two alternatives for operand 2: a register (w) or
;; zero (Dz).  The condition excludes float vector modes unless
;; flag_unsafe_math_optimizations is set.  The assembler template is built at
;; output time with sprintf so that the type letter (f for float vector
;; modes, <cmp_type> otherwise) and the last operand (the register, or the
;; literal #0 for the second alternative) can be chosen; it is emitted via
;; output_asm_insn and an empty string is returned.  The type attribute is
;; neon_compare_zero<q> when operand 2 is a zero_operand, neon_compare<q>
;; otherwise.
2311 (define_insn "@neon_vc<cmp_op><mode>_insn"
2312   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2313         (neg:<V_cmp_result>
2314           (COMPARISONS:<V_cmp_result>
2315             (match_operand:VDQW 1 "s_register_operand" "w,w")
2316             (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2317   "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2318                     && !flag_unsafe_math_optimizations)"
2319   {
2320     char pattern[100];
2321     sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2322                       " %%<V_reg>1, %s",
2323                        GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2324                          ? "f" : "<cmp_type>",
2325                        which_alternative == 0
2326                          ? "%<V_reg>2" : "#0");
2327     output_asm_insn (pattern, operands);
2328     return "";
2329   }
2330   [(set (attr "type")
2331         (if_then_else (match_operand 2 "zero_operand")
2332                       (const_string "neon_compare_zero<q>")
2333                       (const_string "neon_compare<q>")))]
;; UNSPEC (NEON_VCMP) form of the vector compare for the VCVTF modes.
;; Always prints the ".f" data-type prefix.  Alternative 0 prints operand 2
;; as a register; alternative 1 (constraint Dz) prints "#0".
2336 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2337   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2338         (unspec:<V_cmp_result>
2339           [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2340            (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2341           NEON_VCMP))]
2342   "TARGET_NEON"
2343   {
2344     char pattern[100];
2345     sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2346                        " %%<V_reg>1, %s",
2347                        which_alternative == 0
2348                          ? "%<V_reg>2" : "#0");
2349     output_asm_insn (pattern, operands);
2350     return "";
2352   [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for COMPARISONS on the VH modes, gated on TARGET_NEON_FP16INST.
;; For float vector modes without -funsafe-math-optimizations it emits the
;; _fp16insn_unspec pattern; otherwise it emits the _fp16insn pattern.
;; Operands 0..2 are passed through unchanged.
2355 (define_expand "@neon_vc<cmp_op><mode>"
2356  [(match_operand:<V_cmp_result> 0 "s_register_operand")
2357   (neg:<V_cmp_result>
2358    (COMPARISONS:VH
2359     (match_operand:VH 1 "s_register_operand")
2360     (match_operand:VH 2 "reg_or_zero_operand")))]
2361  "TARGET_NEON_FP16INST"
2363   /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2364      are enabled.  */
2365   if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2366       && !flag_unsafe_math_optimizations)
2367     emit_insn
2368       (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2369        (operands[0], operands[1], operands[2]));
2370   else
2371     emit_insn
2372       (gen_neon_vc<cmp_op><mode>_fp16insn
2373        (operands[0], operands[1], operands[2]));
2374   DONE;
;; RTL-described (negated COMPARISONS) compare for the VH modes.  Requires
;; TARGET_NEON_FP16INST and is rejected for float vector modes unless
;; -funsafe-math-optimizations is enabled.  Alternative 1 (constraint Dz)
;; prints "#0" in place of operand 2.
2377 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2378  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2379    (neg:<V_cmp_result>
2380     (COMPARISONS:<V_cmp_result>
2381      (match_operand:VH 1 "s_register_operand" "w,w")
2382      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2383  "TARGET_NEON_FP16INST
2384   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2385   && !flag_unsafe_math_optimizations)"
2387   char pattern[100];
2388   sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2389            " %%<V_reg>1, %s",
2390            GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2391            ? "f" : "<cmp_type>",
2392            which_alternative == 0
2393            ? "%<V_reg>2" : "#0");
2394   output_asm_insn (pattern, operands);
2395   return "";
2397  [(set (attr "type")
2398    (if_then_else (match_operand 2 "zero_operand")
2399     (const_string "neon_compare_zero<q>")
2400     (const_string "neon_compare<q>")))])
;; UNSPEC (NEON_VCMP) compare for the VH modes, gated on
;; TARGET_NEON_FP16INST.  Always prints the ".f" data-type prefix;
;; alternative 1 (constraint Dz) prints "#0" in place of operand 2.
2402 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2403  [(set
2404    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2405    (unspec:<V_cmp_result>
2406     [(match_operand:VH 1 "s_register_operand" "w,w")
2407      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2408     NEON_VCMP))]
2409  "TARGET_NEON_FP16INST"
2411   char pattern[100];
2412   sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2413            " %%<V_reg>1, %s",
2414            which_alternative == 0
2415            ? "%<V_reg>2" : "#0");
2416   output_asm_insn (pattern, operands);
2417   return "";
2419  [(set_attr "type" "neon_fp_compare_s<q>")])
;; Register-register compare for the GTUGEU codes over the VDQIW modes.
;; The RTL is the negated comparison; the template prints the ".u"
;; data-type prefix followed by the element size.
2421 (define_insn "@neon_vc<code><mode>"
2422   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2423         (neg:<V_cmp_result>
2424           (GTUGEU:<V_cmp_result>
2425             (match_operand:VDQIW 1 "s_register_operand" "w")
2426             (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2427   "TARGET_NEON"
2428   "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2429   [(set_attr "type" "neon_compare<q>")]
;; Expander for the GLTE comparisons of the absolute values of two VCVTF
;; operands.  With -funsafe-math-optimizations it emits the RTL-described
;; _insn pattern; otherwise it emits the _insn_unspec pattern.
2432 (define_expand "neon_vca<cmp_op><mode>"
2433   [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2434         (neg:<V_cmp_result>
2435           (GLTE:<V_cmp_result>
2436             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2437             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2438   "TARGET_NEON"
2439   {
2440     if (flag_unsafe_math_optimizations)
2441       emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2442                                                   operands[2]));
2443     else
2444       emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2445                                                          operands[1],
2446                                                          operands[2]));
2447     DONE;
2448   }
;; RTL-described absolute compare (negated GLTE of abs operands) for the
;; VCVTF modes.  Only available with -funsafe-math-optimizations.
;; Emits "vac<cmp_op>".
2451 (define_insn "neon_vca<cmp_op><mode>_insn"
2452   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2453         (neg:<V_cmp_result>
2454           (GLTE:<V_cmp_result>
2455             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2456             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2457   "TARGET_NEON && flag_unsafe_math_optimizations"
2458   "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2459   [(set_attr "type" "neon_fp_compare_s<q>")]
;; UNSPEC (NEON_VAGLTE) form of the absolute compare for the VCVTF modes.
;; Emits "vac<cmp_op_unsp>" on two register operands.
2462 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2463   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2464         (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2465                                 (match_operand:VCVTF 2 "s_register_operand" "w")]
2466                                NEON_VAGLTE))]
2467   "TARGET_NEON"
2468   "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2469   [(set_attr "type" "neon_fp_compare_s<q>")]
;; VH-mode counterpart of the absolute-compare expander, gated on
;; TARGET_NEON_FP16INST.  With -funsafe-math-optimizations it emits the
;; _fp16insn pattern; otherwise it emits the _fp16insn_unspec pattern.
2472 (define_expand "neon_vca<cmp_op><mode>"
2473   [(set
2474     (match_operand:<V_cmp_result> 0 "s_register_operand")
2475     (neg:<V_cmp_result>
2476      (GLTE:<V_cmp_result>
2477       (abs:VH (match_operand:VH 1 "s_register_operand"))
2478       (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2479  "TARGET_NEON_FP16INST"
2481   if (flag_unsafe_math_optimizations)
2482     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2483                (operands[0], operands[1], operands[2]));
2484   else
2485     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2486                (operands[0], operands[1], operands[2]));
2487   DONE;
;; RTL-described absolute compare for the VH modes.  Requires both
;; TARGET_NEON_FP16INST and -funsafe-math-optimizations.
2490 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2491   [(set
2492     (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2493     (neg:<V_cmp_result>
2494      (GLTE:<V_cmp_result>
2495       (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2496       (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2497  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2498  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2499  [(set_attr "type" "neon_fp_compare_s<q>")]
;; UNSPEC (NEON_VAGLTE) form of the absolute compare for the VH modes.
;; Emits "vac<cmp_op_unsp>" on two register operands.  The condition is
;; TARGET_NEON_FP16INST, matching the VH expander that generates this
;; pattern and the sibling _fp16insn pattern, so the insn is never
;; considered available on targets without the FP16 instructions.
2502 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2503  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2504    (unspec:<V_cmp_result>
2505     [(match_operand:VH 1 "s_register_operand" "w")
2506      (match_operand:VH 2 "s_register_operand" "w")]
2507     NEON_VAGLTE))]
2508  "TARGET_NEON_FP16INST"
2509  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2510  [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for the VH modes, gated on
;; TARGET_NEON_FP16INST.  It forwards to gen_neon_vc<cmp_op><mode> with
;; CONST0_RTX (<MODE>mode) as the second comparison operand.
2513 (define_expand "neon_vc<cmp_op>z<mode>"
2514  [(set
2515    (match_operand:<V_cmp_result> 0 "s_register_operand")
2516    (COMPARISONS:<V_cmp_result>
2517     (match_operand:VH 1 "s_register_operand")
2518     (const_int 0)))]
2519  "TARGET_NEON_FP16INST"
2521   emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2522                                         CONST0_RTX (<MODE>mode)));
2523   DONE;
;; Matches (plus (eq (and op1 op2) zero) minus_one) over the VDQIW modes
;; and emits a single "vtst" on operands 1 and 2.  Operands 3 and 4 are
;; the zero and minus-one constants and do not appear in the output.
2526 (define_insn "neon_vtst_combine<mode>"
2527   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2528         (plus:VDQIW
2529           (eq:VDQIW
2530             (and:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
2531                        (match_operand:VDQIW 2 "s_register_operand" "w"))
2532             (match_operand:VDQIW 3 "zero_operand" "i"))
2533           (match_operand:VDQIW 4 "minus_one_operand" "i")))]
2534   "TARGET_NEON"
2535   "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2536   [(set_attr "type" "neon_tst<q>")]
;; UNSPEC (VABD) pattern over the VDQIW modes; emits "vabd" with the <sup>
;; data-type prefix and the element size.
2539 (define_insn "neon_vabd<sup><mode>"
2540   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2541         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2542                       (match_operand:VDQIW 2 "s_register_operand" "w")]
2543                      VABD))]
2544   "TARGET_NEON"
2545   "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2546   [(set_attr "type" "neon_abd<q>")]
;; UNSPEC_VABD_F pattern for the VH modes, gated on TARGET_NEON_FP16INST;
;; emits "vabd.<V_s_elem>".
;; NOTE(review): the "type" attribute here is neon_abd<q>, while the VCVTF
;; pattern neon_vabdf<mode> uses neon_fp_abd_s<q> -- confirm whether the
;; difference is intentional.
2549 (define_insn "neon_vabd<mode>"
2550   [(set (match_operand:VH 0 "s_register_operand" "=w")
2551     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2552                 (match_operand:VH 2 "s_register_operand" "w")]
2553      UNSPEC_VABD_F))]
2554  "TARGET_NEON_FP16INST"
2555  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2556   [(set_attr "type" "neon_abd<q>")]
;; UNSPEC_VABD_F pattern for the VCVTF modes; emits "vabd.<V_s_elem>".
2559 (define_insn "neon_vabdf<mode>"
2560   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2561         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2562                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2563                      UNSPEC_VABD_F))]
2564   "TARGET_NEON"
2565   "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2566   [(set_attr "type" "neon_fp_abd_s<q>")]
;; UNSPEC (VABDL) pattern: VW-mode inputs, <V_widen>-mode result.  Emits
;; "vabdl" with the result printed via %q and the inputs via %P.
2569 (define_insn "neon_vabdl<sup><mode>"
2570   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2571         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2572                            (match_operand:VW 2 "s_register_operand" "w")]
2573                           VABDL))]
2574   "TARGET_NEON"
2575   "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2576   [(set_attr "type" "neon_abd_long")]
;; Accumulating form: operand 0 = UNSPEC VABD (op2, op3) + op1, over the
;; VDQIW modes.  Operand 1 is tied to the output (constraint "0"), so only
;; operands 0, 2 and 3 appear in the "vaba" template.
2579 (define_insn "neon_vaba<sup><mode>"
2580   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2581         (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2582                                    (match_operand:VDQIW 3 "s_register_operand" "w")]
2583                                   VABD)
2584                     (match_operand:VDQIW 1 "s_register_operand" "0")))]
2585   "TARGET_NEON"
2586   "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2587   [(set_attr "type" "neon_arith_acc<q>")]
;; Widening accumulating form: operand 0 = UNSPEC VABDL (op2, op3) + op1,
;; with VW-mode inputs and a <V_widen>-mode accumulator.  Operand 1 is
;; tied to the output (constraint "0").  Emits "vabal".
2590 (define_insn "neon_vabal<sup><mode>"
2591   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2592         (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2593                                            (match_operand:VW 3 "s_register_operand" "w")]
2594                                            VABDL)
2595                          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2596   "TARGET_NEON"
2597   "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2598   [(set_attr "type" "neon_arith_acc<q>")]
;; Expander taking two V16QI inputs (operands 1, 2), a V4SI input
;; (operand 3) and a V4SI output (operand 0).  Steps, as emitted below:
;;   1. vabdl on the low V8QI halves of operands 1 and 2 into a V8HI temp;
;;   2. vget_high on each input, then vabal of those halves into the temp;
;;   3. vpadal of the temp into operands[3] (operands[3] is written);
;;   4. move operands[3] into operands[0].
2601 (define_expand "<sup>sadv16qi"
2602   [(use (match_operand:V4SI 0 "register_operand"))
2603    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
2604                   (use (match_operand:V16QI 2 "register_operand"))] VABAL)
2605    (use (match_operand:V4SI 3 "register_operand"))]
2606   "TARGET_NEON"
2607   {
2608     rtx reduc = gen_reg_rtx (V8HImode);
2609     rtx op1_highpart = gen_reg_rtx (V8QImode);
2610     rtx op2_highpart = gen_reg_rtx (V8QImode);
2612     emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
2613                                         gen_lowpart (V8QImode, operands[1]),
2614                                         gen_lowpart (V8QImode, operands[2])));
2616     emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
2617     emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
2618     emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
2619                                         op1_highpart, op2_highpart));
2620     emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
2622     emit_move_insn (operands[0], operands[3]);
2623     DONE;
2624   }
;; UNSPEC (VMAXMIN) pattern over the VDQIW modes; emits "v<maxmin>" with
;; the <sup> data-type prefix and the element size.
2627 (define_insn "neon_v<maxmin><sup><mode>"
2628   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2629         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2630                       (match_operand:VDQIW 2 "s_register_operand" "w")]
2631                      VMAXMIN))]
2632   "TARGET_NEON"
2633   "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2634   [(set_attr "type" "neon_minmax<q>")]
;; UNSPEC (VMAXMINF) pattern for the VCVTF modes; emits
;; "v<maxmin>.<V_s_elem>".
2637 (define_insn "neon_v<maxmin>f<mode>"
2638   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2639         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2640                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2641                      VMAXMINF))]
2642   "TARGET_NEON"
2643   "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2644   [(set_attr "type" "neon_fp_minmax_s<q>")]
;; UNSPEC (VMAXMINF) pattern for the VH modes, gated on
;; TARGET_NEON_FP16INST; emits "v<maxmin>.<V_s_elem>".
2647 (define_insn "neon_v<maxmin>f<mode>"
2648  [(set (match_operand:VH 0 "s_register_operand" "=w")
2649    (unspec:VH
2650     [(match_operand:VH 1 "s_register_operand" "w")
2651      (match_operand:VH 2 "s_register_operand" "w")]
2652     VMAXMINF))]
2653  "TARGET_NEON_FP16INST"
2654  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2655  [(set_attr "type" "neon_fp_minmax_s<q>")]
;; UNSPEC (VPMAXMINF) pattern for V4HF only, gated on TARGET_NEON_FP16INST;
;; emits "vp<maxmin>.f16" with all operands printed via %P.
;; NOTE(review): the "type" attribute is neon_reduc_minmax, while the VCVTF
;; pattern neon_vp<maxmin>f<mode> uses neon_fp_reduc_minmax_s<q> -- confirm
;; whether the difference is intentional.
2658 (define_insn "neon_vp<maxmin>fv4hf"
2659  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2660    (unspec:V4HF
2661     [(match_operand:V4HF 1 "s_register_operand" "w")
2662      (match_operand:V4HF 2 "s_register_operand" "w")]
2663     VPMAXMINF))]
2664  "TARGET_NEON_FP16INST"
2665  "vp<maxmin>.f16\t%P0, %P1, %P2"
2666   [(set_attr "type" "neon_reduc_minmax")]
;; UNSPEC (VMAXMINFNM) pattern for the VH modes, gated on
;; TARGET_NEON_FP16INST; emits "<fmaxmin_op>.<V_s_elem>".
2669 (define_insn "neon_<fmaxmin_op><mode>"
2670  [(set
2671    (match_operand:VH 0 "s_register_operand" "=w")
2672    (unspec:VH
2673     [(match_operand:VH 1 "s_register_operand" "w")
2674      (match_operand:VH 2 "s_register_operand" "w")]
2675     VMAXMINFNM))]
2676  "TARGET_NEON_FP16INST"
2677  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2678  [(set_attr "type" "neon_fp_minmax_s<q>")]
2681 ;; v<maxmin>nm intrinsics.
;; UNSPEC (VMAXMINFNM) pattern for the VCVTF modes; requires both
;; TARGET_NEON and TARGET_VFP5.  Emits "<fmaxmin_op>.<V_s_elem>".
2682 (define_insn "neon_<fmaxmin_op><mode>"
2683   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2684         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2685                        (match_operand:VCVTF 2 "s_register_operand" "w")]
2686                        VMAXMINFNM))]
2687   "TARGET_NEON && TARGET_VFP5"
2688   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2689   [(set_attr "type" "neon_fp_minmax_s<q>")]
2692 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and output template as the VCVTF
;; neon_<fmaxmin_op><mode> pattern, exposed under the <fmaxmin><mode>3 name.
2693 (define_insn "<fmaxmin><mode>3"
2694   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2695         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2696                        (match_operand:VCVTF 2 "s_register_operand" "w")]
2697                        VMAXMINFNM))]
2698   "TARGET_NEON && TARGET_VFP5"
2699   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2700   [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander over the VD modes that forwards its three operands unchanged
;; to gen_neon_vpadd_internal<mode>.
2703 (define_expand "neon_vpadd<mode>"
2704   [(match_operand:VD 0 "s_register_operand")
2705    (match_operand:VD 1 "s_register_operand")
2706    (match_operand:VD 2 "s_register_operand")]
2707   "TARGET_NEON"
2709   emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2710                                             operands[2]));
2711   DONE;
;; UNSPEC (VPADDL) pattern: one VDQIW-mode input, <V_double_width>-mode
;; result.  Emits "vpaddl".
2714 (define_insn "neon_vpaddl<sup><mode>"
2715   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2716         (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2717                                  VPADDL))]
2718   "TARGET_NEON"
2719   "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2720   [(set_attr "type" "neon_reduc_add_long")]
;; UNSPEC (VPADAL) pattern.  Operand 1 is the <V_double_width>-mode
;; accumulator and is tied to the output (constraint "0"); operand 2 is
;; the VDQIW-mode input.  The "vpadal" template prints operands 0 and 2.
2723 (define_insn "neon_vpadal<sup><mode>"
2724   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2725         (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2726                                   (match_operand:VDQIW 2 "s_register_operand" "w")]
2727                                  VPADAL))]
2728   "TARGET_NEON"
2729   "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2730   [(set_attr "type" "neon_reduc_add_acc")]
;; UNSPEC (VPMAXMIN) pattern over the VDI modes; emits "vp<maxmin>" with
;; the <sup> data-type prefix and the element size.
2733 (define_insn "neon_vp<maxmin><sup><mode>"
2734   [(set (match_operand:VDI 0 "s_register_operand" "=w")
2735         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2736                     (match_operand:VDI 2 "s_register_operand" "w")]
2737                    VPMAXMIN))]
2738   "TARGET_NEON"
2739   "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2740   [(set_attr "type" "neon_reduc_minmax<q>")]
;; UNSPEC (VPMAXMINF) pattern for the VCVTF modes; emits
;; "vp<maxmin>.<V_s_elem>".
2743 (define_insn "neon_vp<maxmin>f<mode>"
2744   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2745         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2746                     (match_operand:VCVTF 2 "s_register_operand" "w")]
2747                    VPMAXMINF))]
2748   "TARGET_NEON"
2749   "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2750   [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; UNSPEC_VRECPS pattern for the VCVTF modes; emits "vrecps.<V_if_elem>"
;; on two register inputs.
2753 (define_insn "neon_vrecps<mode>"
2754   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2755         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2756                        (match_operand:VCVTF 2 "s_register_operand" "w")]
2757                       UNSPEC_VRECPS))]
2758   "TARGET_NEON"
2759   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2760   [(set_attr "type" "neon_fp_recps_s<q>")]
;; UNSPEC_VRECPS pattern for the VH modes, gated on TARGET_NEON_FP16INST;
;; emits "vrecps.<V_if_elem>".
2763 (define_insn "neon_vrecps<mode>"
2764   [(set
2765     (match_operand:VH 0 "s_register_operand" "=w")
2766     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2767                 (match_operand:VH 2 "s_register_operand" "w")]
2768      UNSPEC_VRECPS))]
2769   "TARGET_NEON_FP16INST"
2770   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2771   [(set_attr "type" "neon_fp_recps_s<q>")]
;; UNSPEC_VRSQRTS pattern for the VCVTF modes; emits
;; "vrsqrts.<V_if_elem>" on two register inputs.
2774 (define_insn "neon_vrsqrts<mode>"
2775   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2776         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2777                        (match_operand:VCVTF 2 "s_register_operand" "w")]
2778                       UNSPEC_VRSQRTS))]
2779   "TARGET_NEON"
2780   "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2781   [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; UNSPEC_VRSQRTS pattern for the VH modes, gated on TARGET_NEON_FP16INST;
;; emits "vrsqrts.<V_if_elem>".
2784 (define_insn "neon_vrsqrts<mode>"
2785   [(set
2786     (match_operand:VH 0 "s_register_operand" "=w")
2787     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2788                  (match_operand:VH 2 "s_register_operand" "w")]
2789      UNSPEC_VRSQRTS))]
2790  "TARGET_NEON_FP16INST"
2791  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2792  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander over the VDQW modes that forwards both operands to
;; gen_abs<mode>2.
2795 (define_expand "neon_vabs<mode>"
2796   [(match_operand:VDQW 0 "s_register_operand")
2797    (match_operand:VDQW 1 "s_register_operand")]
2798   "TARGET_NEON"
2800   emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2801   DONE;
;; UNSPEC_VQABS pattern over the VDQIW modes; emits "vqabs.<V_s_elem>".
2804 (define_insn "neon_vqabs<mode>"
2805   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2806         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2807                       UNSPEC_VQABS))]
2808   "TARGET_NEON"
2809   "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2810   [(set_attr "type" "neon_qabs<q>")]
;; bswap over the VDQHSD modes, emitted as "vrev<V_sz_elem>.8".
2813 (define_insn "neon_bswap<mode>"
2814   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2815         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2816   "TARGET_NEON"
2817   "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2818   [(set_attr "type" "neon_rev<q>")]
;; Expander over the VDQW modes that forwards both operands to
;; gen_neon_neg<mode>2.
2821 (define_expand "neon_vneg<mode>"
2822   [(match_operand:VDQW 0 "s_register_operand")
2823    (match_operand:VDQW 1 "s_register_operand")]
2824   "TARGET_NEON"
2826   emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1]));
2827   DONE;
2831 ;; The vcadd and vcmla patterns are explicitly made UNSPEC because
2832 ;; their usage needs to guarantee that the source vectors are
2833 ;; contiguous.  It would be wrong to describe the operation without being able
2834 ;; to describe the permute that is also required, but even if that were done
2835 ;; the permute would have been created as a LOAD_LANES, which means the values
2836 ;; in the registers would be in the wrong order.
;; UNSPEC (VCADD) pattern over the VF modes, gated on TARGET_COMPLEX.
;; Emits "vcadd" with the rotation "#<rot>" as the final assembler operand.
2837 (define_insn "neon_vcadd<rot><mode>"
2838   [(set (match_operand:VF 0 "register_operand" "=w")
2839         (unspec:VF [(match_operand:VF 1 "register_operand" "w")
2840                     (match_operand:VF 2 "register_operand" "w")]
2841                     VCADD))]
2842   "TARGET_COMPLEX"
2843   "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
2844   [(set_attr "type" "neon_fcadd")]
;; Accumulating pattern over the VF modes, gated on TARGET_COMPLEX:
;; operand 0 = operand 1 + UNSPEC VCMLA (op2, op3).  Operand 1 is tied to
;; the output (constraint "0"), so the "vcmla" template prints operands
;; 0, 2 and 3 followed by "#<rot>".
2847 (define_insn "neon_vcmla<rot><mode>"
2848   [(set (match_operand:VF 0 "register_operand" "=w")
2849         (plus:VF (match_operand:VF 1 "register_operand" "0")
2850                  (unspec:VF [(match_operand:VF 2 "register_operand" "w")
2851                              (match_operand:VF 3 "register_operand" "w")]
2852                              VCMLA)))]
2853   "TARGET_COMPLEX"
2854   "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
2855   [(set_attr "type" "neon_fcmla")]
;; Lane form of vcmla over the VF modes: operand 3 has the same mode as
;; the other vector operands and operand 4 is a constant integer.
;; Operand 1 is tied to the output.  The operands array is replaced by
;; the result of neon_vcmla_lane_prepare_operands before the template,
;; which prints operand 3 as "d%c3[%c4]", is returned.
;; NOTE(review): the helper is defined elsewhere; presumably it rewrites
;; operands 3 and 4 into a D-register number and lane index -- confirm.
2858 (define_insn "neon_vcmla_lane<rot><mode>"
2859   [(set (match_operand:VF 0 "s_register_operand" "=w")
2860         (plus:VF (match_operand:VF 1 "s_register_operand" "0")
2861                  (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
2862                              (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
2863                              (match_operand:SI 4 "const_int_operand" "n")]
2864                              VCMLA)))]
2865   "TARGET_COMPLEX"
2866   {
2867     operands = neon_vcmla_lane_prepare_operands (operands);
2868     return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2869   }
2870   [(set_attr "type" "neon_fcmla")]
;; Lane form of vcmla over the VDF modes where operand 3 has the
;; <V_DOUBLE> mode of the other vector operands.  Operand 1 is tied to the
;; output.  The operands array is replaced by the result of
;; neon_vcmla_lane_prepare_operands before the template, which prints
;; operand 3 as "d%c3[%c4]", is returned.
;; NOTE(review): the helper is defined elsewhere; presumably it rewrites
;; operands 3 and 4 into a D-register number and lane index -- confirm.
2873 (define_insn "neon_vcmla_laneq<rot><mode>"
2874   [(set (match_operand:VDF 0 "s_register_operand" "=w")
2875         (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
2876                   (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
2877                               (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
2878                               (match_operand:SI 4 "const_int_operand" "n")]
2879                               VCMLA)))]
2880   "TARGET_COMPLEX"
2881   {
2882     operands = neon_vcmla_lane_prepare_operands (operands);
2883     return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2884   }
2885   [(set_attr "type" "neon_fcmla")]
;; Lane form of vcmla over the VQ_HSF modes where operand 3 has the
;; <V_HALF> mode of the other vector operands.  Operand 1 is tied to the
;; output.  The operands array is replaced by the result of
;; neon_vcmla_lane_prepare_operands before the template, which prints
;; operand 3 as "d%c3[%c4]", is returned.
;; NOTE(review): the helper is defined elsewhere; presumably it rewrites
;; operands 3 and 4 into a D-register number and lane index -- confirm.
2888 (define_insn "neon_vcmlaq_lane<rot><mode>"
2889   [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
2890         (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
2891                  (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
2892                                  (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
2893                                  (match_operand:SI 4 "const_int_operand" "n")]
2894                                  VCMLA)))]
2895   "TARGET_COMPLEX"
2896   {
2897     operands = neon_vcmla_lane_prepare_operands (operands);
2898     return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2899   }
2900   [(set_attr "type" "neon_fcmla")]
2903 ;; The complex mul operations always need to expand to two instructions.
2904 ;; The first operation does half the computation and the second does the
2905 ;; remainder.  Because of this, expand early.
;; Concretely: the first vcmla (<rotsplit1>) accumulates onto a zero vector
;; forced into a register and writes a fresh temporary; the second vcmla
;; (<rotsplit2>) accumulates onto that temporary and writes operand 0.
;; Both calls pass operands[2] before operands[1].  The expander is
;; disabled when BYTES_BIG_ENDIAN.
2906 (define_expand "cmul<conj_op><mode>3"
2907   [(set (match_operand:VDF 0 "register_operand")
2908         (unspec:VDF [(match_operand:VDF 1 "register_operand")
2909                      (match_operand:VDF 2 "register_operand")]
2910                     VCMUL_OP))]
2911   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
2913   rtx res1 = gen_reg_rtx (<MODE>mode);
2914   rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
2915   emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
2916                                               operands[2], operands[1]));
2917   emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
2918                                               operands[2], operands[1]));
2919   DONE;
2923 ;; These map to the auto-vectorizer Dot Product optab.
2924 ;; The auto-vectorizer expects a dot product builtin that also does an
2925 ;; accumulation into the provided register.
2926 ;; Given the following pattern
2928 ;; for (i=0; i<len; i++) {
2929 ;;     c = a[i] * b[i];
2930 ;;     r += c;
2931 ;; }
2932 ;; return r;
2934 ;; This can be auto-vectorized to
2935 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
2937 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
2938 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
2939 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
2940 ;; ...
2942 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Accumulating dot-product pattern, gated on TARGET_DOTPROD:
;; operand 0 = UNSPEC DOTPROD (op1, op2) + op3.  Operands 1 and 2 have the
;; <VSI2QI> mode; the accumulator (operand 3) is tied to the output
;; (constraint "0"), so the template prints operands 0, 1 and 2 only.
2943 (define_insn "<sup>dot_prod<vsi2qi>"
2944   [(set (match_operand:VCVTI 0 "register_operand" "=w")
2945         (plus:VCVTI
2946           (unspec:VCVTI [(match_operand:<VSI2QI> 1 "register_operand" "w")
2947                          (match_operand:<VSI2QI> 2 "register_operand" "w")]
2948                          DOTPROD)
2949           (match_operand:VCVTI 3 "register_operand" "0")))]
2950   "TARGET_DOTPROD"
2951   "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2952   [(set_attr "type" "neon_dot<q>")]
2955 ;; These instructions map to the __builtins for the Dot Product operations
;; Here the accumulator is operand 1 and the multiplicands are operands 2
;; and 3, whereas <sup>dot_prod<vsi2qi> takes the accumulator as operand 3.
;; No preparation statements are visible for this expander.
;; NOTE(review): operand 0 carries an "=w" constraint although the other
;; operands of this define_expand have none -- confirm whether it is needed.
2956 (define_expand "neon_<sup>dot<vsi2qi>"
2957   [(set (match_operand:VCVTI 0 "register_operand" "=w")
2958         (plus:VCVTI
2959           (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand")
2960                          (match_operand:<VSI2QI> 3 "register_operand")]
2961                          DOTPROD)
2962           (match_operand:VCVTI 1 "register_operand")))]
2963   "TARGET_DOTPROD"
2966 ;; These instructions map to the __builtins for the Dot Product operations.
;; UNSPEC_DOT_US variant, gated on TARGET_I8MM.  Operand 1 is the
;; accumulator and is tied to the output (constraint "0"); the template
;; prints operands 0, 2 and 3 with the fixed ".s8" suffix.
2967 (define_insn "neon_usdot<vsi2qi>"
2968   [(set (match_operand:VCVTI 0 "register_operand" "=w")
2969         (plus:VCVTI
2970           (unspec:VCVTI
2971             [(match_operand:<VSI2QI> 2 "register_operand" "w")
2972             (match_operand:<VSI2QI> 3 "register_operand" "w")]
2973             UNSPEC_DOT_US)
2974           (match_operand:VCVTI 1 "register_operand" "0")))]
2975   "TARGET_I8MM"
2976   "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2977   [(set_attr "type" "neon_dot<q>")]
2980 ;; These instructions map to the __builtins for the Dot Product
2981 ;; indexed operations.
;; Operand 3 is a V8QI register (constraint "t") and operand 4 is the
;; immediate lane index, printed as "%P3[%c4]".  Operand 1 is the
;; accumulator and is tied to the output (constraint "0").
2982 (define_insn "neon_<sup>dot_lane<vsi2qi>"
2983   [(set (match_operand:VCVTI 0 "register_operand" "=w")
2984         (plus:VCVTI
2985           (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
2986                          (match_operand:V8QI 3 "register_operand" "t")
2987                          (match_operand:SI 4 "immediate_operand" "i")]
2988                          DOTPROD)
2989           (match_operand:VCVTI 1 "register_operand" "0")))]
2990   "TARGET_DOTPROD"
2991   "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
2992   [(set_attr "type" "neon_dot<q>")]
2995 ;; These instructions map to the __builtins for the Dot Product
2996 ;; indexed operations.
;; Operand 3 is a V16QI register and operand 4 the immediate lane index.
;; When the lane exceeds GET_MODE_NUNITS (V2SImode) - 1, the index is
;; rebased by subtracting GET_MODE_NUNITS (V2SImode) and operand 3 is
;; printed with %f; otherwise the index is used as-is with %e.
;; NOTE(review): %e / %f presumably select the low / high D half of the
;; Q register -- confirm against the operand-printing code.
2997 (define_insn "neon_<sup>dot_laneq<vsi2qi>"
2998   [(set (match_operand:VCVTI 0 "register_operand" "=w")
2999         (plus:VCVTI
3000           (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
3001                          (match_operand:V16QI 3 "register_operand" "t")
3002                          (match_operand:SI 4 "immediate_operand" "i")]
3003                          DOTPROD)
3004           (match_operand:VCVTI 1 "register_operand" "0")))]
3005   "TARGET_DOTPROD"
3006   {
3007     int lane = INTVAL (operands[4]);
3008     if (lane > GET_MODE_NUNITS (V2SImode) - 1)
3009       {
3010         operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
3011         return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
3012       }
3013     else
3014       {
3015         operands[4] = GEN_INT (lane);
3016         return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
3017       }
3018   }
3019   [(set_attr "type" "neon_dot<q>")]
3022 ;; These instructions map to the __builtins for the Dot Product
3023 ;; indexed operations in the v8.6 I8MM extension.
;; Same operand layout as the DOTPROD lane pattern, but using the
;; DOTPROD_I8MM unspec iterator and gated on TARGET_I8MM.  Operand 3 is a
;; V8QI register printed as "%P3[%c4]".
3024 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3025   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3026         (plus:VCVTI
3027           (unspec:VCVTI
3028            [(match_operand:<VSI2QI> 2 "register_operand" "w")
3029             (match_operand:V8QI 3 "register_operand" "t")
3030             (match_operand:SI 4 "immediate_operand" "i")]
3031             DOTPROD_I8MM)
3032           (match_operand:VCVTI 1 "register_operand" "0")))]
3033   "TARGET_I8MM"
3034   "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
3035   [(set_attr "type" "neon_dot<q>")]
3038 ;; These instructions map to the __builtins for the Dot Product
3039 ;; indexed operations in the v8.6 I8MM extension.
;; DOTPROD_I8MM counterpart of the laneq pattern, gated on TARGET_I8MM.
;; When the lane exceeds GET_MODE_NUNITS (V2SImode) - 1, the index is
;; rebased by subtracting GET_MODE_NUNITS (V2SImode) and operand 3 is
;; printed with %f; otherwise the index is used as-is with %e.
;; NOTE(review): %e / %f presumably select the low / high D half of the
;; Q register -- confirm against the operand-printing code.
3040 (define_insn "neon_<sup>dot_laneq<vsi2qi>"
3041   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3042         (plus:VCVTI
3043           (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
3044                          (match_operand:V16QI 3 "register_operand" "t")
3045                          (match_operand:SI 4 "immediate_operand" "i")]
3046                          DOTPROD_I8MM)
3047           (match_operand:VCVTI 1 "register_operand" "0")))]
3048   "TARGET_I8MM"
3049   {
3050     int lane = INTVAL (operands[4]);
3051     if (lane > GET_MODE_NUNITS (V2SImode) - 1)
3052       {
3053         operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
3054         return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
3055       }
3056     else
3057       {
3058         operands[4] = GEN_INT (lane);
3059         return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
3060       }
3061   }
3062   [(set_attr "type" "neon_dot<q>")]
3065 ;; Auto-vectorizer pattern for usdot
;; Operands 1 and 2 are the <VSI2QI>-mode inputs and operand 3 is the
;; VCVTI-mode accumulator.  Gated on TARGET_I8MM.  No preparation
;; statements are visible for this expander.
3066 (define_expand "usdot_prod<vsi2qi>"
3067   [(set (match_operand:VCVTI 0 "register_operand")
3068         (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3069                                                         "register_operand")
3070                                    (match_operand:<VSI2QI> 2
3071                                                         "register_operand")]
3072                      UNSPEC_DOT_US)
3073                     (match_operand:VCVTI 3 "register_operand")))]
3074   "TARGET_I8MM"
;; copysign expander for the VCVTF modes.  Steps, as emitted below:
;;   1. load a <V_cmp_result>-mode register with 0x80000000 duplicated
;;      across the vector;
;;   2. copy operands[2] into operands[0];
;;   3. view the mask register in <MODE>mode via a subreg at offset 0;
;;   4. emit neon_vbsl with arguments (operands[0], mask, operands[0],
;;      operands[1]).
3077 (define_expand "copysign<mode>3"
3078   [(match_operand:VCVTF 0 "register_operand")
3079    (match_operand:VCVTF 1 "register_operand")
3080    (match_operand:VCVTF 2 "register_operand")]
3081   "TARGET_NEON"
3082   "{
3083      rtx v_bitmask_cast;
3084      rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3085      rtx c = gen_int_mode (0x80000000, SImode);
3087      emit_move_insn (v_bitmask,
3088                      gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3089      emit_move_insn (operands[0], operands[2]);
3090      v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3091                                            <VCVTF:V_cmp_result>mode, 0);
3092      emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3093                                      operands[1]));
3095      DONE;
3096   }"
;; neon_vqneg<mode>: for each VDQIW mode, operand 0 = UNSPEC_VQNEG of
;; operand 1.  Emits one vqneg.<V_s_elem> instruction; requires TARGET_NEON.
3099 (define_insn "neon_vqneg<mode>"
3100   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3101         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3102                       UNSPEC_VQNEG))]
3103   "TARGET_NEON"
3104   "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3105   [(set_attr "type" "neon_qneg<q>")]
;; neon_vcls<mode>: for each VDQIW mode, operand 0 = UNSPEC_VCLS of
;; operand 1.  Emits one vcls.<V_s_elem> instruction; requires TARGET_NEON.
3108 (define_insn "neon_vcls<mode>"
3109   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3110         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3111                       UNSPEC_VCLS))]
3112   "TARGET_NEON"
3113   "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3114   [(set_attr "type" "neon_cls<q>")]
;; neon_vclz<mode>: expressed with the generic clz RTL code (not an unspec)
;; over the VDQIW modes.  Emits vclz.<V_if_elem>; requires TARGET_NEON.
3117 (define_insn "neon_vclz<mode>"
3118   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3119         (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3120   "TARGET_NEON"
3121   "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3122   [(set_attr "type" "neon_cnt<q>")]
;; popcount<mode>2: generic popcount RTL over the VE modes, implemented with
;; a single vcnt.<V_sz_elem> instruction; requires TARGET_NEON.
3125 (define_insn "popcount<mode>2"
3126   [(set (match_operand:VE 0 "s_register_operand" "=w")
3127         (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3128   "TARGET_NEON"
3129   "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3130   [(set_attr "type" "neon_cnt<q>")]
;; neon_vcnt<mode>: thin expander for the VE modes that forwards both
;; operands unchanged to the popcount<mode>2 pattern.
3133 (define_expand "neon_vcnt<mode>"
3134   [(match_operand:VE 0 "s_register_operand")
3135    (match_operand:VE 1 "s_register_operand")]
3136   "TARGET_NEON"
3138   emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3139   DONE;
;; neon_vrecpe<mode>, VH variant: operand 0 = UNSPEC_VRECPE of operand 1.
;; The template hard-codes the .f16 suffix and the pattern is only enabled
;; under TARGET_NEON_FP16INST.
3142 (define_insn "neon_vrecpe<mode>"
3143   [(set (match_operand:VH 0 "s_register_operand" "=w")
3144         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3145                    UNSPEC_VRECPE))]
3146   "TARGET_NEON_FP16INST"
3147   "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3148   [(set_attr "type" "neon_fp_recpe_s<q>")]
;; neon_vrecpe<mode>, V32 variant: operand 0 = UNSPEC_VRECPE of operand 1.
;; The element suffix comes from <V_u_elem>; requires TARGET_NEON.
3151 (define_insn "neon_vrecpe<mode>"
3152   [(set (match_operand:V32 0 "s_register_operand" "=w")
3153         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3154                     UNSPEC_VRECPE))]
3155   "TARGET_NEON"
3156   "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3157   [(set_attr "type" "neon_fp_recpe_s<q>")]
;; neon_vrsqrte<mode>: for the V32 modes, operand 0 = UNSPEC_VRSQRTE of
;; operand 1.  Emits vrsqrte.<V_u_elem>; requires TARGET_NEON.
3160 (define_insn "neon_vrsqrte<mode>"
3161   [(set (match_operand:V32 0 "s_register_operand" "=w")
3162         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3163                     UNSPEC_VRSQRTE))]
3164   "TARGET_NEON"
3165   "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3166   [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; neon_vmvn<mode>: thin expander for the VDQIW modes that forwards both
;; operands unchanged to the one_cmpl<mode>2_neon pattern.
3169 (define_expand "neon_vmvn<mode>"
3170   [(match_operand:VDQIW 0 "s_register_operand")
3171    (match_operand:VDQIW 1 "s_register_operand")]
3172   "TARGET_NEON"
3174   emit_insn (gen_one_cmpl<mode>2_neon (operands[0], operands[1]));
3175   DONE;
;; Sign-extending lane read from a VD-mode register (operand 1) into an
;; SImode core register (operand 0).  Operand 2 is the immediate lane index;
;; when BYTES_BIG_ENDIAN it is replaced by GET_MODE_NUNITS - 1 - lane before
;; the vmov.s<V_sz_elem> template is returned.
3178 (define_insn "neon_vget_lane<mode>_sext_internal"
3179   [(set (match_operand:SI 0 "s_register_operand" "=r")
3180         (sign_extend:SI
3181           (vec_select:<V_elem>
3182             (match_operand:VD 1 "s_register_operand" "w")
3183             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3184   "TARGET_NEON"
3186   if (BYTES_BIG_ENDIAN)
3187     {
3188       int elt = INTVAL (operands[2]);
3189       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3190       operands[2] = GEN_INT (elt);
3191     }
3192   return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3194   [(set_attr "type" "neon_to_gp")]
;; Zero-extending lane read from a VD-mode register (operand 1) into an
;; SImode core register (operand 0).  Operand 2 is the immediate lane index;
;; when BYTES_BIG_ENDIAN it is replaced by GET_MODE_NUNITS - 1 - lane before
;; the vmov.u<V_sz_elem> template is returned.
3197 (define_insn "neon_vget_lane<mode>_zext_internal"
3198   [(set (match_operand:SI 0 "s_register_operand" "=r")
3199         (zero_extend:SI
3200           (vec_select:<V_elem>
3201             (match_operand:VD 1 "s_register_operand" "w")
3202             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3203   "TARGET_NEON"
3205   if (BYTES_BIG_ENDIAN)
3206     {
3207       int elt = INTVAL (operands[2]);
3208       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3209       operands[2] = GEN_INT (elt);
3210     }
3211   return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3213   [(set_attr "type" "neon_to_gp")]
;; VQ2-mode counterpart of the sign-extending lane read.  The lane index is
;; split into a half selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored when BYTES_BIG_ENDIAN).  The source is rewritten
;; as a <V_HALF>-mode REG at regno + 2 * half, the vmov.s is printed directly
;; with output_asm_insn, and an empty template is returned.
;; NOTE(review): the "2 *" stride presumably reflects how the two halves of
;; the source register are numbered -- confirm against the register layout.
3216 (define_insn "neon_vget_lane<mode>_sext_internal"
3217   [(set (match_operand:SI 0 "s_register_operand" "=r")
3218         (sign_extend:SI
3219           (vec_select:<V_elem>
3220             (match_operand:VQ2 1 "s_register_operand" "w")
3221             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3222   "TARGET_NEON"
3224   rtx ops[3];
3225   int regno = REGNO (operands[1]);
3226   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3227   unsigned int elt = INTVAL (operands[2]);
3228   unsigned int elt_adj = elt % halfelts;
3230   if (BYTES_BIG_ENDIAN)
3231     elt_adj = halfelts - 1 - elt_adj;
3233   ops[0] = operands[0];
3234   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3235   ops[2] = GEN_INT (elt_adj);
3236   output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3238   return "";
3240   [(set_attr "type" "neon_to_gp_q")]
;; VQ2-mode counterpart of the zero-extending lane read.  The lane index is
;; split into a half selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored when BYTES_BIG_ENDIAN).  The source is rewritten
;; as a <V_HALF>-mode REG at regno + 2 * half, the vmov.u is printed directly
;; with output_asm_insn, and an empty template is returned.
;; NOTE(review): the "2 *" stride presumably reflects how the two halves of
;; the source register are numbered -- confirm against the register layout.
3243 (define_insn "neon_vget_lane<mode>_zext_internal"
3244   [(set (match_operand:SI 0 "s_register_operand" "=r")
3245         (zero_extend:SI
3246           (vec_select:<V_elem>
3247             (match_operand:VQ2 1 "s_register_operand" "w")
3248             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3249   "TARGET_NEON"
3251   rtx ops[3];
3252   int regno = REGNO (operands[1]);
3253   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3254   unsigned int elt = INTVAL (operands[2]);
3255   unsigned int elt_adj = elt % halfelts;
3257   if (BYTES_BIG_ENDIAN)
3258     elt_adj = halfelts - 1 - elt_adj;
3260   ops[0] = operands[0];
3261   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3262   ops[2] = GEN_INT (elt_adj);
3263   output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3265   return "";
3267   [(set_attr "type" "neon_to_gp_q")]
;; Expander neon_vget_lane<mode> for the VDQW modes.  On big-endian the lane
;; index is XORed with (number of elements per 64 bits - 1), as the comment
;; in the body explains.  Modes with 32-bit elements are extracted through
;; gen_vec_extract<mode><V_elem_l>; every other element size goes through
;; the sign-extending neon_vget_lane<mode>_sext_internal pattern.
3270 (define_expand "neon_vget_lane<mode>"
3271   [(match_operand:<V_ext> 0 "s_register_operand")
3272    (match_operand:VDQW 1 "s_register_operand")
3273    (match_operand:SI 2 "immediate_operand")]
3274   "TARGET_NEON"
3276   if (BYTES_BIG_ENDIAN)
3277     {
3278       /* The intrinsics are defined in terms of a model where the
3279          element ordering in memory is vldm order, whereas the generic
3280          RTL is defined in terms of a model where the element ordering
3281          in memory is array order.  Convert the lane number to conform
3282          to this model.  */
3283       unsigned int elt = INTVAL (operands[2]);
3284       unsigned int reg_nelts
3285         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3286       elt ^= reg_nelts - 1;
3287       operands[2] = GEN_INT (elt);
3288     }
3290   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3291     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3292                                                 operands[2]));
3293   else
3294     emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3295                                                        operands[1],
3296                                                        operands[2]));
3297   DONE;
;; Expander neon_vget_laneu<mode> for the VDQIW modes.  Same structure as
;; the neon_vget_lane<mode> expander in this file: the big-endian lane fix-up
;; XORs the index with (elements per 64 bits - 1); 32-bit elements use
;; gen_vec_extract<mode><V_elem_l>, and every other element size uses the
;; zero-extending neon_vget_lane<mode>_zext_internal pattern.
3300 (define_expand "neon_vget_laneu<mode>"
3301   [(match_operand:<V_ext> 0 "s_register_operand")
3302    (match_operand:VDQIW 1 "s_register_operand")
3303    (match_operand:SI 2 "immediate_operand")]
3304   "TARGET_NEON"
3306   if (BYTES_BIG_ENDIAN)
3307     {
3308       /* The intrinsics are defined in terms of a model where the
3309          element ordering in memory is vldm order, whereas the generic
3310          RTL is defined in terms of a model where the element ordering
3311          in memory is array order.  Convert the lane number to conform
3312          to this model.  */
3313       unsigned int elt = INTVAL (operands[2]);
3314       unsigned int reg_nelts
3315         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3316       elt ^= reg_nelts - 1;
3317       operands[2] = GEN_INT (elt);
3318     }
3320   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3321     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3322                                                 operands[2]));
3323   else
3324     emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3325                                                        operands[1],
3326                                                        operands[2]));
3327   DONE;
;; neon_vget_lanedi: source and destination are both DImode, so the
;; expansion is a plain move of operand 1 into operand 0.  The lane index
;; (operand 2) is accepted but never referenced by the body.
3330 (define_expand "neon_vget_lanedi"
3331   [(match_operand:DI 0 "s_register_operand")
3332    (match_operand:DI 1 "s_register_operand")
3333    (match_operand:SI 2 "immediate_operand")]
3334   "TARGET_NEON"
3336   emit_move_insn (operands[0], operands[1]);
3337   DONE;
;; neon_vget_lanev2di: extracts one DImode element from a V2DI register.
;; On big-endian the lane index is flipped (XOR with 1).  The body asserts
;; that the lane is 0 or 1, then moves gen_lowpart for lane 0 or
;; gen_highpart for lane 1 into operand 0.
3340 (define_expand "neon_vget_lanev2di"
3341   [(match_operand:DI 0 "s_register_operand")
3342    (match_operand:V2DI 1 "s_register_operand")
3343    (match_operand:SI 2 "immediate_operand")]
3344   "TARGET_NEON"
3346   int lane;
3348 if (BYTES_BIG_ENDIAN)
3349     {
3350       /* The intrinsics are defined in terms of a model where the
3351          element ordering in memory is vldm order, whereas the generic
3352          RTL is defined in terms of a model where the element ordering
3353          in memory is array order.  Convert the lane number to conform
3354          to this model.  */
3355       unsigned int elt = INTVAL (operands[2]);
3356       unsigned int reg_nelts = 2;
3357       elt ^= reg_nelts - 1;
3358       operands[2] = GEN_INT (elt);
3359     }
3361   lane = INTVAL (operands[2]);
3362   gcc_assert ((lane ==0) || (lane == 1));
3363   emit_move_insn (operands[0], lane == 0
3364                                 ? gen_lowpart (DImode, operands[1])
3365                                 : gen_highpart (DImode, operands[1]));
3366   DONE;
;; Expander neon_vset_lane<mode> for the VDQ modes.  Operand 1 is the scalar
;; to insert, operand 2 the input vector and operand 3 the immediate lane.
;; On big-endian the lane is XORed with (elements per 64 bits - 1).  The lane
;; is then passed to vec_set<mode>_internal as a one-hot mask (1 << elt),
;; with the arguments ordered (dest, scalar, mask, vector).
3369 (define_expand "neon_vset_lane<mode>"
3370   [(match_operand:VDQ 0 "s_register_operand")
3371    (match_operand:<V_elem> 1 "s_register_operand")
3372    (match_operand:VDQ 2 "s_register_operand")
3373    (match_operand:SI 3 "immediate_operand")]
3374   "TARGET_NEON"
3376   unsigned int elt = INTVAL (operands[3]);
3378   if (BYTES_BIG_ENDIAN)
3379     {
3380       unsigned int reg_nelts
3381         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3382       elt ^= reg_nelts - 1;
3383     }
3385   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3386                                          GEN_INT (1 << elt), operands[2]));
3387   DONE;
3390 ; Operands 2 and 3 are ignored: the body only moves operand 1 (the DImode
;; value to insert) into operand 0 and never references the old DImode
;; vector (operand 2) or the lane index (operand 3).
3392 (define_expand "neon_vset_lanedi"
3393   [(match_operand:DI 0 "s_register_operand")
3394    (match_operand:DI 1 "s_register_operand")
3395    (match_operand:DI 2 "s_register_operand")
3396    (match_operand:SI 3 "immediate_operand")]
3397   "TARGET_NEON"
3399   emit_move_insn (operands[0], operands[1]);
3400   DONE;
;; neon_vcreate<mode>: reinterprets the DImode operand 1 in the VD_RE vector
;; mode via gen_lowpart and moves the result into operand 0.
3403 (define_expand "neon_vcreate<mode>"
3404   [(match_operand:VD_RE 0 "s_register_operand")
3405    (match_operand:DI 1 "general_operand")]
3406   "TARGET_NEON"
3408   rtx src = gen_lowpart (<MODE>mode, operands[1]);
3409   emit_move_insn (operands[0], src);
3410   DONE;
;; neon_vdup_n<mode>, VX variant: vec_duplicate of a scalar held in a core
;; register ("r" constraint) into every element of operand 0, emitted as
;; vdup.<V_sz_elem>.
3413 (define_insn "neon_vdup_n<mode>"
3414   [(set (match_operand:VX 0 "s_register_operand" "=w")
3415         (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3416   "TARGET_NEON"
3417   "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3418   [(set_attr "type" "neon_from_gp<q>")]
;; neon_vdup_nv4hf: vec_duplicate of an HFmode value held in a core register
;; into a V4HF destination, emitted as vdup.16 with the destination printed
;; using %P.
3421 (define_insn "neon_vdup_nv4hf"
3422   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3423         (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3424   "TARGET_NEON"
3425   "vdup.16\t%P0, %1"
3426   [(set_attr "type" "neon_from_gp")]
;; neon_vdup_nv8hf: vec_duplicate of an HFmode value held in a core register
;; into a V8HF destination, emitted as vdup.16 with the destination printed
;; using %q.
3429 (define_insn "neon_vdup_nv8hf"
3430   [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3431         (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3432   "TARGET_NEON"
3433   "vdup.16\t%q0, %1"
3434   [(set_attr "type" "neon_from_gp_q")]
;; neon_vdup_nv4bf: vec_duplicate of a BFmode value held in a core register
;; into a V4BF destination, emitted as vdup.16 with the destination printed
;; using %P.
3437 (define_insn "neon_vdup_nv4bf"
3438   [(set (match_operand:V4BF 0 "s_register_operand" "=w")
3439         (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))]
3440   "TARGET_NEON"
3441   "vdup.16\t%P0, %1"
3442   [(set_attr "type" "neon_from_gp")]
;; neon_vdup_nv8bf: vec_duplicate of a BFmode value held in a core register
;; into a V8BF destination, emitted as vdup.16 with the destination printed
;; using %q.
3445 (define_insn "neon_vdup_nv8bf"
3446   [(set (match_operand:V8BF 0 "s_register_operand" "=w")
3447         (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))]
3448   "TARGET_NEON"
3449   "vdup.16\t%q0, %1"
3450   [(set_attr "type" "neon_from_gp_q")]
;; neon_vdup_n<mode>, V32 variant, with two alternatives.  Alternative 0
;; takes the scalar from a core register ("r") and prints it as %1;
;; alternative 1 takes it from the "t" register class and prints it with the
;; %y modifier.  The "type" attribute differs per alternative accordingly.
3453 (define_insn "neon_vdup_n<mode>"
3454   [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3455         (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3456   "TARGET_NEON"
3457   "@
3458   vdup.<V_sz_elem>\t%<V_reg>0, %1
3459   vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3460   [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; neon_vdup_ndi: both operands are DImode, so the expansion is a plain move
;; of operand 1 into operand 0.
3463 (define_expand "neon_vdup_ndi"
3464   [(match_operand:DI 0 "s_register_operand")
3465    (match_operand:DI 1 "s_register_operand")]
3466   "TARGET_NEON"
3468   emit_move_insn (operands[0], operands[1]);
3469   DONE;
;; neon_vdup_nv2di: vec_duplicate of a DImode value into V2DI, emitted as two
;; vmov instructions, one writing %e0 and one writing %f0 (hence length 8).
;; Alternative 0 reads the source from core registers (printed as %Q1, %R1);
;; alternative 1 reads it from a "w" register (printed as %P1).
3473 (define_insn "neon_vdup_nv2di"
3474   [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3475         (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3476   "TARGET_NEON"
3477   "@
3478   vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3479   vmov\t%e0, %P1\;vmov\t%f0, %P1"
3480   [(set_attr "length" "8")
3481    (set_attr "type" "multiple")]
;; neon_vdup_lane<mode>_internal, VDQW variant: duplicates the lane of
;; operand 1 (a <V_double_vector_mode> register) selected by immediate
;; operand 2 into every element of operand 0.  When BYTES_BIG_ENDIAN the lane
;; becomes GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane.  The
;; destination is printed with %P when <Is_d_reg> is true and %q otherwise.
3484 (define_insn "neon_vdup_lane<mode>_internal"
3485   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3486         (vec_duplicate:VDQW 
3487           (vec_select:<V_elem>
3488             (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3489             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3490   "TARGET_NEON"
3492   if (BYTES_BIG_ENDIAN)
3493     {
3494       int elt = INTVAL (operands[2]);
3495       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3496       operands[2] = GEN_INT (elt);
3497     }
3498   if (<Is_d_reg>)
3499     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3500   else
3501     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3503   [(set_attr "type" "neon_dup<q>")]
;; neon_vdup_lane<mode>_internal, VHFBF variant: same body as the VDQW
;; variant (big-endian lane mirroring, %P or %q destination chosen by
;; <Is_d_reg>), but only enabled when TARGET_NEON is combined with
;; TARGET_FP16 or TARGET_BF16_SIMD.
3506 (define_insn "neon_vdup_lane<mode>_internal"
3507  [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
3508    (vec_duplicate:VHFBF
3509     (vec_select:<V_elem>
3510      (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3511      (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3512  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3514   if (BYTES_BIG_ENDIAN)
3515     {
3516       int elt = INTVAL (operands[2]);
3517       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3518       operands[2] = GEN_INT (elt);
3519     }
3520   if (<Is_d_reg>)
3521     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3522   else
3523     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3525   [(set_attr "type" "neon_dup<q>")]
;; Expander neon_vdup_lane<mode> for the VDQW modes.  On big-endian the lane
;; index is XORed with (elements per 64 bits of <V_double_vector_mode> - 1);
;; the operands are then handed to neon_vdup_lane<mode>_internal.
3528 (define_expand "neon_vdup_lane<mode>"
3529   [(match_operand:VDQW 0 "s_register_operand")
3530    (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3531    (match_operand:SI 2 "immediate_operand")]
3532   "TARGET_NEON"
3534   if (BYTES_BIG_ENDIAN)
3535     {
3536       unsigned int elt = INTVAL (operands[2]);
3537       unsigned int reg_nelts
3538         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3539       elt ^= reg_nelts - 1;
3540       operands[2] = GEN_INT (elt);
3541     }
3542     emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3543                                                   operands[2]));
3544     DONE;
;; Expander neon_vdup_lane<mode> for the VHFBF modes, enabled when
;; TARGET_NEON is combined with TARGET_FP16 or TARGET_BF16_SIMD.  On
;; big-endian the lane index is XORed with (elements per 64 bits of
;; <V_double_vector_mode> - 1); the operands are then handed to
;; neon_vdup_lane<mode>_internal.
3547 (define_expand "neon_vdup_lane<mode>"
3548   [(match_operand:VHFBF 0 "s_register_operand")
3549    (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3550    (match_operand:SI 2 "immediate_operand")]
3551   "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3553   if (BYTES_BIG_ENDIAN)
3554     {
3555       unsigned int elt = INTVAL (operands[2]);
3556       unsigned int reg_nelts
3557         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3558       elt ^= reg_nelts - 1;
3559       operands[2] = GEN_INT (elt);
3560     }
3561   emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3562                                                 operands[2]));
3563   DONE;
3566 ; Scalar index is ignored, since only zero is valid here.
;; neon_vdup_lanedi: expands to a plain DImode move of operand 1 into
;; operand 0; operand 2 is never referenced by the body.
3567 (define_expand "neon_vdup_lanedi"
3568   [(match_operand:DI 0 "s_register_operand")
3569    (match_operand:DI 1 "s_register_operand")
3570    (match_operand:SI 2 "immediate_operand")]
3571   "TARGET_NEON"
3573   emit_move_insn (operands[0], operands[1]);
3574   DONE;
3577 ; Likewise for v2di, as the DImode second operand has only a single element.
;; neon_vdup_lanev2di: forwards operands 0 and 1 to neon_vdup_nv2di; the lane
;; index in operand 2 is never referenced by the body.
3578 (define_expand "neon_vdup_lanev2di"
3579   [(match_operand:V2DI 0 "s_register_operand")
3580    (match_operand:DI 1 "s_register_operand")
3581    (match_operand:SI 2 "immediate_operand")]
3582   "TARGET_NEON"
3584   emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3585   DONE;
3588 ; Disabled before reload because we don't want combine doing something silly,
3589 ; but used by the post-reload expansion of neon_vcombine.
;; *neon_vswp<mode>: two parallel sets that exchange operands 0 and 1 (both
;; use the read-write "+w" constraint), emitted as a single vswp.  The
;; condition requires reload_completed, so it only matches after reload.
3590 (define_insn "*neon_vswp<mode>"
3591   [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3592         (match_operand:VDQX 1 "s_register_operand" "+w"))
3593    (set (match_dup 1) (match_dup 0))]
3594   "TARGET_NEON && reload_completed"
3595   "vswp\t%<V_reg>0, %<V_reg>1"
3596   [(set_attr "type" "neon_permute<q>")]
3599 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3600 ;; dest vector.
3601 ;; FIXME: A different implementation of this builtin could make it much
3602 ;; more likely that we wouldn't actually need to output anything (we could make
3603 ;; it so that the reg allocator puts things in the right places magically
3604 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; neon_vcombine<mode>: vec_concat of two VDX registers into a <V_DOUBLE>
;; register.  The insn has no direct assembly output ("#"); once
;; reload_completed holds it is split by calling neon_split_vcombine on the
;; operands.
3606 (define_insn_and_split "neon_vcombine<mode>"
3607   [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3608         (vec_concat:<V_DOUBLE>
3609           (match_operand:VDX 1 "s_register_operand" "w")
3610           (match_operand:VDX 2 "s_register_operand" "w")))]
3611   "TARGET_NEON"
3612   "#"
3613   "&& reload_completed"
3614   [(const_int 0)]
3616   neon_split_vcombine (operands);
3617   DONE;
3619 [(set_attr "type" "multiple")]
;; neon_vget_high<mode>: for the VQXBF modes, moves into operand 0 the
;; <V_HALF>-mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).
3622 (define_expand "neon_vget_high<mode>"
3623   [(match_operand:<V_HALF> 0 "s_register_operand")
3624    (match_operand:VQXBF 1 "s_register_operand")]
3625   "TARGET_NEON"
3627   emit_move_insn (operands[0],
3628                   simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3629                                        GET_MODE_SIZE (<V_HALF>mode)));
3630   DONE;
;; neon_vget_low<mode>: for the VQX modes, moves into operand 0 the
;; <V_HALF>-mode subreg of operand 1 at byte offset 0.
;; NOTE(review): this iterates over VQX while neon_vget_high<mode> in this
;; file iterates over VQXBF -- confirm the difference is intentional.
3633 (define_expand "neon_vget_low<mode>"
3634   [(match_operand:<V_HALF> 0 "s_register_operand")
3635    (match_operand:VQX 1 "s_register_operand")]
3636   "TARGET_NEON"
3638   emit_move_insn (operands[0],
3639                   simplify_gen_subreg (<V_HALF>mode, operands[1],
3640                                        <MODE>mode, 0));
3641   DONE;
;; float<mode><V_cvtto>2: generic "float" RTL from a VCVTI vector to
;; <V_CVTTO>, emitted as vcvt.f32.s32.  Disabled when flag_rounding_math is
;; set.
3644 (define_insn "float<mode><V_cvtto>2"
3645   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3646         (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3647   "TARGET_NEON && !flag_rounding_math"
3648   "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3649   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; floatuns<mode><V_cvtto>2: generic "unsigned_float" RTL from a VCVTI vector
;; to <V_CVTTO>, emitted as vcvt.f32.u32.  Disabled when flag_rounding_math
;; is set.
3652 (define_insn "floatuns<mode><V_cvtto>2"
3653   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3654         (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] 
3655   "TARGET_NEON && !flag_rounding_math"
3656   "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3657   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; fix_trunc<mode><V_cvtto>2: generic "fix" RTL from a VCVTF vector to
;; <V_CVTTO>, emitted as vcvt.s32.f32.  Requires only TARGET_NEON.
3660 (define_insn "fix_trunc<mode><V_cvtto>2"
3661   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3662         (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3663   "TARGET_NEON"
3664   "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3665   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; fixuns_trunc<mode><V_cvtto>2: generic "unsigned_fix" RTL from a VCVTF
;; vector to <V_CVTTO>, emitted as vcvt.u32.f32.  Requires only TARGET_NEON.
3668 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3669   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3670         (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3671   "TARGET_NEON"
3672   "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3673   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup><mode>, VCVTF source: unspec conversion (VCVT_US iterator)
;; from a VCVTF vector to <V_CVTTO>.  <sup> supplies the signedness letter of
;; the 32-bit destination type in the vcvt mnemonic.
3676 (define_insn "neon_vcvt<sup><mode>"
3677   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3678         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3679                           VCVT_US))]
3680   "TARGET_NEON"
3681   "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3682   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup><mode>, VCVTI source: unspec conversion (VCVT_US iterator)
;; from a VCVTI vector to <V_CVTTO>.  <sup> supplies the signedness letter of
;; the 32-bit source type in the vcvt mnemonic; the destination is f32.
3685 (define_insn "neon_vcvt<sup><mode>"
3686   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3687         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3688                           VCVT_US))]
3689   "TARGET_NEON"
3690   "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3691   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; neon_vcvtv4sfv4hf: UNSPEC_VCVT from a V4HF source (printed with %P) to a
;; V4SF destination (printed with %q), emitted as vcvt.f32.f16.  Requires
;; TARGET_NEON && TARGET_FP16.
3694 (define_insn "neon_vcvtv4sfv4hf"
3695   [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3696         (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3697                           UNSPEC_VCVT))]
3698   "TARGET_NEON && TARGET_FP16"
3699   "vcvt.f32.f16\t%q0, %P1"
3700   [(set_attr "type" "neon_fp_cvt_widen_h")]
;; neon_vcvtv4hfv4sf: UNSPEC_VCVT from a V4SF source (printed with %q) to a
;; V4HF destination (printed with %P), emitted as vcvt.f16.f32.  Requires
;; TARGET_NEON && TARGET_FP16.
3703 (define_insn "neon_vcvtv4hfv4sf"
3704   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3705         (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3706                           UNSPEC_VCVT))]
3707   "TARGET_NEON && TARGET_FP16"
3708   "vcvt.f16.f32\t%P0, %q1"
3709   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; neon_vcvt<sup><mode>, VCVTHI source: unspec conversion (VCVT_US iterator)
;; from a VCVTHI vector to <VH_CVTTO>, emitted as vcvt.f16.<sup>16.
;; Requires TARGET_NEON_FP16INST.
3712 (define_insn "neon_vcvt<sup><mode>"
3713  [(set
3714    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3715    (unspec:<VH_CVTTO>
3716     [(match_operand:VCVTHI 1 "s_register_operand" "w")]
3717     VCVT_US))]
3718  "TARGET_NEON_FP16INST"
3719  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
3720   [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; neon_vcvt<sup><mode>, VH source: unspec conversion (VCVT_US iterator) from
;; a VH vector to <VH_CVTTO>, emitted as vcvt.<sup>16.f16.
;; Requires TARGET_NEON_FP16INST.
3723 (define_insn "neon_vcvt<sup><mode>"
3724  [(set
3725    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3726    (unspec:<VH_CVTTO>
3727     [(match_operand:VH 1 "s_register_operand" "w")]
3728     VCVT_US))]
3729  "TARGET_NEON_FP16INST"
3730  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3731   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode>, VCVTF source: conversion to <V_CVTTO> with an
;; extra immediate (operand 2) that is printed as the third assembly
;; operand.  The immediate is validated with
;; arm_const_bounds (operands[2], 1, 33) before the template is returned.
3734 (define_insn "neon_vcvt<sup>_n<mode>"
3735   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3736         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3737                            (match_operand:SI 2 "immediate_operand" "i")]
3738                           VCVT_US_N))]
3739   "TARGET_NEON"
3741   arm_const_bounds (operands[2], 1, 33);
3742   return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3744   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode>, VH source: conversion to <VH_CVTTO> with an extra
;; immediate (operand 2) that is printed as the third assembly operand.
;; The immediate is validated with arm_const_bounds (operands[2], 0, 17).
;; NOTE(review): the 32-bit variants in this file pass 1 as the lower bound
;; while this passes 0 -- confirm that 0 is a valid immediate for the f16
;; form.
3747 (define_insn "neon_vcvt<sup>_n<mode>"
3748  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3749    (unspec:<VH_CVTTO>
3750     [(match_operand:VH 1 "s_register_operand" "w")
3751      (match_operand:SI 2 "immediate_operand" "i")]
3752     VCVT_US_N))]
3753   "TARGET_NEON_FP16INST"
3755   arm_const_bounds (operands[2], 0, 17);
3756   return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
3758  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode>, VCVTI source: conversion to <V_CVTTO> with an
;; extra immediate (operand 2) that is printed as the third assembly
;; operand.  The immediate is validated with
;; arm_const_bounds (operands[2], 1, 33) before the template is returned.
3761 (define_insn "neon_vcvt<sup>_n<mode>"
3762   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3763         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3764                            (match_operand:SI 2 "immediate_operand" "i")]
3765                           VCVT_US_N))]
3766   "TARGET_NEON"
3768   arm_const_bounds (operands[2], 1, 33);
3769   return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3771   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode>, VCVTHI source: conversion to <VH_CVTTO> with an
;; extra immediate (operand 2) that is printed as the third assembly
;; operand.  The immediate is validated with
;; arm_const_bounds (operands[2], 0, 17).
;; NOTE(review): the 32-bit variants in this file pass 1 as the lower bound
;; while this passes 0 -- confirm that 0 is a valid immediate for the f16
;; form.
3774 (define_insn "neon_vcvt<sup>_n<mode>"
3775  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3776    (unspec:<VH_CVTTO>
3777     [(match_operand:VCVTHI 1 "s_register_operand" "w")
3778      (match_operand:SI 2 "immediate_operand" "i")]
3779     VCVT_US_N))]
3780  "TARGET_NEON_FP16INST"
3782   arm_const_bounds (operands[2], 0, 17);
3783   return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3785  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; neon_vcvt<vcvth_op><sup><mode>: unspec conversion (VCVT_HF_US iterator)
;; from a VH vector to <VH_CVTTO>.  <vcvth_op> is appended directly to the
;; "vcvt" mnemonic and <sup> gives the signedness letter of the 16-bit
;; destination type.  Requires TARGET_NEON_FP16INST.
3788 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3789  [(set
3790    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3791    (unspec:<VH_CVTTO>
3792     [(match_operand:VH 1 "s_register_operand" "w")]
3793     VCVT_HF_US))]
3794  "TARGET_NEON_FP16INST"
3795  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3796   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vmovn<mode>: UNSPEC_VMOVN from a VN source (printed with %q) to the
;; <V_narrow> destination (printed with %P), emitted as vmovn.<V_if_elem>.
3799 (define_insn "neon_vmovn<mode>"
3800   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3801         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3802                            UNSPEC_VMOVN))]
3803   "TARGET_NEON"
3804   "vmovn.<V_if_elem>\t%P0, %q1"
3805   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; neon_vqmovn<sup><mode>: unspec (VQMOVN iterator) from a VN source (printed
;; with %q) to the <V_narrow> destination (printed with %P).  <sup> supplies
;; the signedness letter placed before <V_sz_elem> in the mnemonic suffix.
3808 (define_insn "neon_vqmovn<sup><mode>"
3809   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3810         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3811                            VQMOVN))]
3812   "TARGET_NEON"
3813   "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3814   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vqmovun<mode>: UNSPEC_VQMOVUN from a VN source (printed with %q) to
;; the <V_narrow> destination (printed with %P), emitted as
;; vqmovun.<V_s_elem>.
3817 (define_insn "neon_vqmovun<mode>"
3818   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3819         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3820                            UNSPEC_VQMOVUN))]
3821   "TARGET_NEON"
3822   "vqmovun.<V_s_elem>\t%P0, %q1"
3823   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vmovl<sup><mode>: unspec (VMOVL iterator) from a VW source (printed
;; with %P) to the <V_widen> destination (printed with %q).  <sup> supplies
;; the signedness letter placed before <V_sz_elem> in the mnemonic suffix.
3826 (define_insn "neon_vmovl<sup><mode>"
3827   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3828         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3829                           VMOVL))]
3830   "TARGET_NEON"
3831   "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3832   [(set_attr "type" "neon_shift_imm_long")]
;; neon_vmul_lane<mode>, VMD variant: UNSPEC_VMUL_LANE of vector operand 1
;; and lane operand 3 of operand 2, which uses <scalar_mul_constraint>.  All
;; three registers are printed with %P.  The "type" attribute is picked by
;; <Is_float_mode>: the fp scalar-multiply type for float modes, otherwise
;; the integer one.
3835 (define_insn "neon_vmul_lane<mode>"
3836   [(set (match_operand:VMD 0 "s_register_operand" "=w")
3837         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3838                      (match_operand:VMD 2 "s_register_operand"
3839                                         "<scalar_mul_constraint>")
3840                      (match_operand:SI 3 "immediate_operand" "i")]
3841                     UNSPEC_VMUL_LANE))]
3842   "TARGET_NEON"
3844   return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3846   [(set (attr "type")
3847      (if_then_else (match_test "<Is_float_mode>")
3848                    (const_string "neon_fp_mul_s_scalar<q>")
3849                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode>, VMQ variant: UNSPEC_VMUL_LANE of vector operand 1
;; and lane operand 3 of the <V_HALF>-mode operand 2.  Operands 0 and 1 are
;; printed with %q and the scalar source with %P.  The "type" attribute is
;; picked by <Is_float_mode>.
3852 (define_insn "neon_vmul_lane<mode>"
3853   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3854         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3855                      (match_operand:<V_HALF> 2 "s_register_operand"
3856                                              "<scalar_mul_constraint>")
3857                      (match_operand:SI 3 "immediate_operand" "i")]
3858                     UNSPEC_VMUL_LANE))]
3859   "TARGET_NEON"
3861   return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3863   [(set (attr "type")
3864      (if_then_else (match_test "<Is_float_mode>")
3865                    (const_string "neon_fp_mul_s_scalar<q>")
3866                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode>, VH variant: UNSPEC_VMUL_LANE of vector operand 1 and
;; lane operand 3 of a V4HF operand 2 (printed with %P).  The template
;; hard-codes vmul.f16 and the pattern requires TARGET_NEON_FP16INST.
3869 (define_insn "neon_vmul_lane<mode>"
3870   [(set (match_operand:VH 0 "s_register_operand" "=w")
3871         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3872                     (match_operand:V4HF 2 "s_register_operand"
3873                      "<scalar_mul_constraint>")
3874                      (match_operand:SI 3 "immediate_operand" "i")]
3875                      UNSPEC_VMUL_LANE))]
3876   "TARGET_NEON_FP16INST"
3877   "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3878   [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; neon_vmull<sup>_lane<mode>: unspec (VMULL_LANE iterator) of VMDI operand 1
;; and lane operand 3 of VMDI operand 2, producing a <V_widen> result.  The
;; destination is printed with %q and both sources with %P.
3881 (define_insn "neon_vmull<sup>_lane<mode>"
3882   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3883         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3884                            (match_operand:VMDI 2 "s_register_operand"
3885                                                "<scalar_mul_constraint>")
3886                            (match_operand:SI 3 "immediate_operand" "i")]
3887                           VMULL_LANE))]
3888   "TARGET_NEON"
3890   return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3892   [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; neon_vqdmull_lane<mode>: UNSPEC_VQDMULL_LANE of VMDI operand 1 and lane
;; operand 3 of VMDI operand 2, producing a <V_widen> result.  The
;; destination is printed with %q and both sources with %P.
3895 (define_insn "neon_vqdmull_lane<mode>"
3896   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3897         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3898                            (match_operand:VMDI 2 "s_register_operand"
3899                                                "<scalar_mul_constraint>")
3900                            (match_operand:SI 3 "immediate_operand" "i")]
3901                           UNSPEC_VQDMULL_LANE))]
3902   "TARGET_NEON"
3904   return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3906   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; neon_vq<r>dmulh_lane<mode>, VMQI variant: unspec (VQDMULH_LANE iterator)
;; of vector operand 1 and lane operand 3 of the <V_HALF>-mode operand 2.
;; Operands 0 and 1 are printed with %q and the scalar source with %P; <r>
;; is spliced into the mnemonic.
3909 (define_insn "neon_vq<r>dmulh_lane<mode>"
3910   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3911         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3912                       (match_operand:<V_HALF> 2 "s_register_operand"
3913                                               "<scalar_mul_constraint>")
3914                       (match_operand:SI 3 "immediate_operand" "i")]
3915                       VQDMULH_LANE))]
3916   "TARGET_NEON"
3918   return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3920   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; neon_vq<r>dmulh_lane<mode>, VMDI variant: unspec (VQDMULH_LANE iterator)
;; of vector operand 1 and lane operand 3 of VMDI operand 2.  All three
;; registers are printed with %P; <r> is spliced into the mnemonic.
;; NOTE(review): the "type" attribute ends in "_scalar_q" although every
;; operand here is printed with %P -- confirm this scheduling type is
;; intended.
3923 (define_insn "neon_vq<r>dmulh_lane<mode>"
3924   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3925         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3926                       (match_operand:VMDI 2 "s_register_operand"
3927                                           "<scalar_mul_constraint>")
3928                       (match_operand:SI 3 "immediate_operand" "i")]
3929                       VQDMULH_LANE))]
3930   "TARGET_NEON"
3932   return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3934   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3937 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant.  Operand 1 is tied to the destination ("0" constraint) and
;; so does not appear in the template; operand 2 is the vector source and
;; operand 3 the <V_HALF>-mode register whose lane (operand 4) is used.
;; Requires TARGET_NEON_RDMA.
3938 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3939   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3940         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3941                       (match_operand:VMQI 2 "s_register_operand" "w")
3942                       (match_operand:<V_HALF> 3 "s_register_operand"
3943                                           "<scalar_mul_constraint>")
3944                       (match_operand:SI 4 "immediate_operand" "i")]
3945                      VQRDMLH_AS))]
3946   "TARGET_NEON_RDMA"
3948   return
3949    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3951   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; neon_vqrdml<as>h_lane<mode>, VMDI variant.  Operand 1 is tied to the
;; destination ("0" constraint) and so does not appear in the template;
;; operand 2 is the vector source and operand 3 the VMDI register whose lane
;; (operand 4) is used.  All registers are printed with %P.  Requires
;; TARGET_NEON_RDMA.
3954 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3955   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3956         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3957                       (match_operand:VMDI 2 "s_register_operand" "w")
3958                       (match_operand:VMDI 3 "s_register_operand"
3959                                           "<scalar_mul_constraint>")
3960                       (match_operand:SI 4 "immediate_operand" "i")]
3961                      VQRDMLH_AS))]
3962   "TARGET_NEON_RDMA"
3964   return
3965    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3967   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; By-lane vmla for the D-register (VMD) modes.  Operand 1 is the
;; accumulator and is tied to the destination ("0"); operand 2 is the vector
;; multiplicand and operand 3[operand 4] selects the scalar lane.  The "type"
;; attribute is chosen by <Is_float_mode>: the fp_mla string for float
;; modes, the integer mla string otherwise.
3970 (define_insn "neon_vmla_lane<mode>"
3971   [(set (match_operand:VMD 0 "s_register_operand" "=w")
3972         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3973                      (match_operand:VMD 2 "s_register_operand" "w")
3974                      (match_operand:VMD 3 "s_register_operand"
3975                                         "<scalar_mul_constraint>")
3976                      (match_operand:SI 4 "immediate_operand" "i")]
3977                      UNSPEC_VMLA_LANE))]
3978   "TARGET_NEON"
3980   return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3982   [(set (attr "type")
3983      (if_then_else (match_test "<Is_float_mode>")
3984                    (const_string "neon_fp_mla_s_scalar<q>")
3985                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; By-lane vmla for the Q-register (VMQ) modes.  Operand 1 (accumulator) is
;; tied to the destination; operand 2 is a Q register, while the scalar is
;; taken from lane %c4 of the half-width (<V_HALF>) vector in operand 3,
;; printed as a D register.  The "type" attribute depends on <Is_float_mode>.
3988 (define_insn "neon_vmla_lane<mode>"
3989   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3990         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3991                      (match_operand:VMQ 2 "s_register_operand" "w")
3992                      (match_operand:<V_HALF> 3 "s_register_operand"
3993                                              "<scalar_mul_constraint>")
3994                      (match_operand:SI 4 "immediate_operand" "i")]
3995                      UNSPEC_VMLA_LANE))]
3996   "TARGET_NEON"
3998   return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4000   [(set (attr "type")
4001      (if_then_else (match_test "<Is_float_mode>")
4002                    (const_string "neon_fp_mla_s_scalar<q>")
4003                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; By-lane vmlal (signedness selected by <sup>).  The destination and the
;; tied accumulator (operand 1) have the widened mode <V_widen> and are
;; printed as a Q register; operands 2 and 3 are VMDI D registers, with
;; operand 4 giving the lane of operand 3.
4006 (define_insn "neon_vmlal<sup>_lane<mode>"
4007   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4008         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4009                            (match_operand:VMDI 2 "s_register_operand" "w")
4010                            (match_operand:VMDI 3 "s_register_operand"
4011                                                "<scalar_mul_constraint>")
4012                            (match_operand:SI 4 "immediate_operand" "i")]
4013                           VMLAL_LANE))]
4014   "TARGET_NEON"
4016   return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4018   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; By-lane vqdmlal.  Same operand layout as the vmlal lane pattern: a
;; <V_widen> destination with tied accumulator (operand 1), VMDI D-register
;; sources in operands 2 and 3, and the lane index in operand 4.
4021 (define_insn "neon_vqdmlal_lane<mode>"
4022   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4023         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4024                            (match_operand:VMDI 2 "s_register_operand" "w")
4025                            (match_operand:VMDI 3 "s_register_operand"
4026                                                "<scalar_mul_constraint>")
4027                            (match_operand:SI 4 "immediate_operand" "i")]
4028                           UNSPEC_VQDMLAL_LANE))]
4029   "TARGET_NEON"
4031   return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4033   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; By-lane vmls for the D-register (VMD) modes.  Operand 1 (accumulator) is
;; tied to the destination; operand 2 is the vector source and
;; operand 3[operand 4] the scalar lane.  The "type" attribute is selected
;; by <Is_float_mode>.
4036 (define_insn "neon_vmls_lane<mode>"
4037   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4038         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4039                      (match_operand:VMD 2 "s_register_operand" "w")
4040                      (match_operand:VMD 3 "s_register_operand"
4041                                         "<scalar_mul_constraint>")
4042                      (match_operand:SI 4 "immediate_operand" "i")]
4043                     UNSPEC_VMLS_LANE))]
4044   "TARGET_NEON"
4046   return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4048   [(set (attr "type")
4049      (if_then_else (match_test "<Is_float_mode>")
4050                    (const_string "neon_fp_mla_s_scalar<q>")
4051                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; By-lane vmls for the Q-register (VMQ) modes.  Operand 1 (accumulator) is
;; tied to the destination; operand 2 is a Q register and the scalar is lane
;; %c4 of the half-width (<V_HALF>) vector in operand 3.  The "type"
;; attribute is selected by <Is_float_mode>.
4054 (define_insn "neon_vmls_lane<mode>"
4055   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4056         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4057                      (match_operand:VMQ 2 "s_register_operand" "w")
4058                      (match_operand:<V_HALF> 3 "s_register_operand"
4059                                              "<scalar_mul_constraint>")
4060                      (match_operand:SI 4 "immediate_operand" "i")]
4061                     UNSPEC_VMLS_LANE))]
4062   "TARGET_NEON"
4064   return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4066   [(set (attr "type")
4067      (if_then_else (match_test "<Is_float_mode>")
4068                    (const_string "neon_fp_mla_s_scalar<q>")
4069                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; By-lane vmlsl (signedness selected by <sup>).  <V_widen> destination with
;; tied accumulator (operand 1); VMDI D-register sources in operands 2 and
;; 3; operand 4 is the lane index into operand 3.
4072 (define_insn "neon_vmlsl<sup>_lane<mode>"
4073   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4074         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4075                            (match_operand:VMDI 2 "s_register_operand" "w")
4076                            (match_operand:VMDI 3 "s_register_operand"
4077                                                "<scalar_mul_constraint>")
4078                            (match_operand:SI 4 "immediate_operand" "i")]
4079                           VMLSL_LANE))]
4080   "TARGET_NEON"
4082   return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4084   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; By-lane vqdmlsl.  <V_widen> destination with tied accumulator
;; (operand 1); VMDI D-register sources in operands 2 and 3; operand 4 is
;; the lane index into operand 3.
4087 (define_insn "neon_vqdmlsl_lane<mode>"
4088   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4089         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4090                            (match_operand:VMDI 2 "s_register_operand" "w")
4091                            (match_operand:VMDI 3 "s_register_operand"
4092                                                "<scalar_mul_constraint>")
4093                            (match_operand:SI 4 "immediate_operand" "i")]
4094                           UNSPEC_VQDMLSL_LANE))]
4095   "TARGET_NEON"
4097   return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4099   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4102 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4103 ; core register into a temp register, then use a scalar taken from that. This
4104 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4105 ; or extracted from another vector. In the latter case it's currently better
4106 ; to use the "_lane" variant, and the former case can probably be implemented
4107 ; using vld1_lane, but that hasn't been done yet.
;; Multiply-by-scalar expander for the D-register (VMD) modes.  The scalar
;; in operand 2 is inserted into lane 0 of a fresh vector register (tmp is
;; passed to vset_lane as both destination and input vector), and the
;; multiply is then emitted through neon_vmul_lane with lane index 0.
4109 (define_expand "neon_vmul_n<mode>"
4110   [(match_operand:VMD 0 "s_register_operand")
4111    (match_operand:VMD 1 "s_register_operand")
4112    (match_operand:<V_elem> 2 "s_register_operand")]
4113   "TARGET_NEON"
4115   rtx tmp = gen_reg_rtx (<MODE>mode);
4116   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4117   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4118                                        const0_rtx));
4119   DONE;
;; Multiply-by-scalar expander for the Q-register (VMQ) modes.  The scalar
;; is placed in lane 0 of a temporary of the half-width mode (<V_HALF>),
;; which is the mode neon_vmul_lane is handed as its scalar-vector argument
;; here; the multiply is then emitted with lane index 0.
4122 (define_expand "neon_vmul_n<mode>"
4123   [(match_operand:VMQ 0 "s_register_operand")
4124    (match_operand:VMQ 1 "s_register_operand")
4125    (match_operand:<V_elem> 2 "s_register_operand")]
4126   "TARGET_NEON"
4128   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4129   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4130   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4131                                        const0_rtx));
4132   DONE;
;; Multiply-by-scalar expander for the half-precision (VH) modes, enabled
;; under TARGET_NEON_FP16INST.  The temporary holding the scalar is always
;; V4HFmode regardless of <MODE>; the scalar goes into lane 0 and
;; neon_vmul_lane is emitted with lane index 0.
4135 (define_expand "neon_vmul_n<mode>"
4136   [(match_operand:VH 0 "s_register_operand")
4137    (match_operand:VH 1 "s_register_operand")
4138    (match_operand:<V_elem> 2 "s_register_operand")]
4139   "TARGET_NEON_FP16INST"
4141   rtx tmp = gen_reg_rtx (V4HFmode);
4142   emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4143   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4144                                        const0_rtx));
4145   DONE;
;; vmulls-by-scalar expander with a <V_widen> result: the scalar in operand
;; 2 is inserted into lane 0 of a temporary <MODE> vector and the operation
;; is emitted through neon_vmulls_lane with lane index 0.
4148 (define_expand "neon_vmulls_n<mode>"
4149   [(match_operand:<V_widen> 0 "s_register_operand")
4150    (match_operand:VMDI 1 "s_register_operand")
4151    (match_operand:<V_elem> 2 "s_register_operand")]
4152   "TARGET_NEON"
4154   rtx tmp = gen_reg_rtx (<MODE>mode);
4155   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4156   emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4157                                          const0_rtx));
4158   DONE;
;; vmullu-by-scalar expander with a <V_widen> result: the scalar in operand
;; 2 is inserted into lane 0 of a temporary <MODE> vector and the operation
;; is emitted through neon_vmullu_lane with lane index 0.
4161 (define_expand "neon_vmullu_n<mode>"
4162   [(match_operand:<V_widen> 0 "s_register_operand")
4163    (match_operand:VMDI 1 "s_register_operand")
4164    (match_operand:<V_elem> 2 "s_register_operand")]
4165   "TARGET_NEON"
4167   rtx tmp = gen_reg_rtx (<MODE>mode);
4168   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4169   emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4170                                          const0_rtx));
4171   DONE;
;; vqdmull-by-scalar expander with a <V_widen> result: the scalar in operand
;; 2 is inserted into lane 0 of a temporary <MODE> vector and the operation
;; is emitted through neon_vqdmull_lane with lane index 0.
4174 (define_expand "neon_vqdmull_n<mode>"
4175   [(match_operand:<V_widen> 0 "s_register_operand")
4176    (match_operand:VMDI 1 "s_register_operand")
4177    (match_operand:<V_elem> 2 "s_register_operand")]
4178   "TARGET_NEON"
4180   rtx tmp = gen_reg_rtx (<MODE>mode);
4181   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4182   emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4183                                           const0_rtx));
4184   DONE;
;; vqdmulh-by-scalar expander for the D-register (VMDI) modes: the scalar in
;; operand 2 is inserted into lane 0 of a temporary <MODE> vector and
;; neon_vqdmulh_lane is emitted with lane index 0.
4187 (define_expand "neon_vqdmulh_n<mode>"
4188   [(match_operand:VMDI 0 "s_register_operand")
4189    (match_operand:VMDI 1 "s_register_operand")
4190    (match_operand:<V_elem> 2 "s_register_operand")]
4191   "TARGET_NEON"
4193   rtx tmp = gen_reg_rtx (<MODE>mode);
4194   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4195   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4196                                           const0_rtx));
4197   DONE;
;; vqrdmulh-by-scalar expander for the D-register (VMDI) modes: the scalar
;; in operand 2 is inserted into lane 0 of a temporary <MODE> vector and
;; neon_vqrdmulh_lane is emitted with lane index 0.
4200 (define_expand "neon_vqrdmulh_n<mode>"
4201   [(match_operand:VMDI 0 "s_register_operand")
4202    (match_operand:VMDI 1 "s_register_operand")
4203    (match_operand:<V_elem> 2 "s_register_operand")]
4204   "TARGET_NEON"
4206   rtx tmp = gen_reg_rtx (<MODE>mode);
4207   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4208   emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4209                                           const0_rtx));
4210   DONE;
;; vqdmulh-by-scalar expander for the Q-register (VMQI) modes: the scalar in
;; operand 2 is inserted into lane 0 of a half-width (<V_HALF>) temporary
;; and neon_vqdmulh_lane is emitted with lane index 0.
4213 (define_expand "neon_vqdmulh_n<mode>"
4214   [(match_operand:VMQI 0 "s_register_operand")
4215    (match_operand:VMQI 1 "s_register_operand")
4216    (match_operand:<V_elem> 2 "s_register_operand")]
4217   "TARGET_NEON"
4219   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4220   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4221   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4222                                           const0_rtx));
4223   DONE;
;; vqrdmulh-by-scalar expander for the Q-register (VMQI) modes: the scalar
;; in operand 2 is inserted into lane 0 of a half-width (<V_HALF>) temporary
;; and neon_vqrdmulh_lane is emitted with lane index 0.
4226 (define_expand "neon_vqrdmulh_n<mode>"
4227   [(match_operand:VMQI 0 "s_register_operand")
4228    (match_operand:VMQI 1 "s_register_operand")
4229    (match_operand:<V_elem> 2 "s_register_operand")]
4230   "TARGET_NEON"
4232   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4233   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4234   emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4235                                            const0_rtx));
4236   DONE;
;; vmla-by-scalar expander for the D-register (VMD) modes.  Operands 1 and 2
;; are forwarded unchanged to neon_vmla_lane; the scalar in operand 3 is
;; first inserted into lane 0 of a temporary <MODE> vector, which is then
;; used with lane index 0.
4239 (define_expand "neon_vmla_n<mode>"
4240   [(match_operand:VMD 0 "s_register_operand")
4241    (match_operand:VMD 1 "s_register_operand")
4242    (match_operand:VMD 2 "s_register_operand")
4243    (match_operand:<V_elem> 3 "s_register_operand")]
4244   "TARGET_NEON"
4246   rtx tmp = gen_reg_rtx (<MODE>mode);
4247   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4248   emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4249                                        tmp, const0_rtx));
4250   DONE;
;; vmla-by-scalar expander for the Q-register (VMQ) modes.  Operands 1 and 2
;; are forwarded unchanged to neon_vmla_lane; the scalar in operand 3 is
;; inserted into lane 0 of a half-width (<V_HALF>) temporary, which is then
;; used with lane index 0.
4253 (define_expand "neon_vmla_n<mode>"
4254   [(match_operand:VMQ 0 "s_register_operand")
4255    (match_operand:VMQ 1 "s_register_operand")
4256    (match_operand:VMQ 2 "s_register_operand")
4257    (match_operand:<V_elem> 3 "s_register_operand")]
4258   "TARGET_NEON"
4260   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4261   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4262   emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4263                                        tmp, const0_rtx));
4264   DONE;
;; vmlals-by-scalar expander.  Operands 0 and 1 have the widened mode
;; <V_widen>; the scalar in operand 3 is inserted into lane 0 of a temporary
;; <MODE> vector and neon_vmlals_lane is emitted with lane index 0.
4267 (define_expand "neon_vmlals_n<mode>"
4268   [(match_operand:<V_widen> 0 "s_register_operand")
4269    (match_operand:<V_widen> 1 "s_register_operand")
4270    (match_operand:VMDI 2 "s_register_operand")
4271    (match_operand:<V_elem> 3 "s_register_operand")]
4272   "TARGET_NEON"
4274   rtx tmp = gen_reg_rtx (<MODE>mode);
4275   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4276   emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4277                                          tmp, const0_rtx));
4278   DONE;
;; vmlalu-by-scalar expander.  Operands 0 and 1 have the widened mode
;; <V_widen>; the scalar in operand 3 is inserted into lane 0 of a temporary
;; <MODE> vector and neon_vmlalu_lane is emitted with lane index 0.
4281 (define_expand "neon_vmlalu_n<mode>"
4282   [(match_operand:<V_widen> 0 "s_register_operand")
4283    (match_operand:<V_widen> 1 "s_register_operand")
4284    (match_operand:VMDI 2 "s_register_operand")
4285    (match_operand:<V_elem> 3 "s_register_operand")]
4286   "TARGET_NEON"
4288   rtx tmp = gen_reg_rtx (<MODE>mode);
4289   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4290   emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4291                                          tmp, const0_rtx));
4292   DONE;
;; vqdmlal-by-scalar expander.  Operands 0 and 1 have the widened mode
;; <V_widen>; the scalar in operand 3 is inserted into lane 0 of a temporary
;; <MODE> vector and neon_vqdmlal_lane is emitted with lane index 0.
4295 (define_expand "neon_vqdmlal_n<mode>"
4296   [(match_operand:<V_widen> 0 "s_register_operand")
4297    (match_operand:<V_widen> 1 "s_register_operand")
4298    (match_operand:VMDI 2 "s_register_operand")
4299    (match_operand:<V_elem> 3 "s_register_operand")]
4300   "TARGET_NEON"
4302   rtx tmp = gen_reg_rtx (<MODE>mode);
4303   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4304   emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4305                                           tmp, const0_rtx));
4306   DONE;
;; vmls-by-scalar expander for the D-register (VMD) modes.  Operands 1 and 2
;; are forwarded unchanged to neon_vmls_lane; the scalar in operand 3 is
;; inserted into lane 0 of a temporary <MODE> vector, used with lane index 0.
4309 (define_expand "neon_vmls_n<mode>"
4310   [(match_operand:VMD 0 "s_register_operand")
4311    (match_operand:VMD 1 "s_register_operand")
4312    (match_operand:VMD 2 "s_register_operand")
4313    (match_operand:<V_elem> 3 "s_register_operand")]
4314   "TARGET_NEON"
4316   rtx tmp = gen_reg_rtx (<MODE>mode);
4317   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4318   emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4319                                        tmp, const0_rtx));
4320   DONE;
;; vmls-by-scalar expander for the Q-register (VMQ) modes.  Operands 1 and 2
;; are forwarded unchanged to neon_vmls_lane; the scalar in operand 3 is
;; inserted into lane 0 of a half-width (<V_HALF>) temporary, used with lane
;; index 0.
4323 (define_expand "neon_vmls_n<mode>"
4324   [(match_operand:VMQ 0 "s_register_operand")
4325    (match_operand:VMQ 1 "s_register_operand")
4326    (match_operand:VMQ 2 "s_register_operand")
4327    (match_operand:<V_elem> 3 "s_register_operand")]
4328   "TARGET_NEON"
4330   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4331   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4332   emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4333                                        tmp, const0_rtx));
4334   DONE;
;; vmlsls-by-scalar expander.  Operands 0 and 1 have the widened mode
;; <V_widen>; the scalar in operand 3 is inserted into lane 0 of a temporary
;; <MODE> vector and neon_vmlsls_lane is emitted with lane index 0.
4337 (define_expand "neon_vmlsls_n<mode>"
4338   [(match_operand:<V_widen> 0 "s_register_operand")
4339    (match_operand:<V_widen> 1 "s_register_operand")
4340    (match_operand:VMDI 2 "s_register_operand")
4341    (match_operand:<V_elem> 3 "s_register_operand")]
4342   "TARGET_NEON"
4344   rtx tmp = gen_reg_rtx (<MODE>mode);
4345   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4346   emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4347                                         tmp, const0_rtx));
4348   DONE;
;; vmlslu-by-scalar expander.  Operands 0 and 1 have the widened mode
;; <V_widen>; the scalar in operand 3 is inserted into lane 0 of a temporary
;; <MODE> vector and neon_vmlslu_lane is emitted with lane index 0.
4351 (define_expand "neon_vmlslu_n<mode>"
4352   [(match_operand:<V_widen> 0 "s_register_operand")
4353    (match_operand:<V_widen> 1 "s_register_operand")
4354    (match_operand:VMDI 2 "s_register_operand")
4355    (match_operand:<V_elem> 3 "s_register_operand")]
4356   "TARGET_NEON"
4358   rtx tmp = gen_reg_rtx (<MODE>mode);
4359   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4360   emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4361                                         tmp, const0_rtx));
4362   DONE;
;; vqdmlsl-by-scalar expander.  Operands 0 and 1 have the widened mode
;; <V_widen>; the scalar in operand 3 is inserted into lane 0 of a temporary
;; <MODE> vector and neon_vqdmlsl_lane is emitted with lane index 0.
4365 (define_expand "neon_vqdmlsl_n<mode>"
4366   [(match_operand:<V_widen> 0 "s_register_operand")
4367    (match_operand:<V_widen> 1 "s_register_operand")
4368    (match_operand:VMDI 2 "s_register_operand")
4369    (match_operand:<V_elem> 3 "s_register_operand")]
4370   "TARGET_NEON"
4372   rtx tmp = gen_reg_rtx (<MODE>mode);
4373   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4374   emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4375                                           tmp, const0_rtx));
4376   DONE;
;; vext over the VDQX modes: two source vectors (operands 1 and 2) plus an
;; immediate (operand 3) printed as the final assembler operand.  The
;; immediate is range-checked with arm_const_bounds against
;; (0, number of units in <MODE>) before the template is returned;
;; presumably the upper bound is exclusive -- confirm against
;; arm_const_bounds.
4379 (define_insn "@neon_vext<mode>"
4380   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4381         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4382                       (match_operand:VDQX 2 "s_register_operand" "w")
4383                       (match_operand:SI 3 "immediate_operand" "i")]
4384                      UNSPEC_VEXT))]
4385   "TARGET_NEON"
4387   arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4388   return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4390   [(set_attr "type" "neon_ext<q>")]
;; vrev64 over the VDQ modes: one source register, one destination, with
;; the element size taken from <V_sz_elem> and the register width from
;; <V_reg>.
4393 (define_insn "@neon_vrev64<mode>"
4394   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4395         (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4396                     UNSPEC_VREV64))]
4397   "TARGET_NEON"
4398   "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4399   [(set_attr "type" "neon_rev<q>")]
;; vrev32 over the VX modes: one source register, one destination, with
;; the element size taken from <V_sz_elem> and the register width from
;; <V_reg>.
4402 (define_insn "@neon_vrev32<mode>"
4403   [(set (match_operand:VX 0 "s_register_operand" "=w")
4404         (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4405                    UNSPEC_VREV32))]
4406   "TARGET_NEON"
4407   "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4408   [(set_attr "type" "neon_rev<q>")]
;; vrev16 over the VE modes: one source register, one destination, with
;; the element size taken from <V_sz_elem> and the register width from
;; <V_reg>.
4411 (define_insn "@neon_vrev16<mode>"
4412   [(set (match_operand:VE 0 "s_register_operand" "=w")
4413         (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4414                    UNSPEC_VREV16))]
4415   "TARGET_NEON"
4416   "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4417   [(set_attr "type" "neon_rev<q>")]
4420 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4421 ; allocation. For an intrinsic of form:
4422 ;   rD = vbsl_* (rS, rN, rM)
4423 ; We can use any of:
4424 ;   vbsl rS, rN, rM  (if D = S)
4425 ;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
4426 ;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; The three constraint alternatives below correspond, in order, to the
;; three cases above: the destination is tied to operand 1 (rS) for vbsl,
;; to operand 3 (rM) for vbit, and to operand 2 (rN) for vbif.  The tied
;; operand is the one omitted from each output template.
4428 (define_insn "neon_vbsl<mode>_internal"
4429   [(set (match_operand:VDQX 0 "s_register_operand"               "=w,w,w")
4430         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4431                       (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4432                       (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4433                      UNSPEC_VBSL))]
4434   "TARGET_NEON"
4435   "@
4436   vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4437   vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4438   vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4439   [(set_attr "type" "neon_bsl<q>")]
;; Expander for the vbsl intrinsic.  The selector (operand 1) arrives in the
;; comparison-result mode <V_cmp_result>; it is re-expressed in <MODE> with
;; gen_lowpart so that all three inputs share one mode.  No DONE is visible
;; here, so the expand pattern above is presumably emitted as written and
;; matched by neon_vbsl<mode>_internal -- confirm.
4442 (define_expand "@neon_vbsl<mode>"
4443   [(set (match_operand:VDQX 0 "s_register_operand")
4444         (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4445                       (match_operand:VDQX 2 "s_register_operand")
4446                       (match_operand:VDQX 3 "s_register_operand")]
4447                      UNSPEC_VBSL))]
4448   "TARGET_NEON"
4450   /* We can't alias operands together if they have different modes.  */
4451   operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4454 ;; vshl, vrshl
;; Register-controlled shift over the VDQIX modes: operand 1 is the value
;; and operand 2 is a vector register supplying the shift amounts.  The
;; mnemonic and signedness come from the VSHL iterator (<shift_op>, <sup>).
;; NOTE(review): the "type" string says "shift_imm" although the shift
;; operand here is a register -- confirm this is the intended type.
4455 (define_insn "neon_v<shift_op><sup><mode>"
4456   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4457         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4458                        (match_operand:VDQIX 2 "s_register_operand" "w")]
4459                       VSHL))]
4460   "TARGET_NEON"
4461   "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4462   [(set_attr "type" "neon_shift_imm<q>")]
4465 ;; vqshl, vqrshl
;; Register-controlled shift over the VDQIX modes, driven by the VQSHL
;; iterator: operand 1 is the value and operand 2 is a vector register
;; supplying the shift amounts.
;; NOTE(review): the "type" string says "sat_shift_imm" although the shift
;; operand here is a register -- confirm this is the intended type.
4466 (define_insn "neon_v<shift_op><sup><mode>"
4467   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4468         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4469                        (match_operand:VDQIX 2 "s_register_operand" "w")]
4470                       VQSHL))]
4471   "TARGET_NEON"
4472   "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4473   [(set_attr "type" "neon_sat_shift_imm<q>")]
4476 ;; vshr_n, vrshr_n
;; Shift by immediate over the VDQIX modes.  Operand 2 is the immediate; it
;; is checked with arm_const_bounds against (1, element bits + 1) before the
;; template is returned -- presumably meaning 1 <= imm <= element size;
;; confirm against arm_const_bounds.
4477 (define_insn "neon_v<shift_op><sup>_n<mode>"
4478   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4479         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4480                        (match_operand:SI 2 "immediate_operand" "i")]
4481                       VSHR_N))]
4482   "TARGET_NEON"
4484   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4485   return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4487   [(set_attr "type" "neon_shift_imm<q>")]
4490 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: the VN source is printed as a Q register
;; and the <V_narrow> result as a D register.  The immediate is checked
;; against (1, element bits / 2 + 1), i.e. bounded by the narrowed element
;; width -- presumably inclusive of that width; confirm against
;; arm_const_bounds.
4491 (define_insn "neon_v<shift_op>_n<mode>"
4492   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4493         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4494                             (match_operand:SI 2 "immediate_operand" "i")]
4495                            VSHRN_N))]
4496   "TARGET_NEON"
4498   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4499   return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4501   [(set_attr "type" "neon_shift_imm_narrow_q")]
4504 ;; vqshrn_n, vqrshrn_n
;; Narrowing shift by immediate driven by the VQSHRN_N iterator: Q-register
;; VN source, D-register <V_narrow> result.  The immediate is checked
;; against (1, element bits / 2 + 1) before the template is returned.
4505 (define_insn "neon_v<shift_op><sup>_n<mode>"
4506   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4507         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4508                             (match_operand:SI 2 "immediate_operand" "i")]
4509                            VQSHRN_N))]
4510   "TARGET_NEON"
4512   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4513   return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4515   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4518 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate driven by the VQSHRUN_N iterator: Q-register
;; VN source, D-register <V_narrow> result, element suffix from <V_s_elem>.
;; The immediate is checked against (1, element bits / 2 + 1) before the
;; template is returned.
4519 (define_insn "neon_v<shift_op>_n<mode>"
4520   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4521         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4522                             (match_operand:SI 2 "immediate_operand" "i")]
4523                            VQSHRUN_N))]
4524   "TARGET_NEON"
4526   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4527   return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4529   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate over the VDQIX modes.  The immediate (operand 2) is
;; checked with arm_const_bounds against (0, element bits) -- presumably
;; 0 <= imm < element size; confirm against arm_const_bounds.
4532 (define_insn "neon_vshl_n<mode>"
4533   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4534         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4535                        (match_operand:SI 2 "immediate_operand" "i")]
4536                       UNSPEC_VSHL_N))]
4537   "TARGET_NEON"
4539   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4540   return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4542   [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate over the VDQIX modes, signedness from <sup> (VQSHL_N
;; iterator).  The immediate is checked against (0, element bits) before
;; the template is returned.
4545 (define_insn "neon_vqshl_<sup>_n<mode>"
4546   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4547         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4548                        (match_operand:SI 2 "immediate_operand" "i")]
4549                       VQSHL_N))]
4550   "TARGET_NEON"
4552   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4553   return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4555   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate over the VDQIX modes, element suffix from
;; <V_s_elem>.  The immediate is checked against (0, element bits) before
;; the template is returned.
4558 (define_insn "neon_vqshlu_n<mode>"
4559   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4560         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4561                        (match_operand:SI 2 "immediate_operand" "i")]
4562                       UNSPEC_VQSHLU_N))]
4563   "TARGET_NEON"
4565   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4566   return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4568   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening vshll by immediate: D-register VW source, Q-register <V_widen>
;; result, signedness from <sup>.
;; NOTE(review): the comment in the body states "0 < imm <= size", but the
;; lower bound passed to arm_const_bounds is 0; if that bound is inclusive,
;; an immediate of 0 is accepted as well -- confirm which is intended.
4571 (define_insn "neon_vshll<sup>_n<mode>"
4572   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4573         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4574                            (match_operand:SI 2 "immediate_operand" "i")]
4575                           VSHLL_N))]
4576   "TARGET_NEON"
4578   /* The boundaries are: 0 < imm <= size.  */
4579   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4580   return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4582   [(set_attr "type" "neon_shift_imm_long")]
4585 ;; vsra_n, vrsra_n
;; Shift-and-accumulate by immediate over the VDQIX modes.  Operand 1 is the
;; accumulator, tied to the destination ("0") and therefore absent from the
;; template; operand 2 is the value shifted by the immediate in operand 3,
;; which is checked against (1, element bits + 1).
4586 (define_insn "neon_v<shift_op><sup>_n<mode>"
4587   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4588         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4589                        (match_operand:VDQIX 2 "s_register_operand" "w")
4590                        (match_operand:SI 3 "immediate_operand" "i")]
4591                       VSRA_N))]
4592   "TARGET_NEON"
4594   arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4595   return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4597   [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate over the VDQIX modes.  Operand 1 is tied to the
;; destination ("0"); operand 2 is the source and operand 3 the immediate,
;; checked against (1, element bits + 1).
;; NOTE(review): the "type" string says "shift_reg" although the shift
;; amount here is an immediate -- confirm this is the intended type.
4600 (define_insn "neon_vsri_n<mode>"
4601   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4602         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4603                        (match_operand:VDQIX 2 "s_register_operand" "w")
4604                        (match_operand:SI 3 "immediate_operand" "i")]
4605                       UNSPEC_VSRI))]
4606   "TARGET_NEON"
4608   arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4609   return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4611   [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate over the VDQIX modes.  Operand 1 is tied to the
;; destination ("0"); operand 2 is the source and operand 3 the immediate,
;; checked against (0, element bits).
;; NOTE(review): the "type" string says "shift_reg" although the shift
;; amount here is an immediate -- confirm this is the intended type.
4614 (define_insn "neon_vsli_n<mode>"
4615   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4616         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4617                        (match_operand:VDQIX 2 "s_register_operand" "w")
4618                        (match_operand:SI 3 "immediate_operand" "i")]
4619                       UNSPEC_VSLI))]
4620   "TARGET_NEON"
4622   arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4623   return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4625   [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table: operand 1 is the V8QI table (printed
;; inside the braces) and operand 2 is the V8QI index vector.
4628 (define_insn "neon_vtbl1v8qi"
4629   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4630         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4631                       (match_operand:V8QI 2 "s_register_operand" "w")]
4632                      UNSPEC_VTBL))]
4633   "TARGET_NEON"
4634   "vtbl.8\t%P0, {%P1}, %P2"
4635   [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in a TI-mode operand.  The table is
;; split into individual V8QI registers at hard register numbers tabbase
;; and tabbase + 2 so each can be printed with %P in the register list; the
;; instruction is written directly with output_asm_insn and an empty
;; template is returned.
4638 (define_insn "neon_vtbl2v8qi"
4639   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4640         (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4641                       (match_operand:V8QI 2 "s_register_operand" "w")]
4642                      UNSPEC_VTBL))]
4643   "TARGET_NEON"
4645   rtx ops[4];
4646   int tabbase = REGNO (operands[1]);
4648   ops[0] = operands[0];
4649   ops[1] = gen_rtx_REG (V8QImode, tabbase);
4650   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4651   ops[3] = operands[2];
4652   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4654   return "";
4656   [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in an EI-mode operand.  The table
;; is split into V8QI registers at tabbase, tabbase + 2 and tabbase + 4 for
;; printing; the instruction is written with output_asm_insn and an empty
;; template is returned.
4659 (define_insn "neon_vtbl3v8qi"
4660   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4661         (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4662                       (match_operand:V8QI 2 "s_register_operand" "w")]
4663                      UNSPEC_VTBL))]
4664   "TARGET_NEON"
4666   rtx ops[5];
4667   int tabbase = REGNO (operands[1]);
4669   ops[0] = operands[0];
4670   ops[1] = gen_rtx_REG (V8QImode, tabbase);
4671   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4672   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4673   ops[4] = operands[2];
4674   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4676   return "";
4678   [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in an OI-mode operand.  The table
;; is split into V8QI registers at tabbase, +2, +4 and +6 for printing; the
;; instruction is written with output_asm_insn and an empty template is
;; returned.
4681 (define_insn "neon_vtbl4v8qi"
4682   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4683         (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4684                       (match_operand:V8QI 2 "s_register_operand" "w")]
4685                      UNSPEC_VTBL))]
4686   "TARGET_NEON"
4688   rtx ops[6];
4689   int tabbase = REGNO (operands[1]);
4691   ops[0] = operands[0];
4692   ops[1] = gen_rtx_REG (V8QImode, tabbase);
4693   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4694   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4695   ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4696   ops[5] = operands[2];
4697   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4699   return "";
4701   [(set_attr "type" "neon_tbl4")]
4704 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI lookup in a single V16QI table.  Emitted as "#" and split once
;; reload has completed: the table is viewed as TImode (a two-D-register
;; table) and one neon_vtbl2v8qi is emitted for the low V8QI half of the
;; destination/index operands and one for the high half.  Operand 0 is
;; marked early-clobber ("=&w").
4705 (define_insn_and_split "neon_vtbl1v16qi"
4706   [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4707         (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4708                        (match_operand:V16QI 2 "s_register_operand" "w")]
4709                       UNSPEC_VTBL))]
4710   "TARGET_NEON"
4711   "#"
4712   "&& reload_completed"
4713   [(const_int 0)]
4715   rtx op0, op1, op2, part0, part2;
4716   unsigned ofs;
4718   op0 = operands[0];
4719   op1 = gen_lowpart (TImode, operands[1]);
4720   op2 = operands[2];
4722   ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4723   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4724   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4725   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4727   ofs = subreg_highpart_offset (V8QImode, V16QImode);
4728   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4729   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4730   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4731   DONE;
4733   [(set_attr "type" "multiple")]
;; V16QI lookup in a table held in an OI-mode operand (operand 1).  Emitted
;; as "#" and split once reload has completed into two D-register lookups:
;; one for the low V8QI half of the destination/index operands and one for
;; the high half.  Because the table operand is OI mode, each half must go
;; through neon_vtbl4v8qi, whose table operand is OI; neon_vtbl2v8qi takes a
;; TI-mode table, so passing op1 to it would mismatch the operand mode and
;; print only two of the table registers.  Operand 0 is marked early-clobber
;; ("=&w"), presumably because the first half-lookup writes it while the
;; table and index registers are still needed by the second -- confirm.
4736 (define_insn_and_split "neon_vtbl2v16qi"
4737   [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4738         (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4739                        (match_operand:V16QI 2 "s_register_operand" "w")]
4740                       UNSPEC_VTBL))]
4741   "TARGET_NEON"
4742   "#"
4743   "&& reload_completed"
4744   [(const_int 0)]
4746   rtx op0, op1, op2, part0, part2;
4747   unsigned ofs;
4749   op0 = operands[0];
4750   op1 = operands[1];
4751   op2 = operands[2];
4753   ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4754   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4755   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4756   emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4758   ofs = subreg_highpart_offset (V8QImode, V16QImode);
4759   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4760   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4761   emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4762   DONE;
4764   [(set_attr "type" "multiple")]
;; Combine two V16QI registers (operands 1 and 2) into one OImode value
;; (operand 0).  Output as "#"; after reload the split body hands the
;; operands to neon_split_vcombine and emits nothing else itself.
4767 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4768 ;; handle quad-word input modes, producing octa-word output modes.  But
4769 ;; that requires us to add support for octa-word vector modes in moves.
4770 ;; That seems overkill for this one use in vec_perm.
4771 (define_insn_and_split "neon_vcombinev16qi"
4772   [(set (match_operand:OI 0 "s_register_operand" "=w")
4773         (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4774                     (match_operand:V16QI 2 "s_register_operand" "w")]
4775                    UNSPEC_VCONCAT))]
4776   "TARGET_NEON"
4777   "#"
4778   "&& reload_completed"
4779   [(const_int 0)]
4781   neon_split_vcombine (operands);
4782   DONE;
4784 [(set_attr "type" "multiple")]
;; vtbx.8 with a one-register table.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is printed as the table list and operand 3
;; as the final operand of the instruction.
4787 (define_insn "neon_vtbx1v8qi"
4788   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4789         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4790                       (match_operand:V8QI 2 "s_register_operand" "w")
4791                       (match_operand:V8QI 3 "s_register_operand" "w")]
4792                      UNSPEC_VTBX))]
4793   "TARGET_NEON"
4794   "vtbx.8\t%P0, {%P2}, %P3"
4795   [(set_attr "type" "neon_tbl1")]
;; vtbx.8 with a two-register table held in the TImode operand 2.  The
;; output code builds V8QI registers at REGNO (operands[2]) and that
;; number + 2 for the table list; operand 1 is tied to the output.
4798 (define_insn "neon_vtbx2v8qi"
4799   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4800         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4801                       (match_operand:TI 2 "s_register_operand" "w")
4802                       (match_operand:V8QI 3 "s_register_operand" "w")]
4803                      UNSPEC_VTBX))]
4804   "TARGET_NEON"
4806   rtx ops[4];
4807   int tabbase = REGNO (operands[2]);
4809   ops[0] = operands[0];
4810   ops[1] = gen_rtx_REG (V8QImode, tabbase);
4811   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4812   ops[3] = operands[3];
4813   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4815   return "";
4817   [(set_attr "type" "neon_tbl2")]
;; vtbx.8 with a three-register table held in the EImode operand 2.  The
;; table list is built from V8QI registers at REGNO (operands[2]) + 0, 2
;; and 4; operand 1 is tied to the output.
4820 (define_insn "neon_vtbx3v8qi"
4821   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4822         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4823                       (match_operand:EI 2 "s_register_operand" "w")
4824                       (match_operand:V8QI 3 "s_register_operand" "w")]
4825                      UNSPEC_VTBX))]
4826   "TARGET_NEON"
4828   rtx ops[5];
4829   int tabbase = REGNO (operands[2]);
4831   ops[0] = operands[0];
4832   ops[1] = gen_rtx_REG (V8QImode, tabbase);
4833   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4834   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4835   ops[4] = operands[3];
4836   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4838   return "";
4840   [(set_attr "type" "neon_tbl3")]
;; vtbx.8 with a four-register table held in the OImode operand 2.  The
;; table list is built from V8QI registers at REGNO (operands[2]) + 0, 2,
;; 4 and 6; operand 1 is tied to the output.
4843 (define_insn "neon_vtbx4v8qi"
4844   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4845         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4846                       (match_operand:OI 2 "s_register_operand" "w")
4847                       (match_operand:V8QI 3 "s_register_operand" "w")]
4848                      UNSPEC_VTBX))]
4849   "TARGET_NEON"
4851   rtx ops[6];
4852   int tabbase = REGNO (operands[2]);
4854   ops[0] = operands[0];
4855   ops[1] = gen_rtx_REG (V8QImode, tabbase);
4856   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4857   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4858   ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4859   ops[5] = operands[3];
4860   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4862   return "";
4864   [(set_attr "type" "neon_tbl4")]
;; Expander for vtrn: a parallel of two sets computed from the same inputs
;; (operands 1 and 2), with operand 0 receiving UNSPEC_VTRN1 and operand 3
;; receiving UNSPEC_VTRN2.  The preparation statement is empty.
4867 (define_expand "@neon_vtrn<mode>_internal"
4868   [(parallel
4869     [(set (match_operand:VDQWH 0 "s_register_operand")
4870           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4871                          (match_operand:VDQWH 2 "s_register_operand")]
4872            UNSPEC_VTRN1))
4873      (set (match_operand:VDQWH 3 "s_register_operand")
4874           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4875   "TARGET_NEON"
4876   ""
;; Matching insn for the vtrn parallel.  The two outputs are operands 0 and
;; 2 (both earlyclobber); input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the single vtrn is printed on %0, %2.
4879 ;; Note: Different operand numbering to handle tied registers correctly.
4880 (define_insn "*neon_vtrn<mode>_insn"
4881   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4882         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4883                        (match_operand:VDQWH 3 "s_register_operand" "2")]
4884          UNSPEC_VTRN1))
4885    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4886         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4887          UNSPEC_VTRN2))]
4888   "TARGET_NEON"
4889   "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4890   [(set_attr "type" "neon_permute<q>")]
;; Expander for vzip: a parallel of two sets computed from the same inputs
;; (operands 1 and 2), with operand 0 receiving UNSPEC_VZIP1 and operand 3
;; receiving UNSPEC_VZIP2.  The preparation statement is empty.
4893 (define_expand "@neon_vzip<mode>_internal"
4894   [(parallel
4895     [(set (match_operand:VDQWH 0 "s_register_operand")
4896           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4897                          (match_operand:VDQWH 2 "s_register_operand")]
4898            UNSPEC_VZIP1))
4899     (set (match_operand:VDQWH 3 "s_register_operand")
4900          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4901   "TARGET_NEON"
4902   ""
;; Matching insn for the vzip parallel.  Outputs are operands 0 and 2 (both
;; earlyclobber); input operand 1 is tied to output 0 and input operand 3
;; to output 2, so the single vzip is printed on %0, %2.
4905 ;; Note: Different operand numbering to handle tied registers correctly.
4906 (define_insn "*neon_vzip<mode>_insn"
4907   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4908         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4909                        (match_operand:VDQWH 3 "s_register_operand" "2")]
4910          UNSPEC_VZIP1))
4911    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4912         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4913          UNSPEC_VZIP2))]
4914   "TARGET_NEON"
4915   "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4916   [(set_attr "type" "neon_zip<q>")]
;; Expander for vuzp: a parallel of two sets computed from the same inputs
;; (operands 1 and 2), with operand 0 receiving UNSPEC_VUZP1 and operand 3
;; receiving UNSPEC_VUZP2.  The preparation statement is empty.
4919 (define_expand "@neon_vuzp<mode>_internal"
4920   [(parallel
4921     [(set (match_operand:VDQWH 0 "s_register_operand")
4922           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4923                         (match_operand:VDQWH 2 "s_register_operand")]
4924            UNSPEC_VUZP1))
4925      (set (match_operand:VDQWH 3 "s_register_operand")
4926           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4927   "TARGET_NEON"
4928   ""
;; Matching insn for the vuzp parallel.  Outputs are operands 0 and 2 (both
;; earlyclobber); input operand 1 is tied to output 0 and input operand 3
;; to output 2, so the single vuzp is printed on %0, %2.
4931 ;; Note: Different operand numbering to handle tied registers correctly.
4932 (define_insn "*neon_vuzp<mode>_insn"
4933   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4934         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4935                        (match_operand:VDQWH 3 "s_register_operand" "2")]
4936          UNSPEC_VUZP1))
4937    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4938         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4939          UNSPEC_VUZP2))]
4940   "TARGET_NEON"
4941   "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4942   [(set_attr "type" "neon_zip<q>")]
;; Expander that represents a single-vector load-lanes operation as an
;; UNSPEC_VLD1 of the memory operand 1 into register operand 0.  It has no
;; preparation code.
4945 (define_expand "vec_load_lanes<mode><mode>"
4946   [(set (match_operand:VDQX 0 "s_register_operand")
4947         (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4948                      UNSPEC_VLD1))]
4949   "TARGET_NEON")
;; Whole-vector vld1: loads register operand 0 from the structure memory
;; operand 1 (constraint "Um") with element size <V_sz_elem>.
4951 (define_insn "neon_vld1<mode>"
4952   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4953         (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4954                     UNSPEC_VLD1))]
4955   "TARGET_NEON"
4956   "vld1.<V_sz_elem>\t%h0, %A1"
4957   [(set_attr "type" "neon_load1_1reg<q>")]
;; Load one element from memory operand 1 into lane operand 3 of a VDX
;; vector whose other lanes come from operand 2 (tied to the output).
;; The output code remaps the lane with NEON_ENDIAN_LANE_N and stores it
;; back in operands[3]; a mode with a single element is emitted as a plain
;; vld1 of the whole register instead of the lane form.
4960 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4961 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
4962 ;; lane order here.
4963 (define_insn "neon_vld1_lane<mode>"
4964   [(set (match_operand:VDX 0 "s_register_operand" "=w")
4965         (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4966                      (match_operand:VDX 2 "s_register_operand" "0")
4967                      (match_operand:SI 3 "immediate_operand" "i")]
4968                     UNSPEC_VLD1_LANE))]
4969   "TARGET_NEON"
4971   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4972   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4973   operands[3] = GEN_INT (lane);
4974   if (max == 1)
4975     return "vld1.<V_sz_elem>\t%P0, %A1";
4976   else
4977     return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4979   [(set_attr "type" "neon_load1_one_lane<q>")]
;; VQX variant of the single-lane load.  After remapping the lane with
;; NEON_ENDIAN_LANE_N, a lane in the upper half (lane >= max / 2) is
;; rebased by max / 2 and the register number advanced by 2.  operands[0]
;; is then rewritten as a <V_HALF>mode register so the instruction is
;; printed on that half; a two-element mode uses the plain vld1 form.
4982 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4983 ;; here on big endian targets.
4984 (define_insn "neon_vld1_lane<mode>"
4985   [(set (match_operand:VQX 0 "s_register_operand" "=w")
4986         (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4987                      (match_operand:VQX 2 "s_register_operand" "0")
4988                      (match_operand:SI 3 "immediate_operand" "i")]
4989                     UNSPEC_VLD1_LANE))]
4990   "TARGET_NEON"
4992   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4993   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4994   operands[3] = GEN_INT (lane);
4995   int regno = REGNO (operands[0]);
4996   if (lane >= max / 2)
4997     {
4998       lane -= max / 2;
4999       regno += 2;
5000       operands[3] = GEN_INT (lane);
5001     }
5002   operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5003   if (max == 2)
5004     return "vld1.<V_sz_elem>\t%P0, %A1";
5005   else
5006     return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5008   [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one element from memory operand 1 and duplicate it across a
;; VD_LANE vector (vec_duplicate), using the vld1 all-lanes "[]" syntax.
5011 (define_insn "neon_vld1_dup<mode>"
5012   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5013         (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5014   "TARGET_NEON"
5015   "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5016   [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Expander with an empty preparation statement: the DImode "dup" load is
;; represented directly as an UNSPEC_VLD1 of memory operand 1.
5019 ;; Special case for DImode.  Treat it exactly like a simple load.
5020 (define_expand "neon_vld1_dupdi"
5021   [(set (match_operand:DI 0 "s_register_operand")
5022         (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
5023                    UNSPEC_VLD1))]
5024   "TARGET_NEON"
5025   ""
;; VQ2 variant of the duplicating load: one vld1 all-lanes instruction
;; naming both %e0 and %f0 of the destination in its register list.
5028 (define_insn "neon_vld1_dup<mode>"
5029   [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5030         (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5031   "TARGET_NEON"
5033   return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5035   [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicating load.  Output as "#" and split after reload into a
;; neon_vld1_dupdi load of the DImode low part of operand 0 followed by a
;; move of that low part into the DImode high part.
5038 (define_insn_and_split "neon_vld1_dupv2di"
5039    [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5040     (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5041    "TARGET_NEON"
5042    "#"
5043    "&& reload_completed"
5044    [(const_int 0)]
5045    {
5046     rtx tmprtx = gen_lowpart (DImode, operands[0]);
5047     emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5048     emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5049     DONE;
5050     }
5051   [(set_attr "length" "8")
5052    (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander that represents a single-vector store-lanes operation as an
;; UNSPEC_VST1 of register operand 1 into memory operand 0.  It has no
;; preparation code.
5055 (define_expand "vec_store_lanes<mode><mode>"
5056   [(set (match_operand:VDQX 0 "neon_struct_operand")
5057         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5058                      UNSPEC_VST1))]
5059   "TARGET_NEON")
;; Whole-vector vst1: stores register operand 1 to the structure memory
;; operand 0 (constraint "Um") with element size <V_sz_elem>.
5061 (define_insn "neon_vst1<mode>"
5062   [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5063         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5064                      UNSPEC_VST1))]
5065   "TARGET_NEON"
5066   "vst1.<V_sz_elem>\t%h1, %A0"
5067   [(set_attr "type" "neon_store1_1reg<q>")])
;; Store lane operand 2 of the VDX register operand 1 to memory operand 0.
;; The lane is remapped with NEON_ENDIAN_LANE_N and written back to
;; operands[2]; a single-element mode is emitted as a plain vst1 of the
;; register instead of the lane form.
5069 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5070 ;; here on big endian targets.
5071 (define_insn "neon_vst1_lane<mode>"
5072   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5073         (unspec:<V_elem>
5074           [(match_operand:VDX 1 "s_register_operand" "w")
5075            (match_operand:SI 2 "immediate_operand" "i")]
5076           UNSPEC_VST1_LANE))]
5077   "TARGET_NEON"
5079   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5080   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5081   operands[2] = GEN_INT (lane);
5082   if (max == 1)
5083     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5084   else
5085     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5087   [(set_attr "type" "neon_store1_one_lane<q>")]
;; VQX variant of the single-lane store.  After remapping the lane with
;; NEON_ENDIAN_LANE_N, a lane in the upper half (lane >= max / 2) is
;; rebased by max / 2 and the register number advanced by 2.  operands[1]
;; is rewritten as a <V_HALF>mode register; a two-element mode uses the
;; plain vst1 form.
5090 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5091 ;; here on big endian targets.
5092 (define_insn "neon_vst1_lane<mode>"
5093   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5094         (unspec:<V_elem>
5095           [(match_operand:VQX 1 "s_register_operand" "w")
5096            (match_operand:SI 2 "immediate_operand" "i")]
5097           UNSPEC_VST1_LANE))]
5098   "TARGET_NEON"
5100   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5101   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5102   int regno = REGNO (operands[1]);
5103   if (lane >= max / 2)
5104     {
5105       lane -= max / 2;
5106       regno += 2;
5107     }
5108   operands[2] = GEN_INT (lane);
5109   operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5110   if (max == 2)
5111     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5112   else
5113     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5115   [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander for a two-vector load-lanes into a TImode register, expressed
;; as UNSPEC_VLD2.  The VDX dummy unspec only carries the vector mode that
;; selects the <mode> instance; there is no preparation code.
5118 (define_expand "vec_load_lanesti<mode>"
5119   [(set (match_operand:TI 0 "s_register_operand")
5120         (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5121                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5122                    UNSPEC_VLD2))]
5123   "TARGET_NEON")
;; Two-vector structure load into a TImode register.  With 64-bit elements
;; it is emitted as vld1.64, otherwise as vld2.<V_sz_elem>; the "type"
;; attribute makes the same 64-bit distinction.
5125 (define_insn "neon_vld2<mode>"
5126   [(set (match_operand:TI 0 "s_register_operand" "=w")
5127         (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5128                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5129                    UNSPEC_VLD2))]
5130   "TARGET_NEON"
5132   if (<V_sz_elem> == 64)
5133     return "vld1.64\t%h0, %A1";
5134   else
5135     return "vld2.<V_sz_elem>\t%h0, %A1";
5137   [(set (attr "type")
5138       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5139                     (const_string "neon_load1_2reg<q>")
5140                     (const_string "neon_load2_2reg<q>")))]
;; Two-vector structure load into an OImode register for the VQ2BF modes,
;; always emitted as a single vld2.<V_sz_elem>.
5143 (define_insn "neon_vld2<mode>"
5144   [(set (match_operand:OI 0 "s_register_operand" "=w")
5145         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5146                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5147                    UNSPEC_VLD2))]
5148   "TARGET_NEON"
5149   "vld2.<V_sz_elem>\t%h0, %A1"
5150   [(set_attr "type" "neon_load2_2reg_q")])
;; Load a two-element structure into lane operand 3 of the TImode register
;; pair; operand 2 supplies the other lanes and is tied to the output.
;; The lane is remapped with NEON_ENDIAN_LANE_N and the instruction is
;; printed on DImode registers regno and regno + 2 of operand 0.
5152 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5153 ;; here on big endian targets.
5154 (define_insn "neon_vld2_lane<mode>"
5155   [(set (match_operand:TI 0 "s_register_operand" "=w")
5156         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5157                     (match_operand:TI 2 "s_register_operand" "0")
5158                     (match_operand:SI 3 "immediate_operand" "i")
5159                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5160                    UNSPEC_VLD2_LANE))]
5161   "TARGET_NEON"
5163   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5164   int regno = REGNO (operands[0]);
5165   rtx ops[4];
5166   ops[0] = gen_rtx_REG (DImode, regno);
5167   ops[1] = gen_rtx_REG (DImode, regno + 2);
5168   ops[2] = operands[1];
5169   ops[3] = GEN_INT (lane);
5170   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5171   return "";
5173   [(set_attr "type" "neon_load2_one_lane<q>")]
;; OImode (VQ_HS) variant of the two-element lane load.  A remapped lane in
;; the upper half (lane >= max / 2) is rebased by max / 2 and the base
;; register number advanced by 2; the instruction is then printed on
;; DImode registers regno and regno + 4.
5176 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5177 ;; here on big endian targets.
5178 (define_insn "neon_vld2_lane<mode>"
5179   [(set (match_operand:OI 0 "s_register_operand" "=w")
5180         (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5181                     (match_operand:OI 2 "s_register_operand" "0")
5182                     (match_operand:SI 3 "immediate_operand" "i")
5183                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5184                    UNSPEC_VLD2_LANE))]
5185   "TARGET_NEON"
5187   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5188   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5189   int regno = REGNO (operands[0]);
5190   rtx ops[4];
5191   if (lane >= max / 2)
5192     {
5193       lane -= max / 2;
5194       regno += 2;
5195     }
5196   ops[0] = gen_rtx_REG (DImode, regno);
5197   ops[1] = gen_rtx_REG (DImode, regno + 4);
5198   ops[2] = operands[1];
5199   ops[3] = GEN_INT (lane);
5200   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5201   return "";
5203   [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load a two-element structure and replicate it into a TImode register
;; pair.  Modes with more than one element use the vld2 all-lanes form on
;; %e0 and %f0; single-element modes fall back to a plain vld1.  The
;; "type" attribute follows the same element-count test.
5206 (define_insn "neon_vld2_dup<mode>"
5207   [(set (match_operand:TI 0 "s_register_operand" "=w")
5208         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5209                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5210                    UNSPEC_VLD2_DUP))]
5211   "TARGET_NEON"
5213   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5214     return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5215   else
5216     return "vld1.<V_sz_elem>\t%h0, %A1";
5218   [(set (attr "type")
5219       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5220                     (const_string "neon_load2_all_lanes<q>")
5221                     (const_string "neon_load1_1reg<q>")))]
;; V8BF vld2 "dup" into an OImode register, only under TARGET_BF16_SIMD.
;; The output code prints vld2.16 on four V4BF registers at
;; REGNO (operands[0]) + 0, 2, 4 and 6.
;; NOTE(review): the template uses the plain register-list form with no
;; "[]" all-lanes suffix, unlike neon_vld3_dupv8bf -- confirm that this is
;; the intended instruction for a duplicating load.
5224 (define_insn "neon_vld2_dupv8bf"
5225   [(set (match_operand:OI 0 "s_register_operand" "=w")
5226         (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5227                     (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5228                    UNSPEC_VLD2_DUP))]
5229   "TARGET_BF16_SIMD"
5230   {
5231     rtx ops[5];
5232     int tabbase = REGNO (operands[0]);
5234     ops[4] = operands[1];
5235     ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5236     ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5237     ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5238     ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5239     output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
5240     return "";
5241   }
5242   [(set_attr "type" "neon_load2_all_lanes_q")]
;; Expander for a two-vector store-lanes from a TImode register, expressed
;; as UNSPEC_VST2.  The VDX dummy unspec only carries the vector mode that
;; selects the <mode> instance; there is no preparation code.
5245 (define_expand "vec_store_lanesti<mode>"
5246   [(set (match_operand:TI 0 "neon_struct_operand")
5247         (unspec:TI [(match_operand:TI 1 "s_register_operand")
5248                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5249                    UNSPEC_VST2))]
5250   "TARGET_NEON")
;; Two-vector structure store from a TImode register.  With 64-bit
;; elements it is emitted as vst1.64, otherwise as vst2.<V_sz_elem>.
;; NOTE(review): the non-64-bit "type" is neon_store2_one_lane<q> although
;; this is a whole-register store; the matching load pattern uses
;; neon_load2_2reg<q>.  Possibly a copy-paste slip that only affects
;; scheduling -- confirm before changing.
5252 (define_insn "neon_vst2<mode>"
5253   [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5254         (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5255                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5256                    UNSPEC_VST2))]
5257   "TARGET_NEON"
5259   if (<V_sz_elem> == 64)
5260     return "vst1.64\t%h1, %A0";
5261   else
5262     return "vst2.<V_sz_elem>\t%h1, %A0";
5264   [(set (attr "type")
5265       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5266                     (const_string "neon_store1_2reg<q>")
5267                     (const_string "neon_store2_one_lane<q>")))]
;; Two-vector structure store from an OImode register for the VQ2BF modes,
;; always emitted as a single vst2.<V_sz_elem>.
5270 (define_insn "neon_vst2<mode>"
5271   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5272         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5273                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5274                    UNSPEC_VST2))]
5275   "TARGET_NEON"
5276   "vst2.<V_sz_elem>\t%h1, %A0"
5277   [(set_attr "type" "neon_store2_4reg<q>")]
;; Store lane operand 2 of both vectors in the TImode register operand 1 to
;; memory operand 0.  The lane is remapped with NEON_ENDIAN_LANE_N and the
;; instruction is printed on DImode registers regno and regno + 2.
5280 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5281 ;; here on big endian targets.
5282 (define_insn "neon_vst2_lane<mode>"
5283   [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5284         (unspec:<V_two_elem>
5285           [(match_operand:TI 1 "s_register_operand" "w")
5286            (match_operand:SI 2 "immediate_operand" "i")
5287            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5288           UNSPEC_VST2_LANE))]
5289   "TARGET_NEON"
5291   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5292   int regno = REGNO (operands[1]);
5293   rtx ops[4];
5294   ops[0] = operands[0];
5295   ops[1] = gen_rtx_REG (DImode, regno);
5296   ops[2] = gen_rtx_REG (DImode, regno + 2);
5297   ops[3] = GEN_INT (lane);
5298   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5299   return "";
5301   [(set_attr "type" "neon_store2_one_lane<q>")]
;; OImode (VQ_HS) variant of the two-element lane store.  A remapped lane
;; in the upper half (lane >= max / 2) is rebased by max / 2 and the base
;; register number advanced by 2; the instruction is then printed on
;; DImode registers regno and regno + 4.
5304 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5305 ;; here on big endian targets.
5306 (define_insn "neon_vst2_lane<mode>"
5307   [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5308         (unspec:<V_two_elem>
5309            [(match_operand:OI 1 "s_register_operand" "w")
5310             (match_operand:SI 2 "immediate_operand" "i")
5311             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5312            UNSPEC_VST2_LANE))]
5313   "TARGET_NEON"
5315   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5316   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5317   int regno = REGNO (operands[1]);
5318   rtx ops[4];
5319   if (lane >= max / 2)
5320     {
5321       lane -= max / 2;
5322       regno += 2;
5323     }
5324   ops[0] = operands[0];
5325   ops[1] = gen_rtx_REG (DImode, regno);
5326   ops[2] = gen_rtx_REG (DImode, regno + 4);
5327   ops[3] = GEN_INT (lane);
5328   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5329   return "";
5331   [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander for a three-vector load-lanes into an EImode register,
;; expressed as UNSPEC_VLD3.  The VDX dummy unspec only carries the vector
;; mode that selects the <mode> instance; there is no preparation code.
5334 (define_expand "vec_load_lanesei<mode>"
5335   [(set (match_operand:EI 0 "s_register_operand")
5336         (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5337                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5338                    UNSPEC_VLD3))]
5339   "TARGET_NEON")
;; Three-vector structure load into an EImode register.  With 64-bit
;; elements it is emitted as vld1.64, otherwise as vld3.<V_sz_elem>; the
;; "type" attribute makes the same 64-bit distinction.
5341 (define_insn "neon_vld3<mode>"
5342   [(set (match_operand:EI 0 "s_register_operand" "=w")
5343         (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5344                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5345                    UNSPEC_VLD3))]
5346   "TARGET_NEON"
5348   if (<V_sz_elem> == 64)
5349     return "vld1.64\t%h0, %A1";
5350   else
5351     return "vld3.<V_sz_elem>\t%h0, %A1";
5353   [(set (attr "type")
5354       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5355                     (const_string "neon_load1_3reg<q>")
5356                     (const_string "neon_load3_3reg<q>")))]
;; Expander for a three-vector load-lanes into a CImode register (VQ2
;; modes).  It simply forwards both operands to gen_neon_vld3<mode>.
5359 (define_expand "vec_load_lanesci<mode>"
5360   [(match_operand:CI 0 "s_register_operand")
5361    (match_operand:CI 1 "neon_struct_operand")
5362    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5363   "TARGET_NEON"
5365   emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5366   DONE;
;; CImode vld3 expander.  The load is done as two EImode halves: the first
;; EImode chunk of the memory operand goes through neon_vld3qa, then the
;; address is advanced by GET_MODE_SIZE (EImode) and the second chunk goes
;; through neon_vld3qb, which also takes operand 0 as an input.
5369 (define_expand "neon_vld3<mode>"
5370   [(match_operand:CI 0 "s_register_operand")
5371    (match_operand:CI 1 "neon_struct_operand")
5372    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5373   "TARGET_NEON"
5375   rtx mem;
5377   mem = adjust_address (operands[1], EImode, 0);
5378   emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5379   mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5380   emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
5381   DONE;
;; First half of the CImode vld3: one vld3 from the EImode memory operand
;; into DImode registers regno, regno + 4 and regno + 8 of operand 0.
5384 (define_insn "neon_vld3qa<mode>"
5385   [(set (match_operand:CI 0 "s_register_operand" "=w")
5386         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5387                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5388                    UNSPEC_VLD3A))]
5389   "TARGET_NEON"
5391   int regno = REGNO (operands[0]);
5392   rtx ops[4];
5393   ops[0] = gen_rtx_REG (DImode, regno);
5394   ops[1] = gen_rtx_REG (DImode, regno + 4);
5395   ops[2] = gen_rtx_REG (DImode, regno + 8);
5396   ops[3] = operands[1];
5397   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5398   return "";
5400   [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the CImode vld3: one vld3 from the EImode memory operand
;; into DImode registers regno + 2, regno + 6 and regno + 10 of operand 0.
;; Operand 2 is the previous CImode value, tied to the output.
5403 (define_insn "neon_vld3qb<mode>"
5404   [(set (match_operand:CI 0 "s_register_operand" "=w")
5405         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5406                     (match_operand:CI 2 "s_register_operand" "0")
5407                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5408                    UNSPEC_VLD3B))]
5409   "TARGET_NEON"
5411   int regno = REGNO (operands[0]);
5412   rtx ops[4];
5413   ops[0] = gen_rtx_REG (DImode, regno + 2);
5414   ops[1] = gen_rtx_REG (DImode, regno + 6);
5415   ops[2] = gen_rtx_REG (DImode, regno + 10);
5416   ops[3] = operands[1];
5417   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5418   return "";
5420   [(set_attr "type" "neon_load3_3reg<q>")]
;; Load a three-element structure into lane operand 3 of the EImode
;; register triple; operand 2 supplies the other lanes and is tied to the
;; output.  The lane is remapped with NEON_ENDIAN_LANE_N and the
;; instruction is printed on DImode registers regno, regno + 2 and
;; regno + 4.  The memory operand is printed with plain %3, not %A3.
5423 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5424 ;; here on big endian targets.
5425 (define_insn "neon_vld3_lane<mode>"
5426   [(set (match_operand:EI 0 "s_register_operand" "=w")
5427         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5428                     (match_operand:EI 2 "s_register_operand" "0")
5429                     (match_operand:SI 3 "immediate_operand" "i")
5430                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5431                    UNSPEC_VLD3_LANE))]
5432   "TARGET_NEON"
5434   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5435   int regno = REGNO (operands[0]);
5436   rtx ops[5];
5437   ops[0] = gen_rtx_REG (DImode, regno);
5438   ops[1] = gen_rtx_REG (DImode, regno + 2);
5439   ops[2] = gen_rtx_REG (DImode, regno + 4);
5440   ops[3] = operands[1];
5441   ops[4] = GEN_INT (lane);
5442   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5443                    ops);
5444   return "";
5446   [(set_attr "type" "neon_load3_one_lane<q>")]
;; CImode (VQ_HS) variant of the three-element lane load.  A remapped lane
;; in the upper half (lane >= max / 2) is rebased by max / 2 and the base
;; register number advanced by 2; the instruction is then printed on
;; DImode registers regno, regno + 4 and regno + 8.
5449 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5450 ;; here on big endian targets.
5451 (define_insn "neon_vld3_lane<mode>"
5452   [(set (match_operand:CI 0 "s_register_operand" "=w")
5453         (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5454                     (match_operand:CI 2 "s_register_operand" "0")
5455                     (match_operand:SI 3 "immediate_operand" "i")
5456                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5457                    UNSPEC_VLD3_LANE))]
5458   "TARGET_NEON"
5460   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5461   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5462   int regno = REGNO (operands[0]);
5463   rtx ops[5];
5464   if (lane >= max / 2)
5465     {
5466       lane -= max / 2;
5467       regno += 2;
5468     }
5469   ops[0] = gen_rtx_REG (DImode, regno);
5470   ops[1] = gen_rtx_REG (DImode, regno + 4);
5471   ops[2] = gen_rtx_REG (DImode, regno + 8);
5472   ops[3] = operands[1];
5473   ops[4] = GEN_INT (lane);
5474   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5475                    ops);
5476   return "";
5478   [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a three-element structure and replicate it into an EImode register
;; triple.  Modes with more than one element print the vld3 all-lanes form
;; on DImode registers regno, regno + 2 and regno + 4; single-element
;; modes fall back to a plain vld1.  The "type" attribute follows the same
;; element-count test.
5481 (define_insn "neon_vld3_dup<mode>"
5482   [(set (match_operand:EI 0 "s_register_operand" "=w")
5483         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5484                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5485                    UNSPEC_VLD3_DUP))]
5486   "TARGET_NEON"
5488   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5489     {
5490       int regno = REGNO (operands[0]);
5491       rtx ops[4];
5492       ops[0] = gen_rtx_REG (DImode, regno);
5493       ops[1] = gen_rtx_REG (DImode, regno + 2);
5494       ops[2] = gen_rtx_REG (DImode, regno + 4);
5495       ops[3] = operands[1];
5496       output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5497       return "";
5498     }
5499   else
5500     return "vld1.<V_sz_elem>\t%h0, %A1";
5502   [(set (attr "type")
5503       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5504                     (const_string "neon_load3_all_lanes<q>")
5505                     (const_string "neon_load1_1reg<q>")))])
;; V8BF vld3 "dup" into a CImode register, only under TARGET_BF16_SIMD.
;; The output code prints the vld3.16 all-lanes form on three V4BF
;; registers at REGNO (operands[0]) + 0, 2 and 4.
;; The unspec is UNSPEC_VLD3_DUP, matching the vld3 instruction emitted
;; and the neon_vld3_dup<mode> pattern; it was previously mislabelled as
;; UNSPEC_VLD2_DUP.
;; NOTE(review): the memory operand is still V2BF, as in the vld2 variant;
;; confirm whether a three-element mode is wanted here.
5507 (define_insn "neon_vld3_dupv8bf"
5508   [(set (match_operand:CI 0 "s_register_operand" "=w")
5509         (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5510                     (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5511                    UNSPEC_VLD3_DUP))]
5512   "TARGET_BF16_SIMD"
5513   {
5514     rtx ops[4];
5515     int tabbase = REGNO (operands[0]);
5517     ops[3] = operands[1];
5518     ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5519     ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5520     ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5521     output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
5522     return "";
5523   }
5524   [(set_attr "type" "neon_load3_all_lanes_q")]
;; Expander for a three-vector store-lanes from an EImode register,
;; expressed as UNSPEC_VST3.  The VDX dummy unspec only carries the vector
;; mode that selects the <mode> instance; there is no preparation code.
5527 (define_expand "vec_store_lanesei<mode>"
5528   [(set (match_operand:EI 0 "neon_struct_operand")
5529         (unspec:EI [(match_operand:EI 1 "s_register_operand")
5530                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5531                    UNSPEC_VST3))]
5532   "TARGET_NEON")
;; Three-vector structure store from an EImode register.  With 64-bit
;; elements it is emitted as vst1.64, otherwise as vst3.<V_sz_elem>.
;; NOTE(review): the non-64-bit "type" is neon_store3_one_lane<q> although
;; this is a whole-register store; neon_vst3qa/qb use neon_store3_3reg<q>.
;; Possibly a copy-paste slip that only affects scheduling -- confirm
;; before changing.
5534 (define_insn "neon_vst3<mode>"
5535   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5536         (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5537                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5538                    UNSPEC_VST3))]
5539   "TARGET_NEON"
5541   if (<V_sz_elem> == 64)
5542     return "vst1.64\t%h1, %A0";
5543   else
5544     return "vst3.<V_sz_elem>\t%h1, %A0";
5546   [(set (attr "type")
5547       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5548                     (const_string "neon_store1_3reg<q>")
5549                     (const_string "neon_store3_one_lane<q>")))])
;; Expander for a three-vector store-lanes from a CImode register (VQ2
;; modes).  It simply forwards both operands to gen_neon_vst3<mode>.
5551 (define_expand "vec_store_lanesci<mode>"
5552   [(match_operand:CI 0 "neon_struct_operand")
5553    (match_operand:CI 1 "s_register_operand")
5554    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5555   "TARGET_NEON"
5557   emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
5558   DONE;
;; CImode vst3 expander.  The store is done as two EImode halves: the
;; first EImode chunk of the memory operand is written by neon_vst3qa,
;; then the address is advanced by GET_MODE_SIZE (EImode) and the second
;; chunk is written by neon_vst3qb, both reading the same CImode register.
5561 (define_expand "neon_vst3<mode>"
5562   [(match_operand:CI 0 "neon_struct_operand")
5563    (match_operand:CI 1 "s_register_operand")
5564    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5565   "TARGET_NEON"
5567   rtx mem;
5569   mem = adjust_address (operands[0], EImode, 0);
5570   emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5571   mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5572   emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
5573   DONE;
;; First half of the CImode vst3: one vst3 to the EImode memory operand
;; from DImode registers regno, regno + 4 and regno + 8 of operand 1.
5576 (define_insn "neon_vst3qa<mode>"
5577   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5578         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5579                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5580                    UNSPEC_VST3A))]
5581   "TARGET_NEON"
5583   int regno = REGNO (operands[1]);
5584   rtx ops[4];
5585   ops[0] = operands[0];
5586   ops[1] = gen_rtx_REG (DImode, regno);
5587   ops[2] = gen_rtx_REG (DImode, regno + 4);
5588   ops[3] = gen_rtx_REG (DImode, regno + 8);
5589   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5590   return "";
5592   [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the CImode vst3: one vst3 to the EImode memory operand
;; from DImode registers regno + 2, regno + 6 and regno + 10 of operand 1.
5595 (define_insn "neon_vst3qb<mode>"
5596   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5597         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5598                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5599                    UNSPEC_VST3B))]
5600   "TARGET_NEON"
5602   int regno = REGNO (operands[1]);
5603   rtx ops[4];
5604   ops[0] = operands[0];
5605   ops[1] = gen_rtx_REG (DImode, regno + 2);
5606   ops[2] = gen_rtx_REG (DImode, regno + 6);
5607   ops[3] = gen_rtx_REG (DImode, regno + 10);
5608   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5609   return "";
5611   [(set_attr "type" "neon_store3_3reg<q>")]
;; Single-lane vst3 for the VD_LANE modes (source held in EImode).
;; operands[2] is the requested lane; it is mapped through
;; NEON_ENDIAN_LANE_N before printing.  The register list is the DImode
;; registers at REGNO (operands[1]) + 0, + 2 and + 4, each indexed by that
;; lane.
5614 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5615 ;; here on big endian targets.
5616 (define_insn "neon_vst3_lane<mode>"
5617   [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5618         (unspec:<V_three_elem>
5619            [(match_operand:EI 1 "s_register_operand" "w")
5620             (match_operand:SI 2 "immediate_operand" "i")
5621             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5622            UNSPEC_VST3_LANE))]
5623   "TARGET_NEON"
5625   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5626   int regno = REGNO (operands[1]);
5627   rtx ops[5];
5628   ops[0] = operands[0];
5629   ops[1] = gen_rtx_REG (DImode, regno);
5630   ops[2] = gen_rtx_REG (DImode, regno + 2);
5631   ops[3] = gen_rtx_REG (DImode, regno + 4);
5632   ops[4] = GEN_INT (lane);
5633   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5634                    ops);
5635   return "";
5637   [(set_attr "type" "neon_store3_one_lane<q>")]
;; Single-lane vst3 for the VQ_HS modes (source held in CImode).
;; After the NEON_ENDIAN_LANE_N mapping, a lane in the upper half of the
;; vector (lane >= nunits / 2) is rebased by subtracting nunits / 2 and the
;; base register number is advanced by 2.  The register list is then the
;; DImode registers at regno + 0, + 4 and + 8, each indexed by the lane.
5640 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5641 ;; here on big endian targets.
5642 (define_insn "neon_vst3_lane<mode>"
5643   [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5644         (unspec:<V_three_elem>
5645            [(match_operand:CI 1 "s_register_operand" "w")
5646             (match_operand:SI 2 "immediate_operand" "i")
5647             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5648            UNSPEC_VST3_LANE))]
5649   "TARGET_NEON"
5651   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5652   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5653   int regno = REGNO (operands[1]);
5654   rtx ops[5];
5655   if (lane >= max / 2)
5656     {
5657       lane -= max / 2;
5658       regno += 2;
5659     }
5660   ops[0] = operands[0];
5661   ops[1] = gen_rtx_REG (DImode, regno);
5662   ops[2] = gen_rtx_REG (DImode, regno + 4);
5663   ops[3] = gen_rtx_REG (DImode, regno + 8);
5664   ops[4] = GEN_INT (lane);
5665   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5666                    ops);
5667   return "";
5669   [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander vec_load_lanesoi<mode> (TARGET_NEON).  It has no preparation
;; statements: the expansion is the OImode UNSPEC_VLD4 set written in the
;; pattern itself, iterated over VDX.
5672 (define_expand "vec_load_lanesoi<mode>"
5673   [(set (match_operand:OI 0 "s_register_operand")
5674         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5675                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5676                    UNSPEC_VLD4))]
5677   "TARGET_NEON")
;; OImode vld4 for the VDXBF modes.  When <V_sz_elem> is 64 the pattern
;; prints vld1.64 instead of vld4.<V_sz_elem>; the "type" attribute makes
;; the same 64-bit distinction (neon_load1_4reg vs. neon_load4_4reg).
5679 (define_insn "neon_vld4<mode>"
5680   [(set (match_operand:OI 0 "s_register_operand" "=w")
5681         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5682                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5683                    UNSPEC_VLD4))]
5684   "TARGET_NEON"
5686   if (<V_sz_elem> == 64)
5687     return "vld1.64\t%h0, %A1";
5688   else
5689     return "vld4.<V_sz_elem>\t%h0, %A1";
5691   [(set (attr "type")
5692       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5693                     (const_string "neon_load1_4reg<q>")
5694                     (const_string "neon_load4_4reg<q>")))]
;; Expander neon_vld4<mode> for XImode operands: the load is issued as two
;; OImode pieces.  gen_neon_vld4qa<mode> reads the piece at offset 0;
;; gen_neon_vld4qb<mode> reads the piece at offset GET_MODE_SIZE (OImode)
;; and also takes operands[0] as an input, so the result of the first insn
;; is carried into the second.
5697 (define_expand "neon_vld4<mode>"
5698   [(match_operand:XI 0 "s_register_operand")
5699    (match_operand:XI 1 "neon_struct_operand")
5700    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5701   "TARGET_NEON"
5703   rtx mem;
5705   mem = adjust_address (operands[1], OImode, 0);
5706   emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5707   mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5708   emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
5709   DONE;
;; First half of the split XImode vld4 (UNSPEC_VLD4A).  Prints one
;; vld4.<V_sz_elem> whose register list is the DImode registers at
;; REGNO (operands[0]) + 0, + 4, + 8 and + 12; returns an empty template
;; because the text is already written by output_asm_insn.
5712 (define_insn "neon_vld4qa<mode>"
5713   [(set (match_operand:XI 0 "s_register_operand" "=w")
5714         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5715                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5716                    UNSPEC_VLD4A))]
5717   "TARGET_NEON"
5719   int regno = REGNO (operands[0]);
5720   rtx ops[5];
5721   ops[0] = gen_rtx_REG (DImode, regno);
5722   ops[1] = gen_rtx_REG (DImode, regno + 4);
5723   ops[2] = gen_rtx_REG (DImode, regno + 8);
5724   ops[3] = gen_rtx_REG (DImode, regno + 12);
5725   ops[4] = operands[1];
5726   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5727   return "";
5729   [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the split XImode vld4 (UNSPEC_VLD4B).  Operand 2 is tied
;; to operand 0 by the "0" constraint.  The register list uses the DImode
;; registers at REGNO (operands[0]) + 2, + 6, + 10 and + 14.
5732 (define_insn "neon_vld4qb<mode>"
5733   [(set (match_operand:XI 0 "s_register_operand" "=w")
5734         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5735                     (match_operand:XI 2 "s_register_operand" "0")
5736                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5737                    UNSPEC_VLD4B))]
5738   "TARGET_NEON"
5740   int regno = REGNO (operands[0]);
5741   rtx ops[5];
5742   ops[0] = gen_rtx_REG (DImode, regno + 2);
5743   ops[1] = gen_rtx_REG (DImode, regno + 6);
5744   ops[2] = gen_rtx_REG (DImode, regno + 10);
5745   ops[3] = gen_rtx_REG (DImode, regno + 14);
5746   ops[4] = operands[1];
5747   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5748   return "";
5750   [(set_attr "type" "neon_load4_4reg<q>")]
;; Single-lane vld4 for the VD_LANE modes (destination held in OImode,
;; tied to operand 2).  operands[3] is the requested lane, mapped through
;; NEON_ENDIAN_LANE_N.  The register list is the DImode registers at
;; REGNO (operands[0]) + 0, + 2, + 4 and + 6, each indexed by that lane.
5753 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5754 ;; here on big endian targets.
5755 (define_insn "neon_vld4_lane<mode>"
5756   [(set (match_operand:OI 0 "s_register_operand" "=w")
5757         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5758                     (match_operand:OI 2 "s_register_operand" "0")
5759                     (match_operand:SI 3 "immediate_operand" "i")
5760                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5761                    UNSPEC_VLD4_LANE))]
5762   "TARGET_NEON"
5764   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5765   int regno = REGNO (operands[0]);
5766   rtx ops[6];
5767   ops[0] = gen_rtx_REG (DImode, regno);
5768   ops[1] = gen_rtx_REG (DImode, regno + 2);
5769   ops[2] = gen_rtx_REG (DImode, regno + 4);
5770   ops[3] = gen_rtx_REG (DImode, regno + 6);
5771   ops[4] = operands[1];
5772   ops[5] = GEN_INT (lane);
5773   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5774                    ops);
5775   return "";
5777   [(set_attr "type" "neon_load4_one_lane<q>")]
;; Single-lane vld4 for the VQ_HS modes (destination held in XImode, tied
;; to operand 2).  After the NEON_ENDIAN_LANE_N mapping, a lane in the
;; upper half (lane >= nunits / 2) is rebased by nunits / 2 and the base
;; register number is advanced by 2.  The register list is the DImode
;; registers at regno + 0, + 4, + 8 and + 12, each indexed by the lane.
5780 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5781 ;; here on big endian targets.
5782 (define_insn "neon_vld4_lane<mode>"
5783   [(set (match_operand:XI 0 "s_register_operand" "=w")
5784         (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5785                     (match_operand:XI 2 "s_register_operand" "0")
5786                     (match_operand:SI 3 "immediate_operand" "i")
5787                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5788                    UNSPEC_VLD4_LANE))]
5789   "TARGET_NEON"
5791   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5792   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5793   int regno = REGNO (operands[0]);
5794   rtx ops[6];
5795   if (lane >= max / 2)
5796     {
5797       lane -= max / 2;
5798       regno += 2;
5799     }
5800   ops[0] = gen_rtx_REG (DImode, regno);
5801   ops[1] = gen_rtx_REG (DImode, regno + 4);
5802   ops[2] = gen_rtx_REG (DImode, regno + 8);
5803   ops[3] = gen_rtx_REG (DImode, regno + 12);
5804   ops[4] = operands[1];
5805   ops[5] = GEN_INT (lane);
5806   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5807                    ops);
5808   return "";
5810   [(set_attr "type" "neon_load4_one_lane<q>")]
;; vld4 "dup" form for the VDXBF modes (empty lane brackets in the
;; register list).  For modes with more than one element it prints
;; vld4.<V_sz_elem> over the DImode registers at REGNO (operands[0]) + 0,
;; + 2, + 4 and + 6.  For single-element modes it prints vld1.<V_sz_elem>
;; instead.  The "type" attribute switches on <V_mode_nunits> > 1 in the
;; same way.
5813 (define_insn "neon_vld4_dup<mode>"
5814   [(set (match_operand:OI 0 "s_register_operand" "=w")
5815         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5816                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5817                    UNSPEC_VLD4_DUP))]
5818   "TARGET_NEON"
5820   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5821     {
5822       int regno = REGNO (operands[0]);
5823       rtx ops[5];
5824       ops[0] = gen_rtx_REG (DImode, regno);
5825       ops[1] = gen_rtx_REG (DImode, regno + 2);
5826       ops[2] = gen_rtx_REG (DImode, regno + 4);
5827       ops[3] = gen_rtx_REG (DImode, regno + 6);
5828       ops[4] = operands[1];
5829       output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5830                        ops);
5831       return "";
5832     }
5833   else
5834     return "vld1.<V_sz_elem>\t%h0, %A1";
5836   [(set (attr "type")
5837       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5838                     (const_string "neon_load4_all_lanes<q>")
5839                     (const_string "neon_load1_1reg<q>")))]
;; V8BF variant of the vld4 "dup" load, enabled under TARGET_BF16_SIMD.
;; Prints vld4.16 over the V4BFmode registers at REGNO (operands[0]) + 0,
;; + 2, + 4 and + 6, with operands[1] as the address.
;; NOTE(review): the unspec code here is UNSPEC_VLD2_DUP although the
;; instruction printed is vld4.16, and the memory operand mode is V2BF;
;; confirm whether UNSPEC_VLD4_DUP and a four-element memory mode were
;; intended.
5842 (define_insn "neon_vld4_dupv8bf"
5843   [(set (match_operand:XI 0 "s_register_operand" "=w")
5844         (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5845                     (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5846                    UNSPEC_VLD2_DUP))]
5847   "TARGET_BF16_SIMD"
5848   {
5849     rtx ops[5];
5850     int tabbase = REGNO (operands[0]);
5852     ops[4] = operands[1];
5853     ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5854     ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5855     ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5856     ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5857     output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
5858     return "";
5859   }
5860   [(set_attr "type" "neon_load4_all_lanes_q")]
;; Expander vec_store_lanesoi<mode> (TARGET_NEON).  It has no preparation
;; statements: the expansion is the OImode UNSPEC_VST4 set written in the
;; pattern itself, iterated over VDX.
5863 (define_expand "vec_store_lanesoi<mode>"
5864   [(set (match_operand:OI 0 "neon_struct_operand")
5865         (unspec:OI [(match_operand:OI 1 "s_register_operand")
5866                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5867                    UNSPEC_VST4))]
5868   "TARGET_NEON")
;; OImode vst4 for the VDXBF modes.  When <V_sz_elem> is 64 the pattern
;; prints vst1.64 instead of vst4.<V_sz_elem>; the "type" attribute makes
;; the same 64-bit distinction (neon_store1_4reg vs. neon_store4_4reg).
5870 (define_insn "neon_vst4<mode>"
5871   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5872         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5873                     (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5874                    UNSPEC_VST4))]
5875   "TARGET_NEON"
5877   if (<V_sz_elem> == 64)
5878     return "vst1.64\t%h1, %A0";
5879   else
5880     return "vst4.<V_sz_elem>\t%h1, %A0";
5882   [(set (attr "type")
5883       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5884                     (const_string "neon_store1_4reg<q>")
5885                     (const_string "neon_store4_4reg<q>")))]
;; Expander neon_vst4<mode> for XImode operands: the store is issued as two
;; OImode pieces.  The first addresses operands[0] at offset 0 and goes
;; through gen_neon_vst4qa<mode>; the second is offset by
;; GET_MODE_SIZE (OImode) and goes through gen_neon_vst4qb<mode>.  Both
;; receive the whole of operands[1].
5888 (define_expand "neon_vst4<mode>"
5889   [(match_operand:XI 0 "neon_struct_operand")
5890    (match_operand:XI 1 "s_register_operand")
5891    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5892   "TARGET_NEON"
5894   rtx mem;
5896   mem = adjust_address (operands[0], OImode, 0);
5897   emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5898   mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5899   emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
5900   DONE;
;; First half of the split XImode vst4 (UNSPEC_VST4A).  Prints one
;; vst4.<V_sz_elem> whose register list is the DImode registers at
;; REGNO (operands[1]) + 0, + 4, + 8 and + 12; returns an empty template
;; because the text is already written by output_asm_insn.
5903 (define_insn "neon_vst4qa<mode>"
5904   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5905         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5906                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5907                    UNSPEC_VST4A))]
5908   "TARGET_NEON"
5910   int regno = REGNO (operands[1]);
5911   rtx ops[5];
5912   ops[0] = operands[0];
5913   ops[1] = gen_rtx_REG (DImode, regno);
5914   ops[2] = gen_rtx_REG (DImode, regno + 4);
5915   ops[3] = gen_rtx_REG (DImode, regno + 8);
5916   ops[4] = gen_rtx_REG (DImode, regno + 12);
5917   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5918   return "";
5920   [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of the split XImode vst4 (UNSPEC_VST4B).  Same shape as
;; neon_vst4qa<mode>, but the register list uses the DImode registers at
;; REGNO (operands[1]) + 2, + 6, + 10 and + 14.
5923 (define_insn "neon_vst4qb<mode>"
5924   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5925         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5926                     (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5927                    UNSPEC_VST4B))]
5928   "TARGET_NEON"
5930   int regno = REGNO (operands[1]);
5931   rtx ops[5];
5932   ops[0] = operands[0];
5933   ops[1] = gen_rtx_REG (DImode, regno + 2);
5934   ops[2] = gen_rtx_REG (DImode, regno + 6);
5935   ops[3] = gen_rtx_REG (DImode, regno + 10);
5936   ops[4] = gen_rtx_REG (DImode, regno + 14);
5937   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5938   return "";
5940   [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane vst4 for the VD_LANE modes (source held in OImode).
;; operands[2] is the requested lane, mapped through NEON_ENDIAN_LANE_N.
;; The register list is the DImode registers at REGNO (operands[1]) + 0,
;; + 2, + 4 and + 6, each indexed by that lane.
5943 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5944 ;; here on big endian targets.
5945 (define_insn "neon_vst4_lane<mode>"
5946   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5947         (unspec:<V_four_elem>
5948            [(match_operand:OI 1 "s_register_operand" "w")
5949             (match_operand:SI 2 "immediate_operand" "i")
5950             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5951            UNSPEC_VST4_LANE))]
5952   "TARGET_NEON"
5954   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5955   int regno = REGNO (operands[1]);
5956   rtx ops[6];
5957   ops[0] = operands[0];
5958   ops[1] = gen_rtx_REG (DImode, regno);
5959   ops[2] = gen_rtx_REG (DImode, regno + 2);
5960   ops[3] = gen_rtx_REG (DImode, regno + 4);
5961   ops[4] = gen_rtx_REG (DImode, regno + 6);
5962   ops[5] = GEN_INT (lane);
5963   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5964                    ops);
5965   return "";
5967   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Single-lane vst4 for the VQ_HS modes (source held in XImode).
;; After the NEON_ENDIAN_LANE_N mapping, a lane in the upper half
;; (lane >= nunits / 2) is rebased by nunits / 2 and the base register
;; number is advanced by 2.  The register list is the DImode registers at
;; regno + 0, + 4, + 8 and + 12, each indexed by the lane.
;; The "type" attribute is neon_store4_one_lane<q>, matching the VD_LANE
;; variant of this pattern and the Q-register vst3/vld4 lane patterns; this
;; pattern stores one lane, not four whole registers.
5970 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5971 ;; here on big endian targets.
5972 (define_insn "neon_vst4_lane<mode>"
5973   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5974         (unspec:<V_four_elem>
5975            [(match_operand:XI 1 "s_register_operand" "w")
5976             (match_operand:SI 2 "immediate_operand" "i")
5977             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5978            UNSPEC_VST4_LANE))]
5979   "TARGET_NEON"
5981   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5982   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5983   int regno = REGNO (operands[1]);
5984   rtx ops[6];
5985   if (lane >= max / 2)
5986     {
5987       lane -= max / 2;
5988       regno += 2;
5989     }
5990   ops[0] = operands[0];
5991   ops[1] = gen_rtx_REG (DImode, regno);
5992   ops[2] = gen_rtx_REG (DImode, regno + 4);
5993   ops[3] = gen_rtx_REG (DImode, regno + 8);
5994   ops[4] = gen_rtx_REG (DImode, regno + 12);
5995   ops[5] = GEN_INT (lane);
5996   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5997                    ops);
5998   return "";
6000   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Extends (SE) the half of operand 1 selected by a "vect_par_constant_low"
;; parallel and prints vmovl.<US><V_sz_elem> with the %e1 operand form.
;; Only enabled when !BYTES_BIG_ENDIAN.
6003 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6004   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6005         (SE:<V_unpack> (vec_select:<V_HALF>
6006                           (match_operand:VU 1 "register_operand" "w")
6007                           (match_operand:VU 2 "vect_par_constant_low" ""))))]
6008   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6009   "vmovl.<US><V_sz_elem> %q0, %e1"
6010   [(set_attr "type" "neon_shift_imm_long")]
;; Extends (SE) the half of operand 1 selected by a "vect_par_constant_high"
;; parallel and prints vmovl.<US><V_sz_elem> with the %f1 operand form.
;; Only enabled when !BYTES_BIG_ENDIAN.
6013 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6014   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6015         (SE:<V_unpack> (vec_select:<V_HALF>
6016                           (match_operand:VU 1 "register_operand" "w")
6017                           (match_operand:VU 2 "vect_par_constant_high" ""))))]
6018   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6019   "vmovl.<US><V_sz_elem> %q0, %f1"
6020   [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_unpack<US>_hi_<mode> for the VU modes.  Builds a PARALLEL
;; holding the indices nunits/2 .. nunits-1 (the upper half) and hands it
;; to gen_neon_vec_unpack<US>_hi_<mode> as the lane selector.
6023 (define_expand "vec_unpack<US>_hi_<mode>"
6024   [(match_operand:<V_unpack> 0 "register_operand")
6025    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6026  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6027   {
6028    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6029    rtx t1;
6030    int i;
6031    for (i = 0; i < (<V_mode_nunits>/2); i++)
6032      RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6033   
6034    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6035    emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], 
6036                                                  operands[1], 
6037                                                  t1));
6038    DONE;
6039   }
;; Expander vec_unpack<US>_lo_<mode> for the VU modes.  Builds a PARALLEL
;; holding the indices 0 .. nunits/2-1 (the lower half) and hands it to
;; gen_neon_vec_unpack<US>_lo_<mode> as the lane selector.
6042 (define_expand "vec_unpack<US>_lo_<mode>"
6043   [(match_operand:<V_unpack> 0 "register_operand")
6044    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6045  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6046   {
6047    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6048    rtx t1;
6049    int i;
6050    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6051      RTVEC_ELT (v, i) = GEN_INT (i);
6052    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6053    emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], 
6054                                                  operands[1], 
6055                                                  t1));
6056    DONE;
6057   }
;; Widening multiply of the low halves of operands 1 and 3.  Both halves
;; are selected by the same "vect_par_constant_low" parallel (operand 2,
;; reused through match_dup) and extended with SE before the mult.
;; Prints vmull.<US><V_sz_elem> with the %e operand forms.
6060 (define_insn "neon_vec_<US>mult_lo_<mode>"
6061  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6062        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6063                            (match_operand:VU 1 "register_operand" "w") 
6064                            (match_operand:VU 2 "vect_par_constant_low" "")))
6065                         (SE:<V_unpack> (vec_select:<V_HALF>
6066                            (match_operand:VU 3 "register_operand" "w") 
6067                            (match_dup 2)))))]
6068   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6069   "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6070   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_lo_<mode> for the VU modes.  Builds a
;; PARALLEL of the indices 0 .. nunits/2-1 and calls
;; gen_neon_vec_<US>mult_lo_<mode> with (dest, operand 1, selector,
;; operand 2).
6073 (define_expand "vec_widen_<US>mult_lo_<mode>"
6074   [(match_operand:<V_unpack> 0 "register_operand")
6075    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6076    (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6077  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6079    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6080    rtx t1;
6081    int i;
6082    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6083      RTVEC_ELT (v, i) = GEN_INT (i);
6084    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6086    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6087                                                operands[1],
6088                                                t1,
6089                                                operands[2]));
6090    DONE;
;; Widening multiply of the high halves of operands 1 and 3.  Both halves
;; are selected by the same "vect_par_constant_high" parallel (operand 2,
;; reused through match_dup) and extended with SE before the mult.
;; Prints vmull.<US><V_sz_elem> with the %f operand forms.
6094 (define_insn "neon_vec_<US>mult_hi_<mode>"
6095  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6096       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6097                             (match_operand:VU 1 "register_operand" "w") 
6098                             (match_operand:VU 2 "vect_par_constant_high" "")))
6099                        (SE:<V_unpack> (vec_select:<V_HALF>
6100                             (match_operand:VU 3 "register_operand" "w") 
6101                             (match_dup 2)))))]
6102   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6103   "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6104   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_hi_<mode> for the VU modes.  Builds a
;; PARALLEL of the indices nunits/2 .. nunits-1 and calls
;; gen_neon_vec_<US>mult_hi_<mode> with (dest, operand 1, selector,
;; operand 2).
6107 (define_expand "vec_widen_<US>mult_hi_<mode>"
6108   [(match_operand:<V_unpack> 0 "register_operand")
6109    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6110    (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6111  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6113    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6114    rtx t1;
6115    int i;
6116    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6117      RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6118    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6120    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6121                                                operands[1],
6122                                                t1,
6123                                                operands[2]));
6124    DONE;
;; Widening left shift for the VW modes: operand 1 is shifted by the
;; constant operand 2 and the result is extended (SE) to <V_widen>.
;; Prints vshll.<US><V_sz_elem>.
6129 (define_insn "neon_vec_<US>shiftl_<mode>"
6130  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6131        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6132        (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6133   "TARGET_NEON"
6135   return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6137   [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_widen_<US>shiftl_lo_<mode> for the VU modes.  Takes the
;; <V_HALF>mode subreg of operand 1 at byte offset 0 and passes it, with
;; the shift amount, to gen_neon_vec_<US>shiftl_<V_half>.
6140 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6141   [(match_operand:<V_unpack> 0 "register_operand")
6142    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6143    (match_operand:SI 2 "immediate_operand")]
6144  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6146   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6147                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6148                 operands[2]));
6149    DONE;
;; Expander vec_widen_<US>shiftl_hi_<mode> for the VU modes.  Takes the
;; <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and passes it, with the shift amount, to
;; gen_neon_vec_<US>shiftl_<V_half>.
6153 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6154   [(match_operand:<V_unpack> 0 "register_operand")
6155    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6156    (match_operand:SI 2 "immediate_operand")]
6157  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6159   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6160                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6161                                      GET_MODE_SIZE (<V_HALF>mode)),
6162                 operands[2]));
6163    DONE;
;; Extends (SE) a whole VDI-mode register to <V_widen> and prints
;; vmovl.<US><V_sz_elem>.
6167 ;; Vectorize for non-neon-quad case
6168 (define_insn "neon_unpack<US>_<mode>"
6169  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6170        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6171  "TARGET_NEON"
6172  "vmovl.<US><V_sz_elem> %q0, %P1"
6173   [(set_attr "type" "neon_move")]
;; Expander vec_unpack<US>_lo_<mode> for the VDI modes.  Widens operand 1
;; into a fresh <V_widen>mode temporary with gen_neon_unpack<US>_<mode>,
;; then extracts the low part with gen_neon_vget_low<V_widen_l>.
6176 (define_expand "vec_unpack<US>_lo_<mode>"
6177  [(match_operand:<V_double_width> 0 "register_operand")
6178   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6179  "TARGET_NEON"
6181   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6182   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6183   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6185   DONE;
;; Expander vec_unpack<US>_hi_<mode> for the VDI modes.  Widens operand 1
;; into a fresh <V_widen>mode temporary with gen_neon_unpack<US>_<mode>,
;; then extracts the high part with gen_neon_vget_high<V_widen_l>.
6189 (define_expand "vec_unpack<US>_hi_<mode>"
6190  [(match_operand:<V_double_width> 0 "register_operand")
6191   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6192  "TARGET_NEON"
6194   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6195   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6196   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6198   DONE;
;; Widening multiply of two whole VDI-mode registers: both inputs are
;; extended (SE) to <V_widen> and multiplied.  Prints
;; vmull.<US><V_sz_elem>.
6202 (define_insn "neon_vec_<US>mult_<mode>"
6203  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6204        (mult:<V_widen> (SE:<V_widen> 
6205                            (match_operand:VDI 1 "register_operand" "w"))
6206                        (SE:<V_widen> 
6207                            (match_operand:VDI 2 "register_operand" "w"))))]
6208   "TARGET_NEON"
6209   "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6210   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_hi_<mode> for the VDI modes.  Computes the
;; full widening product into a <V_widen>mode temporary with
;; gen_neon_vec_<US>mult_<mode>, then extracts the high part with
;; gen_neon_vget_high<V_widen_l>.
6213 (define_expand "vec_widen_<US>mult_hi_<mode>"
6214   [(match_operand:<V_double_width> 0 "register_operand")
6215    (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6216    (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6217  "TARGET_NEON"
6219    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6220    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6221    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6222                                             
6223    DONE;
;; Expander vec_widen_<US>mult_lo_<mode> for the VDI modes.  Computes the
;; full widening product into a <V_widen>mode temporary with
;; gen_neon_vec_<US>mult_<mode>, then extracts the low part with
;; gen_neon_vget_low<V_widen_l>.
6228 (define_expand "vec_widen_<US>mult_lo_<mode>"
6229   [(match_operand:<V_double_width> 0 "register_operand")
6230    (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6231    (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6232  "TARGET_NEON"
6234    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6235    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6236    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6237                                             
6238    DONE;
;; Expander vec_widen_<US>shiftl_hi_<mode> for the VDI modes.  Performs the
;; widening shift into a <V_widen>mode temporary with
;; gen_neon_vec_<US>shiftl_<mode>, then extracts the high part with
;; gen_neon_vget_high<V_widen_l>.
6243 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6244  [(match_operand:<V_double_width> 0 "register_operand")
6245    (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6246    (match_operand:SI 2 "immediate_operand")]
6247  "TARGET_NEON"
6249    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6250    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6251    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6253    DONE;
;; Expander vec_widen_<US>shiftl_lo_<mode> for the VDI modes.  Performs the
;; widening shift into a <V_widen>mode temporary with
;; gen_neon_vec_<US>shiftl_<mode>, then extracts the low part with
;; gen_neon_vget_low<V_widen_l>.
6257 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6258   [(match_operand:<V_double_width> 0 "register_operand")
6259    (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6260    (match_operand:SI 2 "immediate_operand")]
6261  "TARGET_NEON"
6263    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6264    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6265    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6267    DONE;
;; Truncates two VN-mode inputs and concatenates the results.  Prints two
;; vmovn.i<V_sz_elem> instructions, one writing %e0 from operand 1 and one
;; writing %f0 from operand 2, hence "length" 8 and "type" multiple.
;; The output carries the "=&w" constraint.
6271 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6272 ; because the ordering of vector elements in Q registers is different from what
6273 ; the semantics of the instructions require.
6275 (define_insn "vec_pack_trunc_<mode>"
6276  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6277        (vec_concat:<V_narrow_pack> 
6278                 (truncate:<V_narrow> 
6279                         (match_operand:VN 1 "register_operand" "w"))
6280                 (truncate:<V_narrow>
6281                         (match_operand:VN 2 "register_operand" "w"))))]
6282  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6283  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6284  [(set_attr "type" "multiple")
6285   (set_attr "length" "8")]
;; Truncates a single VN-mode register to <V_narrow> with one
;; vmovn.i<V_sz_elem>.
6288 ;; For the non-quad case.
6289 (define_insn "neon_vec_pack_trunc_<mode>"
6290  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6291        (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6292  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6293  "vmovn.i<V_sz_elem>\t%P0, %q1"
6294  [(set_attr "type" "neon_move_narrow_q")]
;; Expander vec_pack_trunc_<mode> for the VSHFT modes.  Assembles operands
;; 1 and 2 into the low and high halves of a <V_DOUBLE>mode temporary
;; (gen_move_lo_quad_ / gen_move_hi_quad_), then narrows the temporary with
;; gen_neon_vec_pack_trunc_<V_double>.
6297 (define_expand "vec_pack_trunc_<mode>"
6298  [(match_operand:<V_narrow_pack> 0 "register_operand")
6299   (match_operand:VSHFT 1 "register_operand")
6300   (match_operand:VSHFT 2 "register_operand")]
6301  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6303   rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6304   
6305   emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1])); 
6306   emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2])); 
6307   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6308   DONE;
;; Matches abs (minus (op1, op2)) on the VF modes and prints
;; vabd.<V_s_elem>.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.
6311 (define_insn "neon_vabd<mode>_2"
6312  [(set (match_operand:VF 0 "s_register_operand" "=w")
6313        (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6314                          (match_operand:VF 2 "s_register_operand" "w"))))]
6315  "ARM_HAVE_NEON_<MODE>_ARITH"
6316  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6317  [(set_attr "type" "neon_fp_abd_s<q>")]
;; Matches abs of an UNSPEC_VSUB of op1 and op2 on the VF modes and prints
;; vabd.<V_if_elem>.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.
6320 (define_insn "neon_vabd<mode>_3"
6321  [(set (match_operand:VF 0 "s_register_operand" "=w")
6322        (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6323                             (match_operand:VF 2 "s_register_operand" "w")]
6324                 UNSPEC_VSUB)))]
6325  "ARM_HAVE_NEON_<MODE>_ARITH"
6326  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6327  [(set_attr "type" "neon_fp_abd_s<q>")]
;; Adds a MATMUL unspec of two V16QI inputs (operands 2 and 3) to the V4SI
;; accumulator in operand 1, which is tied to the output.  Prints
;; v<sup>mmla.<mmla_sfx>.  Enabled by TARGET_I8MM.
6330 (define_insn "neon_<sup>mmlav16qi"
6331   [(set (match_operand:V4SI 0 "register_operand" "=w")
6332         (plus:V4SI
6333          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
6334                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
6335          (match_operand:V4SI 1 "register_operand" "0")))]
6336   "TARGET_I8MM"
6337   "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
6338   [(set_attr "type" "neon_mla_s_q")]
;; Adds an UNSPEC_DOT_S of two <VSF2BF> inputs (operands 2 and 3) to the
;; VCVTF accumulator in operand 1, which is tied to the output.  Prints
;; vdot.bf16.  Enabled by TARGET_BF16_SIMD.
6341 (define_insn "neon_vbfdot<VCVTF:mode>"
6342   [(set (match_operand:VCVTF 0 "register_operand" "=w")
6343         (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6344                     (unspec:VCVTF [
6345                             (match_operand:<VSF2BF> 2 "register_operand" "w")
6346                             (match_operand:<VSF2BF> 3 "register_operand" "w")]
6347                      UNSPEC_DOT_S)))]
6348   "TARGET_BF16_SIMD"
6349   "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
6350   [(set_attr "type" "neon_dot<q>")]
;; By-lane form of vdot.bf16 where the indexed operand 3 is V4BF.  The
;; immediate operand 4 is printed directly as the index on %P3.
;; Operand 3 uses the "x" constraint rather than "w".
6353 (define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
6354   [(set (match_operand:VCVTF 0 "register_operand" "=w")
6355         (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6356                     (unspec:VCVTF [
6357                             (match_operand:<VSF2BF> 2 "register_operand" "w")
6358                             (match_operand:V4BF 3 "register_operand" "x")
6359                             (match_operand:SI 4 "immediate_operand" "i")]
6360                      UNSPEC_DOT_S)))]
6361   "TARGET_BF16_SIMD"
6362   "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
6363   [(set_attr "type" "neon_dot<q>")]
;; By-lane form of vdot.bf16 where the indexed operand 3 is V8BF.
;; "half" is the element count of operand 3 divided by 4.  An index below
;; "half" is printed on %e3 unchanged; otherwise operands[4] is rewritten
;; to (lane - half) and printed on %f3.
6366 (define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
6367   [(set (match_operand:VCVTF 0 "register_operand" "=w")
6368         (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6369                     (unspec:VCVTF [
6370                             (match_operand:<VSF2BF> 2 "register_operand" "w")
6371                             (match_operand:V8BF 3 "register_operand" "x")
6372                             (match_operand:SI 4 "immediate_operand" "i")]
6373                      UNSPEC_DOT_S)))]
6374   "TARGET_BF16_SIMD"
6375   {
6376     int lane = INTVAL (operands[4]);
6377     int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
6378     if (lane < half)
6379       return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
6380     else
6381       {
6382         operands[4] = GEN_INT (lane - half);
6383         return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
6384       }
6385   }
6386   [(set_attr "type" "neon_dot<q>")]
;; UNSPEC_BFCVT from a V4SF input to a VBFCVT-mode output.  Prints
;; vcvt.bf16.f32 with the destination in the %<V_bf_low>0 form.
;; Enabled by TARGET_BF16_SIMD.
6389 (define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
6390   [(set (match_operand:VBFCVT 0 "register_operand" "=w")
6391        (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")]
6392         UNSPEC_BFCVT))]
6393   "TARGET_BF16_SIMD"
6394   "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
6395   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; UNSPEC_BFCVT_HIGH: takes the existing V8BF value (operand 1, tied to the
;; output) and a V4SF input (operand 2).  Prints vcvt.bf16.f32 with the
;; destination in the %f0 form.  Enabled by TARGET_BF16_SIMD.
6398 (define_insn "neon_vbfcvtv4sf_highv8bf"
6399   [(set (match_operand:V8BF 0 "register_operand" "=w")
6400        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
6401                      (match_operand:V4SF 2 "register_operand" "w")]
6402         UNSPEC_BFCVT_HIGH))]
6403   "TARGET_BF16_SIMD"
6404   "vcvt.bf16.f32\\t%f0, %q2"
6405   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Scalar UNSPEC_BFCVT from SF to BF; both operands use the "t" constraint.
;; Prints vcvtb.bf16.f32.  Enabled by TARGET_BF16_FP.
6408 (define_insn "neon_vbfcvtsf"
6409   [(set (match_operand:BF 0 "register_operand" "=t")
6410        (unspec:BF [(match_operand:SF 1 "register_operand" "t")]
6411         UNSPEC_BFCVT))]
6412   "TARGET_BF16_FP"
6413   "vcvtb.bf16.f32\\t%0, %1"
6414   [(set_attr "type" "f_cvt")]
;; UNSPEC_BFCVT from a VBFCVT-mode input to V4SF.  Implemented as
;; vshll.u32 by #16 on the %<V_bf_low>1 form of the input.
;; Enabled by TARGET_BF16_SIMD.
6417 (define_insn "neon_vbfcvt<VBFCVT:mode>"
6418   [(set (match_operand:V4SF 0 "register_operand" "=w")
6419        (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")]
6420         UNSPEC_BFCVT))]
6421   "TARGET_BF16_SIMD"
6422   "vshll.u32\\t%q0, %<V_bf_low>1, #16"
6423   [(set_attr "type" "neon_shift_imm_q")]
;; Insn "neon_vbfcvt_highv8bf".  Operand 0 (V4SF) is set to an
;; UNSPEC_BFCVT_HIGH of operand 1 (V8BF).  Enabled only under
;; TARGET_BF16_SIMD.  The template is a vshll.u32 by #16 whose source is
;; operand 1 printed with %f and whose destination is operand 0 printed
;; with %q.
;; NOTE(review): %f presumably names the high 64-bit half of operand 1 --
;; confirm against the operand-print code of the ARM backend.
6426 (define_insn "neon_vbfcvt_highv8bf"
6427   [(set (match_operand:V4SF 0 "register_operand" "=w")
6428        (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
6429         UNSPEC_BFCVT_HIGH))]
6430   "TARGET_BF16_SIMD"
6431   "vshll.u32\\t%q0, %f1, #16"
6432   [(set_attr "type" "neon_shift_imm_q")]
6435 ;; Convert a BF scalar operand to SF via VSHL.
6436 ;; VSHL doesn't accept the 32-bit registers in which the BF and SF scalar
6437 ;; operands would be allocated; therefore the operands must be converted to
6438 ;; intermediate vectors (i.e. V2SI) so that 64-bit registers can be used.
;; Expander "neon_vbfcvtbf": operand 0 is the SF result register, operand 1
;; the BF source register; enabled under TARGET_BF16_FP.  The C body emits
;; three insns and then finishes with DONE, so the RTL template above it is
;; not emitted itself:
;;   1. gen_neon_vbfcvtbf_cvtmodev2si moves operand 1 into a fresh V2SI
;;      pseudo (op1);
;;   2. gen_neon_vshl_nv2si shifts op1 by the constant 16 into a second
;;      fresh V2SI pseudo (op0);
;;   3. gen_neon_vbfcvtbf_cvtmodesf moves op0 into operand 0.
6439 (define_expand "neon_vbfcvtbf"
6440   [(match_operand:SF 0 "register_operand")
6441    (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
6442   "TARGET_BF16_FP"
6444   rtx op0 = gen_reg_rtx (V2SImode);
6445   rtx op1 = gen_reg_rtx (V2SImode);
6446   emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1]));
6447   emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode(16, SImode)));
6448   emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0));
6449   DONE;
6452 ;; Convert BF mode to V2SI and V2SI to SF.
6453 ;; Implement this by allocating a 32-bit operand in the low half of a 64-bit
6454 ;; register indexed by a 32-bit sub-register number.
6455 ;; This will generate reloads, but the compiler can optimize out the moves.
6456 ;; Use the 'x' constraint to guarantee that the 32-bit sub-registers are in
6457 ;; an indexable range, so as to avoid extra moves.
;; Insn "neon_vbfcvtbf_cvtmode<mode>", instantiated once per mode of the
;; VBFCVTM iterator.  Operand 0 (constraint "x") is set to an UNSPEC_BFCVT
;; of operand 1, whose mode comes from the <V_bf_cvt_m> attribute and which
;; is tied to operand 0 by the "0" constraint.  Enabled under
;; TARGET_BF16_FP.  The output template is the empty string, so the pattern
;; itself prints no assembly text.
;; NOTE(review): VBFCVTM and <V_bf_cvt_m> are defined outside this chunk;
;; the "v2si" and "sf" instances are the ones called by neon_vbfcvtbf.
6458 (define_insn "neon_vbfcvtbf_cvtmode<mode>"
6459   [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
6460        (unspec:VBFCVTM [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
6461         UNSPEC_BFCVT))]
6462   "TARGET_BF16_FP"
6463   ""
;; Insn "neon_vmmlav8bf": accumulate form.  Operand 0 (V4SF) is set to
;; operand 1 (V4SF, tied to operand 0 by the "0" constraint) plus an
;; UNSPEC_BFMMLA of the two V8BF registers in operands 2 and 3.  Enabled
;; only under TARGET_BF16_SIMD; emits one vmmla.bf16 with all three printed
;; operands using the %q modifier.
6466 (define_insn "neon_vmmlav8bf"
6467   [(set (match_operand:V4SF 0 "register_operand" "=w")
6468         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
6469                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6470                                  (match_operand:V8BF 3 "register_operand" "w")]
6471                     UNSPEC_BFMMLA)))]
6472   "TARGET_BF16_SIMD"
6473   "vmmla.bf16\\t%q0, %q2, %q3"
6474   [(set_attr "type" "neon_fp_mla_s_q")]
;; Insn "neon_vfma<bt>v8bf", instantiated once per value of the BF_MA
;; iterator; <bt> is substituted into both the pattern name and the
;; mnemonic.  Operand 0 (V4SF) is set to operand 1 (V4SF, tied to operand 0)
;; plus a BF_MA unspec of the two V8BF registers in operands 2 and 3.
;; Enabled only under TARGET_BF16_SIMD; emits one vfma<bt>.bf16.
;; NOTE(review): BF_MA and <bt> are defined outside this chunk -- presumably
;; they select the "b" and "t" variants of the instruction; confirm.
6477 (define_insn "neon_vfma<bt>v8bf"
6478   [(set (match_operand:V4SF 0 "register_operand" "=w")
6479         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6480                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6481                                   (match_operand:V8BF 3 "register_operand" "w")]
6482                      BF_MA)))]
6483   "TARGET_BF16_SIMD"
6484   "vfma<bt>.bf16\\t%q0, %q2, %q3"
6485   [(set_attr "type" "neon_fp_mla_s_q")]
;; Insn "neon_vfma<bt>_lanev8bf": by-lane variant of neon_vfma<bt>v8bf.
;; Operand 0 (V4SF) is set to operand 1 (V4SF, tied to operand 0) plus a
;; BF_MA unspec of operand 2 (V8BF), operand 3 (V4BF, constraint "x") and
;; operand 4 (an SImode constant integer giving the lane).  Enabled only
;; under TARGET_BF16_SIMD.  The template prints operand 3 with %P and
;; indexes it with the constant in operand 4 ("%P3[%c4]").
;; This pattern is the one invoked by the neon_vfma<bt>_laneqv8bf expander.
6488 (define_insn "neon_vfma<bt>_lanev8bf"
6489   [(set (match_operand:V4SF 0 "register_operand" "=w")
6490         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6491                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6492                                   (match_operand:V4BF 3 "register_operand" "x")
6493                                   (match_operand:SI 4 "const_int_operand" "n")]
6494                      BF_MA)))]
6495   "TARGET_BF16_SIMD"
6496   "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
6497   [(set_attr "type" "neon_fp_mla_s_scalar_q")]
;; Expander "neon_vfma<bt>_laneqv8bf": by-lane variant whose lane source
;; (operand 3) is a full V8BF register.  Operands 0-2 and 4 have the same
;; modes as in neon_vfma<bt>_lanev8bf.  Enabled under TARGET_BF16_SIMD.
;; The C body reads the lane number from operand 4, asserts that it lies in
;; 0..7, and always ends with DONE after emitting via
;; gen_neon_vfma<bt>_lanev8bf:
;;   - lane < 4: operands 0-4 are forwarded unchanged;
;;   - otherwise: gen_neon_vget_highv8bf copies part of operand 3 into a
;;     fresh V4BF pseudo, operand 4 is replaced by (lane - 4), and the
;;     pseudo is passed as the lane source.
;; NOTE(review): in the lane < 4 branch operands[3] is the V8BF operand of
;; this expander, whereas operand 3 of neon_vfma<bt>_lanev8bf is declared
;; V4BF -- confirm that the resulting insn is recognized, or whether the
;; low half should be extracted first as the else branch does for the high
;; half.
6500 (define_expand "neon_vfma<bt>_laneqv8bf"
6501   [(set (match_operand:V4SF 0 "register_operand" "=w")
6502         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6503                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6504                                   (match_operand:V8BF 3 "register_operand" "x")
6505                                   (match_operand:SI 4 "const_int_operand" "n")]
6506                      BF_MA)))]
6507   "TARGET_BF16_SIMD"
6508   {
6509     int lane = INTVAL (operands[4]);
6510     gcc_assert (IN_RANGE(lane, 0, 7));
6511     if (lane < 4)
6512     {
6513         emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], operands[3], operands[4]));
6514     }
6515     else
6516       {
6517         rtx op_highpart = gen_reg_rtx (V4BFmode);
6518         emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3]));
6519         operands[4] = GEN_INT (lane - 4);
6520         emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4]));
6521       }
6522     DONE;
6523   }
6524   [(set_attr "type" "neon_fp_mla_s_scalar_q")]