1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Possible values are "vadd", "vmin" and "vmax"; the default value is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Store of a V8QI register (operand 1, constraint "w") to a memory operand
;; (operand 0, constraint "Un"), wrapped in UNSPEC_UNALIGNED_STORE.  The
;; assembly is produced by output_move_neon.
26 (define_insn "unaligned_storev8qi"
27 [(set (match_operand:V8QI 0 "memory_operand" "=Un")
28 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
29 UNSPEC_UNALIGNED_STORE))]
32 return output_move_neon (operands);
34 [(set_attr "type" "neon_store1_1reg")])
;; Move pattern for the VDXMOV modes.  The condition requires at least one
;; operand to be a register.  Alternatives 2 and 3 (Dm/Dn constants) are
;; validated with simd_immediate_valid_for_move and printed as vmov.f32 or
;; vmov.i<width>; alternatives 1 and 4 are emitted by output_move_neon;
;; alternatives 0, 5 and 6 are single vmov instructions; anything else falls
;; through to output_move_double.
36 (define_insn "*neon_mov<mode>"
37 [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
38 "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
39 (match_operand:VDXMOV 1 "general_operand"
40 " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
42 && (register_operand (operands[0], <MODE>mode)
43 || register_operand (operands[1], <MODE>mode))"
45 if (which_alternative == 2 || which_alternative == 3)
48 static char templ[40];
50 is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
51 &operands[1], &width);
53 gcc_assert (is_valid != 0);
56 return "vmov.f32\t%P0, %1 @ <mode>";
58 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
63 switch (which_alternative)
65 case 0: return "vmov\t%P0, %P1 @ <mode>";
66 case 1: case 4: return output_move_neon (operands);
67 case 2: case 3: gcc_unreachable ();
68 case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
69 case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
71 default: return output_move_double (operands, true, NULL);
74 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
75 neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
76 neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
78 (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
79 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
80 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
81 (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
;; Move pattern for the VQXMOV modes.  The condition requires at least one
;; operand to be a register.  Alternatives 2 and 3 (Dm/DN constants) are
;; validated with simd_immediate_valid_for_move and printed as vmov.f32 or
;; vmov.i<width>; alternatives 1 and 4 are emitted by output_move_neon;
;; alternatives 5 and 6 each emit a pair of vmov instructions; anything
;; beyond alternative 6 falls through to output_move_quad.
83 (define_insn "*neon_mov<mode>"
84 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
85 "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
86 (match_operand:VQXMOV 1 "general_operand"
87 " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
89 && (register_operand (operands[0], <MODE>mode)
90 || register_operand (operands[1], <MODE>mode))"
92 if (which_alternative == 2 || which_alternative == 3)
95 static char templ[40];
97 is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
98 &operands[1], &width);
100 gcc_assert (is_valid != 0);
103 return "vmov.f32\t%q0, %1 @ <mode>";
105 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
110 switch (which_alternative)
112 case 0: return "vmov\t%q0, %q1 @ <mode>";
113 case 1: case 4: return output_move_neon (operands);
114 case 2: case 3: gcc_unreachable ();
115 case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
116 case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
117 default: return output_move_quad (operands);
120 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
121 neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
122 neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
123 (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
124 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
125 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
126 (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
128 /* We define these mov expanders to match the standard mov$a optab to prevent
129 the mid-end from trying to do a subreg for these modes, which is the most
130 inefficient way to expand the move. Also, big-endian subregs aren't
131 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
132 Without these RTL generation patterns the mid-end would attempt to take a
133 subreg and may ICE if it can't. */
;; TImode move expander.  Asserts (gcc_checking_assert) that both operands
;; satisfy aligned_operand.  While pseudos can still be created, a source
;; whose destination is not a REG is first forced into a register.
135 (define_expand "movti"
136 [(set (match_operand:TI 0 "nonimmediate_operand")
137 (match_operand:TI 1 "general_operand"))]
140 gcc_checking_assert (aligned_operand (operands[0], TImode));
141 gcc_checking_assert (aligned_operand (operands[1], TImode));
142 if (can_create_pseudo_p ())
144 if (!REG_P (operands[0]))
145 operands[1] = force_reg (TImode, operands[1]);
;; Move expander for the VSTRUCT modes, enabled for NEON or MVE.  Asserts
;; (gcc_checking_assert) that both operands satisfy aligned_operand.  While
;; pseudos can still be created, a source whose destination is not a REG is
;; first forced into a register.
149 (define_expand "mov<mode>"
150 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
151 (match_operand:VSTRUCT 1 "general_operand"))]
152 "TARGET_NEON || TARGET_HAVE_MVE"
154 gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
155 gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
156 if (can_create_pseudo_p ())
158 if (!REG_P (operands[0]))
159 operands[1] = force_reg (<MODE>mode, operands[1]);
163 ;; The mov<mode> patterns for the v8hf, v4hf, v4bf and v8bf modes are split into
164 ;; two groups. The pattern movv8hf is common to MVE and NEON, so it is moved
165 ;; into the vec-common.md file. The remaining mov expand patterns for half-float
166 ;; and bfloat modes are implemented below.
;; Move expander for the VHFBF_split modes; both operands use the
;; s_register_operand predicate.  The body performs the same aligned_operand
;; assertions and force_reg step as the other mov expanders in this file.
167 (define_expand "mov<mode>"
168 [(set (match_operand:VHFBF_split 0 "s_register_operand")
169 (match_operand:VHFBF_split 1 "s_register_operand"))]
172 gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
173 gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
174 if (can_create_pseudo_p ())
176 if (!REG_P (operands[0]))
177 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move insn for the VSTRUCT modes with three alternatives: register to
;; register (w <- w), store (Ut <- w) and load (w <- Ut).  At least one
;; operand must be a register.  The store and load alternatives are emitted
;; by output_move_neon, and the insn length is computed at run time by
;; arm_attr_length_move_neon.
181 (define_insn "*neon_mov<mode>"
182 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
183 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
184 "(TARGET_NEON || TARGET_HAVE_MVE)
185 && (register_operand (operands[0], <MODE>mode)
186 || register_operand (operands[1], <MODE>mode))"
188 switch (which_alternative)
191 case 1: case 2: return output_move_neon (operands);
192 default: gcc_unreachable ();
195 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
196 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload, break an EI register-to-register move into two moves: a
;; TImode piece at the base register number and a DImode piece at base + 4.
;; neon_disambiguate_copy fills operands[] from the dest/src arrays
;; (NOTE(review): presumably choosing an order that is safe when the source
;; and destination register ranges overlap -- confirm against its definition).
199 [(set (match_operand:EI 0 "s_register_operand" "")
200 (match_operand:EI 1 "s_register_operand" ""))]
201 "TARGET_NEON && reload_completed"
202 [(set (match_dup 0) (match_dup 1))
203 (set (match_dup 2) (match_dup 3))]
205 int rdest = REGNO (operands[0]);
206 int rsrc = REGNO (operands[1]);
209 dest[0] = gen_rtx_REG (TImode, rdest);
210 src[0] = gen_rtx_REG (TImode, rsrc);
211 dest[1] = gen_rtx_REG (DImode, rdest + 4);
212 src[1] = gen_rtx_REG (DImode, rsrc + 4);
214 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, break an OI register-to-register move into two TImode
;; moves, at the base register number and at base + 4.  Enabled for NEON or
;; MVE.  neon_disambiguate_copy fills operands[] from the dest/src arrays.
218 [(set (match_operand:OI 0 "s_register_operand" "")
219 (match_operand:OI 1 "s_register_operand" ""))]
220 "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))]
224 int rdest = REGNO (operands[0]);
225 int rsrc = REGNO (operands[1]);
228 dest[0] = gen_rtx_REG (TImode, rdest);
229 src[0] = gen_rtx_REG (TImode, rsrc);
230 dest[1] = gen_rtx_REG (TImode, rdest + 4);
231 src[1] = gen_rtx_REG (TImode, rsrc + 4);
233 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, break a CI register-to-register move into three TImode
;; moves, at register offsets 0, 4 and 8 from the base register number.
;; neon_disambiguate_copy fills operands[] from the dest/src arrays.
237 [(set (match_operand:CI 0 "s_register_operand" "")
238 (match_operand:CI 1 "s_register_operand" ""))]
239 "TARGET_NEON && reload_completed"
240 [(set (match_dup 0) (match_dup 1))
241 (set (match_dup 2) (match_dup 3))
242 (set (match_dup 4) (match_dup 5))]
244 int rdest = REGNO (operands[0]);
245 int rsrc = REGNO (operands[1]);
248 dest[0] = gen_rtx_REG (TImode, rdest);
249 src[0] = gen_rtx_REG (TImode, rsrc);
250 dest[1] = gen_rtx_REG (TImode, rdest + 4);
251 src[1] = gen_rtx_REG (TImode, rsrc + 4);
252 dest[2] = gen_rtx_REG (TImode, rdest + 8);
253 src[2] = gen_rtx_REG (TImode, rsrc + 8);
255 neon_disambiguate_copy (operands, dest, src, 3);
;; After reload, break an XI register-to-register move into four TImode
;; moves, at register offsets 0, 4, 8 and 12 from the base register number.
;; Enabled for NEON or MVE.  neon_disambiguate_copy fills operands[] from
;; the dest/src arrays.
259 [(set (match_operand:XI 0 "s_register_operand" "")
260 (match_operand:XI 1 "s_register_operand" ""))]
261 "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
262 [(set (match_dup 0) (match_dup 1))
263 (set (match_dup 2) (match_dup 3))
264 (set (match_dup 4) (match_dup 5))
265 (set (match_dup 6) (match_dup 7))]
267 int rdest = REGNO (operands[0]);
268 int rsrc = REGNO (operands[1]);
271 dest[0] = gen_rtx_REG (TImode, rdest);
272 src[0] = gen_rtx_REG (TImode, rsrc);
273 dest[1] = gen_rtx_REG (TImode, rdest + 4);
274 src[1] = gen_rtx_REG (TImode, rsrc + 4);
275 dest[2] = gen_rtx_REG (TImode, rdest + 8);
276 src[2] = gen_rtx_REG (TImode, rsrc + 8);
277 dest[3] = gen_rtx_REG (TImode, rdest + 12);
278 src[3] = gen_rtx_REG (TImode, rsrc + 12);
280 neon_disambiguate_copy (operands, dest, src, 4);
;; Misaligned store of a VDX-mode register to memory using vst1.  Only
;; enabled for little-endian NEON targets with unaligned_access set.
283 (define_insn "*movmisalign<mode>_neon_store"
284 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
285 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
286 UNSPEC_MISALIGNED_ACCESS))]
287 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
288 "vst1.<V_sz_elem>\t{%P1}, %A0"
289 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VDX-mode value from memory into a register using
;; vld1.  Only enabled for little-endian NEON targets with unaligned_access
;; set.
291 (define_insn "*movmisalign<mode>_neon_load"
292 [(set (match_operand:VDX 0 "s_register_operand" "=w")
293 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
295 UNSPEC_MISALIGNED_ACCESS))]
296 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
297 "vld1.<V_sz_elem>\t{%P0}, %A1"
298 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register to memory using vst1 with the %q
;; operand form.  Only enabled for little-endian NEON targets with
;; unaligned_access set.
300 (define_insn "*movmisalign<mode>_neon_store"
301 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
302 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
303 UNSPEC_MISALIGNED_ACCESS))]
304 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
305 "vst1.<V_sz_elem>\t{%q1}, %A0"
306 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX-mode value from memory into a register using
;; vld1 with the %q operand form.  Only enabled for little-endian NEON
;; targets with unaligned_access set.
308 (define_insn "*movmisalign<mode>_neon_load"
309 [(set (match_operand:VQX 0 "s_register_operand" "=w")
310 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
312 UNSPEC_MISALIGNED_ACCESS))]
313 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
314 "vld1.<V_sz_elem>\t{%q0}, %A1"
315 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of the VD_LANE vector in operand 3
;; (tied to the output).  Operand 2 is a bit mask; the lane number is
;; ffs (mask) - 1, mirrored within the vector on big-endian.  Alternative 0
;; loads the element from memory with vld1; alternative 1 moves it from an
;; "r" register with vmov.
317 (define_insn "@vec_set<mode>_internal"
318 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
320 (vec_duplicate:VD_LANE
321 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
322 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
323 (match_operand:SI 2 "immediate_operand" "i,i")))]
326 int elt = ffs ((int) INTVAL (operands[2])) - 1;
327 if (BYTES_BIG_ENDIAN)
328 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
329 operands[2] = GEN_INT (elt);
331 if (which_alternative == 0)
332 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
334 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
336 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert scalar operand 1 into one lane of the VQ2 vector in operand 3
;; (tied to the output).  The lane number is ffs (operand 2) - 1.  It is
;; split into a half-vector selector (register offset 0 or 2 from operand 0)
;; and a lane within that half, which is mirrored on big-endian.  Operand 0
;; is rewritten to the selected <V_HALF>mode register before printing.
338 (define_insn "@vec_set<mode>_internal"
339 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
342 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
343 (match_operand:VQ2 3 "s_register_operand" "0,0")
344 (match_operand:SI 2 "immediate_operand" "i,i")))]
347 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
348 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
349 int elt = elem % half_elts;
350 int hi = (elem / half_elts) * 2;
351 int regno = REGNO (operands[0]);
353 if (BYTES_BIG_ENDIAN)
354 elt = half_elts - 1 - elt;
356 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
357 operands[2] = GEN_INT (elt);
359 if (which_alternative == 0)
360 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
362 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
364 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert DImode operand 1 into one of the two lanes of a V2DI_ONLY vector.
;; The lane number is ffs (operand 2) - 1; operand 0 is rewritten to the
;; DImode register at REGNO (operand 0) + 2 * lane.  Alternative 0 loads
;; from memory with vld1.64; alternative 1 uses vmov from %Q1/%R1.
367 (define_insn "@vec_set<mode>_internal"
368 [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
370 (vec_duplicate:V2DI_ONLY
371 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
372 (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
373 (match_operand:SI 2 "immediate_operand" "i,i")))]
376 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
377 int regno = REGNO (operands[0]) + 2 * elem;
379 operands[0] = gen_rtx_REG (DImode, regno);
381 if (which_alternative == 0)
382 return "vld1.64\t%P0, %A1";
384 return "vmov\t%P0, %Q1, %R1";
386 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Extract the lane given by operand 2 from the VD_LANE vector in operand 1.
;; On big-endian the lane number is mirrored within the vector.
;; Alternative 0 stores the lane to memory with vst1; alternative 1 moves it
;; to an "r" register with vmov.
389 (define_insn "vec_extract<mode><V_elem_l>"
390 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
392 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
393 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
396 if (BYTES_BIG_ENDIAN)
398 int elt = INTVAL (operands[2]);
399 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
400 operands[2] = GEN_INT (elt);
403 if (which_alternative == 0)
404 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
406 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
408 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
411 ;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
412 ;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
413 ;; by define_expand in vec-common.md file.
;; Extract the lane given by operand 2 from the VQ2 vector in operand 1.
;; The lane number is split into a half-vector selector (register offset 0
;; or 2 from operand 1) and a lane within that half, which is mirrored on
;; big-endian.  Operand 1 is rewritten to the selected <V_HALF>mode register
;; before printing.
414 (define_insn "neon_vec_extract<mode><V_elem_l>"
415 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
417 (match_operand:VQ2 1 "s_register_operand" "w,w")
418 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
421 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
422 int elt = INTVAL (operands[2]) % half_elts;
423 int hi = (INTVAL (operands[2]) / half_elts) * 2;
424 int regno = REGNO (operands[1]);
426 if (BYTES_BIG_ENDIAN)
427 elt = half_elts - 1 - elt;
429 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
430 operands[2] = GEN_INT (elt);
432 if (which_alternative == 0)
433 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
435 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
437 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
440 ;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
441 ;; and this pattern is called by define_expand in vec-common.md file.
;; Extract DImode lane operand 2 from the V2DI vector in operand 1.  Operand
;; 1 is rewritten to the DImode register at REGNO (operand 1) + 2 * lane.
;; Alternative 0 stores it with vst1.64; alternative 1 moves it to %Q0/%R0
;; with vmov.
442 (define_insn "neon_vec_extractv2didi"
443 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
445 (match_operand:V2DI 1 "s_register_operand" "w,w")
446 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
449 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
451 operands[1] = gen_rtx_REG (DImode, regno);
453 if (which_alternative == 0)
454 return "vst1.64\t{%P1}, %A0 @ v2di";
456 return "vmov\t%Q0, %R0, %P1 @ v2di";
458 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition for the VDQ modes (vadd).  The scheduling type is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "ARM_HAVE_NEON_<MODE>_ARITH"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
;; Vector subtraction (operand 1 minus operand 2) for the VDQ modes (vsub).
;; The scheduling type is neon_fp_addsub_s<q> for float modes and
;; neon_sub<q> otherwise.
478 (define_insn "*sub<mode>3_neon"
479 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
480 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
481 (match_operand:VDQ 2 "s_register_operand" "w")))]
482 "ARM_HAVE_NEON_<MODE>_ARITH"
483 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
485 (if_then_else (match_test "<Is_float_mode>")
486 (const_string "neon_fp_addsub_s<q>")
487 (const_string "neon_sub<q>")))]
;; Vector multiplication for the VDQW modes (vmul).  The scheduling type is
;; neon_fp_mul_s<q> for float modes and neon_mul_<V_elem_ch><q> otherwise.
490 (define_insn "*mul<mode>3_neon"
491 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
492 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
493 (match_operand:VDQW 2 "s_register_operand" "w")))]
494 "ARM_HAVE_NEON_<MODE>_ARITH"
495 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
497 (if_then_else (match_test "<Is_float_mode>")
498 (const_string "neon_fp_mul_s<q>")
499 (const_string "neon_mul_<V_elem_ch><q>")))]
502 /* Perform division using multiply-by-reciprocal.
503 Reciprocal is calculated using Newton-Raphson method.
504 Enabled with -funsafe-math-optimizations -freciprocal-math
505 and disabled for -Os since it increases code size. */
;; Expands operand 1 / operand 2 for the VCVTF modes as operand 1 times an
;; approximate reciprocal of operand 2: a vrecpe estimate refined by two
;; vrecps + multiply steps.  Requires NEON, !optimize_size and
;; flag_reciprocal_math.
507 (define_expand "div<mode>3"
508 [(set (match_operand:VCVTF 0 "s_register_operand")
509 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
510 (match_operand:VCVTF 2 "s_register_operand")))]
511 "TARGET_NEON && !optimize_size
512 && flag_reciprocal_math"
514 rtx rec = gen_reg_rtx (<MODE>mode);
515 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
517 /* Reciprocal estimate. */
518 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
520 /* Perform 2 iterations of the Newton-Raphson method. */
521 for (int i = 0; i < 2; i++)
523 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
524 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
527 /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */
528 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with the accumulator (operand 1) tied to the output.
;; Emitted as vmla.
534 (define_insn "mul<mode>3add<mode>_neon"
535 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
536 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
537 (match_operand:VDQW 3 "s_register_operand" "w"))
538 (match_operand:VDQW 1 "s_register_operand" "0")))]
539 "ARM_HAVE_NEON_<MODE>_ARITH"
540 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
542 (if_then_else (match_test "<Is_float_mode>")
543 (const_string "neon_fp_mla_s<q>")
544 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VH modes: operand 0 = operand 2 * operand 3
;; + operand 1, with the accumulator (operand 1) tied to the output.
;; Emitted as vmla.f16.
547 (define_insn "mul<mode>3add<mode>_neon"
548 [(set (match_operand:VH 0 "s_register_operand" "=w")
549 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
550 (match_operand:VH 3 "s_register_operand" "w"))
551 (match_operand:VH 1 "s_register_operand" "0")))]
552 "ARM_HAVE_NEON_<MODE>_ARITH"
553 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
554 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with operand 1 tied to the output.  Emitted as vmls.
557 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
558 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
559 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
560 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
561 (match_operand:VDQW 3 "s_register_operand" "w"))))]
562 "ARM_HAVE_NEON_<MODE>_ARITH"
563 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
565 (if_then_else (match_test "<Is_float_mode>")
566 (const_string "neon_fp_mla_s<q>")
567 (const_string "neon_mla_<V_elem_ch><q>")))]
570 ;; Fused multiply-accumulate
571 ;; We define each insn twice here:
572 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
573 ;; to be able to use when converting to FMA.
574 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes: operand 0 = operand 1 * operand 2
;; + operand 3, with operand 3 tied to the output.  Gated on
;; ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA.
575 (define_insn "fma<VCVTF:mode>4"
576 [(set (match_operand:VCVTF 0 "register_operand" "=w")
577 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
578 (match_operand:VCVTF 2 "register_operand" "w")
579 (match_operand:VCVTF 3 "register_operand" "0")))]
580 "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
581 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
582 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output template as fma<VCVTF:mode>4, but gated only on
;; TARGET_NEON && TARGET_FMA.
585 (define_insn "fma<VCVTF:mode>4_intrinsic"
586 [(set (match_operand:VCVTF 0 "register_operand" "=w")
587 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
588 (match_operand:VCVTF 2 "register_operand" "w")
589 (match_operand:VCVTF 3 "register_operand" "0")))]
590 "TARGET_NEON && TARGET_FMA"
591 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
592 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add for the VH modes, emitted as vfma with operand 3 (the
;; addend) tied to the output.  Gated on ARM_HAVE_NEON_<MODE>_ARITH.
595 (define_insn "fma<VH:mode>4"
596 [(set (match_operand:VH 0 "register_operand" "=w")
598 (match_operand:VH 1 "register_operand" "w")
599 (match_operand:VH 2 "register_operand" "w")
600 (match_operand:VH 3 "register_operand" "0")))]
601 "ARM_HAVE_NEON_<MODE>_ARITH"
602 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
603 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, with operand 3 tied to the output.  Emitted as
;; vfms.
606 (define_insn "*fmsub<VCVTF:mode>4"
607 [(set (match_operand:VCVTF 0 "register_operand" "=w")
608 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
609 (match_operand:VCVTF 2 "register_operand" "w")
610 (match_operand:VCVTF 3 "register_operand" "0")))]
611 "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
612 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
613 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Named fused multiply-subtract for the VCVTF modes, emitted as vfms with
;; operand 1 negated and operand 3 tied to the output.  Gated only on
;; TARGET_NEON && TARGET_FMA.
616 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
617 [(set (match_operand:VCVTF 0 "register_operand" "=w")
619 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
620 (match_operand:VCVTF 2 "register_operand" "w")
621 (match_operand:VCVTF 3 "register_operand" "0")))]
622 "TARGET_NEON && TARGET_FMA"
623 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
624 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VH modes, emitted as vfms with operand 1
;; negated and operand 3 tied to the output.  Gated on TARGET_NEON_FP16INST.
627 (define_insn "fmsub<VH:mode>4_intrinsic"
628 [(set (match_operand:VH 0 "register_operand" "=w")
630 (neg:VH (match_operand:VH 1 "register_operand" "w"))
631 (match_operand:VH 2 "register_operand" "w")
632 (match_operand:VH 3 "register_operand" "0")))]
633 "TARGET_NEON_FP16INST"
634 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
635 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Expander for the rounding patterns named by the NEON_VRINT iterator on
;; the VCVTF modes.  Requires NEON, VFP5 and flag_unsafe_math_optimizations.
638 (define_expand "<NEON_VRINT:nvrint_pattern><VCVTF:mode>2"
639 [(set (match_operand:VCVTF 0 "s_register_operand")
640 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand")]
642 "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations"
;; vrint<variant>.f32 on a VCVTF-mode register.  Unlike the expander, this
;; insn is gated only on TARGET_NEON && TARGET_VFP5.
645 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
646 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
647 (unspec:VCVTF [(match_operand:VCVTF 1
648 "s_register_operand" "w")]
650 "TARGET_NEON && TARGET_VFP5"
651 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
652 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Expander for rounding a VCVTF vector and converting it to the
;; <V_cmp_result> integer mode (signed or unsigned via FIXUORS).  Requires
;; NEON, VFP5 and flag_unsafe_math_optimizations.
655 (define_expand "l<NEON_VCVT:nvrint_pattern><su_optab><VCVTF:mode><v_cmp_result>2"
656 [(set (match_operand:<V_cmp_result> 0 "register_operand")
657 (FIXUORS:<V_cmp_result>
658 (unspec:VCVTF [(match_operand:VCVTF 1 "register_operand")]
660 "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations"
;; vcvt<variant>.<su>32.f32: float-to-integer conversion on a VCVTF-mode
;; register.  Gated on TARGET_NEON && TARGET_VFP5 and marked as not
;; predicable.
663 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
664 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
665 (FIXUORS:<V_cmp_result> (unspec:VCVTF
666 [(match_operand:VCVTF 1 "register_operand" "w")]
668 "TARGET_NEON && TARGET_VFP5"
669 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
670 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
671 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 is the register form
;; (vorr); alternative 1 takes a "Dl" immediate, ties operand 1 to the
;; output and is printed by neon_output_logic_immediate.
674 (define_insn "ior<mode>3_neon"
675 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
676 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
677 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
680 switch (which_alternative)
682 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
683 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
684 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
685 default: gcc_unreachable ();
688 [(set_attr "type" "neon_logic<q>")]
691 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
692 ;; vorr. We support the pseudo-instruction vand instead, because that
693 ;; corresponds to the canonical form the middle-end expects to use for
694 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 is the register form
;; (vand); alternative 1 takes a "DL" immediate, ties operand 1 to the
;; output and is printed by neon_output_logic_immediate with its fourth
;; argument set to 1 (0 in the ior pattern).
696 (define_insn "and<mode>3_neon"
697 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
698 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
699 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
702 switch (which_alternative)
704 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
705 case 1: return neon_output_logic_immediate ("vand", &operands[2],
706 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
707 default: gcc_unreachable ();
710 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2 (vorn).
713 (define_insn "orn<mode>3_neon"
714 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
715 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
716 (match_operand:VDQ 1 "s_register_operand" "w")))]
718 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
719 [(set_attr "type" "neon_logic<q>")]
;; Bit clear for the VDQ modes: operand 0 = operand 1 & ~operand 2 (vbic).
722 (define_insn "bic<mode>3_neon"
723 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
724 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
725 (match_operand:VDQ 1 "s_register_operand" "w")))]
727 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
728 [(set_attr "type" "neon_logic<q>")]
;; Bitwise exclusive OR for the VDQ modes (veor).
731 (define_insn "xor<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
734 (match_operand:VDQ 2 "s_register_operand" "w")))]
736 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes (vmvn).
740 (define_insn "one_cmpl<mode>2_neon"
741 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
742 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
744 "vmvn\t%<V_reg>0, %<V_reg>1"
745 [(set_attr "type" "neon_move<q>")]
;; Element-wise absolute value for the VDQW modes (vabs).  The scheduling
;; type is neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
748 (define_insn "neon_abs<mode>2"
749 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
750 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
752 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
754 (if_then_else (match_test "<Is_float_mode>")
755 (const_string "neon_fp_abs_s<q>")
756 (const_string "neon_abs<q>")))]
;; Element-wise negation for the VDQW modes (vneg).  The scheduling type is
;; neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
759 (define_insn "neon_neg<mode>2"
760 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
761 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
763 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
765 (if_then_else (match_test "<Is_float_mode>")
766 (const_string "neon_fp_neg_s<q>")
767 (const_string "neon_neg<q>")))]
;; Absolute value / negation (ABSNEG iterator) for the VH modes; the
;; mnemonic is built as v<absneg_str>.  Requires TARGET_NEON_FP16INST.
770 (define_insn "neon_<absneg_str><mode>2"
771 [(set (match_operand:VH 0 "s_register_operand" "=w")
772 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
773 "TARGET_NEON_FP16INST"
774 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
775 [(set_attr "type" "neon_abs<q>")]
;; Expander that forwards to the neon_<absneg_str><mode>2 insn with the same
;; two operands.  Requires TARGET_NEON_FP16INST.
778 (define_expand "neon_v<absneg_str><mode>"
780 (match_operand:VH 0 "s_register_operand")
781 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
782 "TARGET_NEON_FP16INST"
784 emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
;; Half-precision rounding insns for the VH modes; the mnemonic comes from
;; <fp16_rnd_insn>.  Requires TARGET_NEON_FP16INST.
788 (define_insn "neon_v<fp16_rnd_str><mode>"
789 [(set (match_operand:VH 0 "s_register_operand" "=w")
791 [(match_operand:VH 1 "s_register_operand" "w")]
793 "TARGET_NEON_FP16INST"
794 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
795 [(set_attr "type" "neon_fp_round_s<q>")]
;; vrsqrte.f16 on a VH-mode register.  Requires TARGET_NEON_FP16INST.
798 (define_insn "neon_vrsqrte<mode>"
799 [(set (match_operand:VH 0 "s_register_operand" "=w")
801 [(match_operand:VH 1 "s_register_operand" "w")]
803 "TARGET_NEON_FP16INST"
804 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
805 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned element-wise minimum for the VDQIW modes (vmin.<V_u_elem>).
808 (define_insn "*umin<mode>3_neon"
809 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
810 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
811 (match_operand:VDQIW 2 "s_register_operand" "w")))]
813 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
814 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned element-wise maximum for the VDQIW modes (vmax.<V_u_elem>).
817 (define_insn "*umax<mode>3_neon"
818 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
819 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
820 (match_operand:VDQIW 2 "s_register_operand" "w")))]
822 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
823 [(set_attr "type" "neon_minmax<q>")]
;; Element-wise smin for the VDQW modes (vmin.<V_s_elem>).  The scheduling
;; type is neon_fp_minmax_s<q> for float modes and neon_minmax<q> otherwise.
826 (define_insn "*smin<mode>3_neon"
827 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
828 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
829 (match_operand:VDQW 2 "s_register_operand" "w")))]
831 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
833 (if_then_else (match_test "<Is_float_mode>")
834 (const_string "neon_fp_minmax_s<q>")
835 (const_string "neon_minmax<q>")))]
;; Element-wise smax for the VDQW modes (vmax.<V_s_elem>).  The scheduling
;; type is neon_fp_minmax_s<q> for float modes and neon_minmax<q> otherwise.
838 (define_insn "*smax<mode>3_neon"
839 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
840 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
841 (match_operand:VDQW 2 "s_register_operand" "w")))]
843 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
845 (if_then_else (match_test "<Is_float_mode>")
846 (const_string "neon_fp_minmax_s<q>")
847 (const_string "neon_minmax<q>")))]
850 ; TODO: V2DI shifts are currently disabled because there are bugs in the
851 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Arithmetic right shift of a VDQIW vector by an immediate operand
;; (imm_for_neon_rshift_operand, constraint "Dm").  Printed by
;; neon_output_shift_immediate as "vshr" with sign character 's'.
854 (define_insn "vashr<mode>3_imm"
855 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
856 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
857 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
860 return neon_output_shift_immediate ("vshr", 's', &operands[2],
861 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
864 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift of a VDQIW vector by an immediate operand
;; (imm_for_neon_rshift_operand, constraint "Dm").  Printed by
;; neon_output_shift_immediate as "vshr" with sign character 'u'.
867 (define_insn "vlshr<mode>3_imm"
868 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
869 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
870 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
873 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
874 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
877 [(set_attr "type" "neon_shift_imm<q>")]
880 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
881 ; amount, with signed operands. This is essentially the same as ashl<mode>3
882 ; above, but using an unspec in case GCC tries anything tricky with negative
;; Register-controlled vshl.<V_s_elem> on VDQI vectors, represented as
;; UNSPEC_ASHIFT_SIGNED rather than as an ashift rtx.
885 (define_insn "ashl<mode>3_signed"
886 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
887 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
888 (match_operand:VDQI 2 "s_register_operand" "w")]
889 UNSPEC_ASHIFT_SIGNED))]
891 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
892 [(set_attr "type" "neon_shift_reg<q>")]
895 ; Used for implementing logical shift-right, which is a left-shift by a negative
896 ; amount, with unsigned operands.
;; Register-controlled vshl.<V_u_elem> on VDQI vectors, represented as
;; UNSPEC_ASHIFT_UNSIGNED rather than as an ashift rtx.
898 (define_insn "ashl<mode>3_unsigned"
899 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
900 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
901 (match_operand:VDQI 2 "s_register_operand" "w")]
902 UNSPEC_ASHIFT_UNSIGNED))]
904 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
905 [(set_attr "type" "neon_shift_reg<q>")]
910 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
911 ;; leaving the upper half uninitialized. This is OK since the shift
912 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
913 ;; data flow analysis however, we pretend the full register is set
;; Operand 1 is an SImode value in memory ("Um") or in an "r" register;
;; operand 0 is the DImode destination register.  The memory alternative
;; loads lane 0 with vld1.32.
915 (define_insn "neon_load_count"
916 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
917 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
921 vld1.32\t{%P0[0]}, %A1
923 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
926 ;; Widening operations
;; Signed widening sum for the VQI modes: operand 0 = operand 2 +
;; sign_extend (operand 1), with a <V_double_width> result.  The expander
;; builds the two half-selecting parallels with arm_simd_vect_par_cnst_half,
;; copies operand 2 into operand 0 when they differ, and then emits the
;; vec_sel_widen_ssum_lo and vec_sel_widen_ssum_hi insns on operand 0.
928 (define_expand "widen_ssum<mode>3"
929 [(set (match_operand:<V_double_width> 0 "s_register_operand")
930 (plus:<V_double_width>
931 (sign_extend:<V_double_width>
932 (match_operand:VQI 1 "s_register_operand"))
933 (match_operand:<V_double_width> 2 "s_register_operand")))]
936 machine_mode mode = GET_MODE (operands[1]);
939 p1 = arm_simd_vect_par_cnst_half (mode, false);
940 p2 = arm_simd_vect_par_cnst_half (mode, true);
942 if (operands[0] != operands[2])
943 emit_move_insn (operands[0], operands[2]);
945 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
949 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Adds the sign-extended half of operand 1 selected by the
;; vect_par_constant_low parallel (operand 2) to accumulator operand 3, which
;; is tied to the output.  Emitted as vaddw using %e1 on little-endian and
;; %f1 on big-endian.
957 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
958 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
959 (plus:<V_double_width>
960 (sign_extend:<V_double_width>
962 (match_operand:VQI 1 "s_register_operand" "%w")
963 (match_operand:VQI 2 "vect_par_constant_low" "")))
964 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
967 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
968 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
970 [(set_attr "type" "neon_add_widen")])
;; Adds the sign-extended half of operand 1 selected by the
;; vect_par_constant_high parallel (operand 2) to accumulator operand 3,
;; which is tied to the output.  Emitted as vaddw using %f1 on little-endian
;; and %e1 on big-endian.
972 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
973 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
974 (plus:<V_double_width>
975 (sign_extend:<V_double_width>
977 (match_operand:VQI 1 "s_register_operand" "%w")
978 (match_operand:VQI 2 "vect_par_constant_high" "")))
979 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
982 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
983 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
985 [(set_attr "type" "neon_add_widen")])
;; Signed widening sum for the VW mode iterator: adds sign-extended
;; operand 1 to <V_widen> operand 2 with a single vaddw.<V_s_elem>,
;; printing operands 0 and 2 with %q and operand 1 with %P.
987 (define_insn "widen_ssum<mode>3"
988 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
990 (sign_extend:<V_widen>
991 (match_operand:VW 1 "s_register_operand" "%w"))
992 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
994 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
995 [(set_attr "type" "neon_add_widen")]
;; Expander for a zero-extending widening sum over the VQI mode iterator:
;; operand 0 = zero_extend (operand 1) + operand 2, in <V_double_width>.
;; The C body builds two half selectors with arm_simd_vect_par_cnst_half
;; (mode, false) and (mode, true), copies operands[2] into operands[0]
;; when they are different rtxes, and then emits the
;; vec_sel_widen_usum_lo and vec_sel_widen_usum_hi insns, both with
;; operands[0] as their first argument.
998 (define_expand "widen_usum<mode>3"
999 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1000 (plus:<V_double_width>
1001 (zero_extend:<V_double_width>
1002 (match_operand:VQI 1 "s_register_operand"))
1003 (match_operand:<V_double_width> 2 "s_register_operand")))]
1006 machine_mode mode = GET_MODE (operands[1]);
1009 p1 = arm_simd_vect_par_cnst_half (mode, false);
1010 p2 = arm_simd_vect_par_cnst_half (mode, true);
1012 if (operands[0] != operands[2])
1013 emit_move_insn (operands[0], operands[2]);
1015 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1019 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Unsigned widening add of the <V_HALF> vec_select of VQI operand 1
;; chosen by operand 2 (which must satisfy vect_par_constant_low) into
;; the accumulator operand 3, which is tied to operand 0.
;; The output code emits vaddw.<V_u_elem> using %f1 when BYTES_BIG_ENDIAN
;; and %e1 otherwise.
1027 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1028 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1029 (plus:<V_double_width>
1030 (zero_extend:<V_double_width>
1031 (vec_select:<V_HALF>
1032 (match_operand:VQI 1 "s_register_operand" "%w")
1033 (match_operand:VQI 2 "vect_par_constant_low" "")))
1034 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1037 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1038 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1040 [(set_attr "type" "neon_add_widen")])
;; Unsigned widening add of the <V_HALF> vec_select of VQI operand 1
;; chosen by operand 2 (which must satisfy vect_par_constant_high) into
;; the accumulator operand 3, which is tied to operand 0.
;; The output code emits vaddw.<V_u_elem> using %e1 when BYTES_BIG_ENDIAN
;; and %f1 otherwise (the mirror image of the _lo pattern).
1042 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1043 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1044 (plus:<V_double_width>
1045 (zero_extend:<V_double_width>
1046 (vec_select:<V_HALF>
1047 (match_operand:VQI 1 "s_register_operand" "%w")
1048 (match_operand:VQI 2 "vect_par_constant_high" "")))
1049 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1052 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1053 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1055 [(set_attr "type" "neon_add_widen")])
;; Unsigned widening sum for the VW mode iterator: adds zero-extended
;; operand 1 to <V_widen> operand 2 with a single vaddw.<V_u_elem>,
;; printing operands 0 and 2 with %q and operand 1 with %P.
1057 (define_insn "widen_usum<mode>3"
1058 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1059 (plus:<V_widen> (zero_extend:<V_widen>
1060 (match_operand:VW 1 "s_register_operand" "%w"))
1061 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1063 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1064 [(set_attr "type" "neon_add_widen")]
1067 ;; Helpers for quad-word reduction operations
1069 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1070 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1071 ; N/2-element vector.
;; V4SI variant: combines elements 0-1 of operand 1 with elements 2-3 of
;; the same register, producing a V2SI result. Emits
;; <VQH_mnem>.<VQH_sign>32 on the %e1 and %f1 halves of operand 1. The
;; vqh_mnem attribute is set from <VQH_mnem>.
1073 (define_insn "quad_halves_<code>v4si"
1074 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1076 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1077 (parallel [(const_int 0) (const_int 1)]))
1078 (vec_select:V2SI (match_dup 1)
1079 (parallel [(const_int 2) (const_int 3)]))))]
1081 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1082 [(set_attr "vqh_mnem" "<VQH_mnem>")
1083 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF variant: combines elements 0-1 of operand 1 with elements 2-3 of
;; the same register, producing a V2SF result. Enabled only under
;; ARM_HAVE_NEON_V4SF_ARITH. Emits <VQH_mnem>.f32 on the %e1 and %f1
;; halves and uses the floating-point reduction scheduling type.
1086 (define_insn "quad_halves_<code>v4sf"
1087 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1089 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1090 (parallel [(const_int 0) (const_int 1)]))
1091 (vec_select:V2SF (match_dup 1)
1092 (parallel [(const_int 2) (const_int 3)]))))]
1093 "ARM_HAVE_NEON_V4SF_ARITH"
1094 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1095 [(set_attr "vqh_mnem" "<VQH_mnem>")
1096 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant: combines elements 0-3 of operand 1 with elements 4-7 of
;; the same register, producing a V4HI result. Emits
;; <VQH_mnem>.<VQH_sign>16 on the %e1 and %f1 halves of operand 1.
;; NOTE(review): operand 0 uses the "+w" (read-write) constraint here,
;; whereas the v4si and v4sf variants use "=w"; confirm this is intended.
1099 (define_insn "quad_halves_<code>v8hi"
1100 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1102 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1103 (parallel [(const_int 0) (const_int 1)
1104 (const_int 2) (const_int 3)]))
1105 (vec_select:V4HI (match_dup 1)
1106 (parallel [(const_int 4) (const_int 5)
1107 (const_int 6) (const_int 7)]))))]
1109 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1110 [(set_attr "vqh_mnem" "<VQH_mnem>")
1111 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant: combines elements 0-7 of operand 1 with elements 8-15
;; of the same register, producing a V8QI result. Emits
;; <VQH_mnem>.<VQH_sign>8 on the %e1 and %f1 halves of operand 1.
;; NOTE(review): operand 0 uses the "+w" (read-write) constraint here,
;; whereas the v4si and v4sf variants use "=w"; confirm this is intended.
1114 (define_insn "quad_halves_<code>v16qi"
1115 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1117 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1118 (parallel [(const_int 0) (const_int 1)
1119 (const_int 2) (const_int 3)
1120 (const_int 4) (const_int 5)
1121 (const_int 6) (const_int 7)]))
1122 (vec_select:V8QI (match_dup 1)
1123 (parallel [(const_int 8) (const_int 9)
1124 (const_int 10) (const_int 11)
1125 (const_int 12) (const_int 13)
1126 (const_int 14) (const_int 15)]))))]
1128 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1129 [(set_attr "vqh_mnem" "<VQH_mnem>")
1130 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expander over the ANY128 mode iterator taking a 128-bit register
;; (operand 0) and a <V_HALF> register (operand 1). The emitted move's
;; destination is the <V_HALF>-mode subreg of operands[0] at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. the half that starts one half-vector
;; into the register.
1133 (define_expand "move_hi_quad_<mode>"
1134 [(match_operand:ANY128 0 "s_register_operand")
1135 (match_operand:<V_HALF> 1 "s_register_operand")]
1138 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1139 GET_MODE_SIZE (<V_HALF>mode)),
;; Expander over the ANY128 mode iterator taking a 128-bit register
;; (operand 0) and a <V_HALF> register (operand 1). It emits a move whose
;; destination is a <V_HALF>-mode subreg of operands[0].
1144 (define_expand "move_lo_quad_<mode>"
1145 [(match_operand:ANY128 0 "s_register_operand")
1146 (match_operand:<V_HALF> 1 "s_register_operand")]
1149 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1155 ;; Reduction operations
;; Sum-reduce the VD-mode vector operand 1 to the scalar operand 0.
;; Enabled under ARM_HAVE_NEON_<MODE>_ARITH. The body calls
;; neon_pairwise_reduce with the neon_vpadd_internal<mode> generator,
;; writing into a fresh vector register, and then extracts element 0 of
;; that register into operands[0].
1157 (define_expand "reduc_plus_scal_<mode>"
1158 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1159 (match_operand:VD 1 "s_register_operand")]
1160 "ARM_HAVE_NEON_<MODE>_ARITH"
1162 rtx vec = gen_reg_rtx (<MODE>mode);
1163 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1164 &gen_neon_vpadd_internal<mode>);
1165 /* The same result is actually computed into every element. */
1166 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Sum-reduce the V2DI vector operand 1 to the DI scalar operand 0.
;; Enabled only for TARGET_NEON on little-endian (!BYTES_BIG_ENDIAN).
;; Emits arm_reduc_plus_internal_v2di into a fresh V2DI register and then
;; extracts element 0 of it into operands[0].
1170 (define_expand "reduc_plus_scal_v2di"
1171 [(match_operand:DI 0 "nonimmediate_operand")
1172 (match_operand:V2DI 1 "s_register_operand")]
1173 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1175 rtx vec = gen_reg_rtx (V2DImode);
1177 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1178 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
;; Unspec helper for the V2DI sum reduction: a single vadd.i64 that adds
;; the %e1 and %f1 halves of operand 1 and writes the %e0 half of
;; operand 0. Enabled only for TARGET_NEON on little-endian.
1183 (define_insn "arm_reduc_plus_internal_v2di"
1184 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1185 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1187 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1188 "vadd.i64\t%e0, %e1, %f1"
1189 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of the VD-mode vector operand 1 to the scalar
;; operand 0. Enabled under ARM_HAVE_NEON_<MODE>_ARITH. Uses
;; neon_pairwise_reduce with the neon_vpsmin<mode> generator into a fresh
;; vector register, then extracts element 0 into operands[0].
1192 (define_expand "reduc_smin_scal_<mode>"
1193 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1194 (match_operand:VD 1 "s_register_operand")]
1195 "ARM_HAVE_NEON_<MODE>_ARITH"
1197 rtx vec = gen_reg_rtx (<MODE>mode);
1199 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1200 &gen_neon_vpsmin<mode>);
1201 /* The result is computed into every element of the vector. */
1202 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction for the VQ mode iterator, little-endian only.
;; Step 1 folds the two halves of operand 1 into a <V_HALF> register with
;; quad_halves_smin<mode>; step 2 hands that to the <V_half> variant of
;; reduc_smin_scal to produce the scalar in operands[0].
1206 (define_expand "reduc_smin_scal_<mode>"
1207 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1208 (match_operand:VQ 1 "s_register_operand")]
1209 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1211 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1213 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1214 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of the VD-mode vector operand 1 to the scalar
;; operand 0. Enabled under ARM_HAVE_NEON_<MODE>_ARITH. Uses
;; neon_pairwise_reduce with the neon_vpsmax<mode> generator into a fresh
;; vector register, then extracts element 0 into operands[0].
1219 (define_expand "reduc_smax_scal_<mode>"
1220 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1221 (match_operand:VD 1 "s_register_operand")]
1222 "ARM_HAVE_NEON_<MODE>_ARITH"
1224 rtx vec = gen_reg_rtx (<MODE>mode);
1225 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1226 &gen_neon_vpsmax<mode>);
1227 /* The result is computed into every element of the vector. */
1228 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction for the VQ mode iterator, little-endian only.
;; Step 1 folds the two halves of operand 1 into a <V_HALF> register with
;; quad_halves_smax<mode>; step 2 hands that to the <V_half> variant of
;; reduc_smax_scal to produce the scalar in operands[0].
1232 (define_expand "reduc_smax_scal_<mode>"
1233 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1234 (match_operand:VQ 1 "s_register_operand")]
1235 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1237 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1239 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1240 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of the VDI-mode vector operand 1 to the
;; scalar operand 0. Uses neon_pairwise_reduce with the neon_vpumin<mode>
;; generator into a fresh vector register, then extracts element 0 into
;; operands[0].
1245 (define_expand "reduc_umin_scal_<mode>"
1246 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1247 (match_operand:VDI 1 "s_register_operand")]
1250 rtx vec = gen_reg_rtx (<MODE>mode);
1251 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1252 &gen_neon_vpumin<mode>);
1253 /* The result is computed into every element of the vector. */
1254 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction for the VQI mode iterator, enabled for
;; TARGET_NEON on little-endian only. Step 1 folds the two halves of
;; operand 1 into a <V_HALF> register with quad_halves_umin<mode>; step 2
;; hands that to the <V_half> variant of reduc_umin_scal.
1258 (define_expand "reduc_umin_scal_<mode>"
1259 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1260 (match_operand:VQI 1 "s_register_operand")]
1261 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1263 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1265 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1266 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of the VDI-mode vector operand 1 to the
;; scalar operand 0. Uses neon_pairwise_reduce with the neon_vpumax<mode>
;; generator into a fresh vector register, then extracts element 0 into
;; operands[0].
1271 (define_expand "reduc_umax_scal_<mode>"
1272 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1273 (match_operand:VDI 1 "s_register_operand")]
1276 rtx vec = gen_reg_rtx (<MODE>mode);
1277 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1278 &gen_neon_vpumax<mode>);
1279 /* The result is computed into every element of the vector. */
1280 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction for the VQI mode iterator, enabled for
;; TARGET_NEON on little-endian only. Step 1 folds the two halves of
;; operand 1 into a <V_HALF> register with quad_halves_umax<mode>; step 2
;; hands that to the <V_half> variant of reduc_umax_scal.
1284 (define_expand "reduc_umax_scal_<mode>"
1285 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1286 (match_operand:VQI 1 "s_register_operand")]
1287 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1289 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1291 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1292 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add on the VD mode iterator, expressed as an unspec of
;; operands 1 and 2. Emits vpadd.<V_if_elem> on D registers (%P). The
;; scheduling type is neon_fp_reduc_add_s<q> for float modes and
;; neon_reduc_add<q> otherwise. This is the generator passed to
;; neon_pairwise_reduce by reduc_plus_scal_<mode>.
1297 (define_insn "neon_vpadd_internal<mode>"
1298 [(set (match_operand:VD 0 "s_register_operand" "=w")
1299 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1300 (match_operand:VD 2 "s_register_operand" "w")]
1303 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1304 ;; Assume this schedules like vadd.
1306 (if_then_else (match_test "<Is_float_mode>")
1307 (const_string "neon_fp_reduc_add_s<q>")
1308 (const_string "neon_reduc_add<q>")))]
;; Half-precision pairwise add for V4HF, expressed as an unspec of
;; operands 1 and 2. Requires TARGET_NEON_FP16INST and emits vpadd.f16 on
;; D registers.
1311 (define_insn "neon_vpaddv4hf"
1313 (match_operand:V4HF 0 "s_register_operand" "=w")
1314 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1315 (match_operand:V4HF 2 "s_register_operand" "w")]
1317 "TARGET_NEON_FP16INST"
1318 "vpadd.f16\t%P0, %P1, %P2"
1319 [(set_attr "type" "neon_reduc_add")]
;; Pairwise minimum on the VD mode iterator using the signed/float
;; element suffix: emits vpmin.<V_s_elem> on D registers. The scheduling
;; type is neon_fp_reduc_minmax_s<q> for float modes and
;; neon_reduc_minmax<q> otherwise.
1322 (define_insn "neon_vpsmin<mode>"
1323 [(set (match_operand:VD 0 "s_register_operand" "=w")
1324 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1325 (match_operand:VD 2 "s_register_operand" "w")]
1328 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1330 (if_then_else (match_test "<Is_float_mode>")
1331 (const_string "neon_fp_reduc_minmax_s<q>")
1332 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise maximum on the VD mode iterator using the signed/float
;; element suffix: emits vpmax.<V_s_elem> on D registers. The scheduling
;; type is neon_fp_reduc_minmax_s<q> for float modes and
;; neon_reduc_minmax<q> otherwise.
1335 (define_insn "neon_vpsmax<mode>"
1336 [(set (match_operand:VD 0 "s_register_operand" "=w")
1337 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1338 (match_operand:VD 2 "s_register_operand" "w")]
1341 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1343 (if_then_else (match_test "<Is_float_mode>")
1344 (const_string "neon_fp_reduc_minmax_s<q>")
1345 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum on the VDI mode iterator: emits
;; vpmin.<V_u_elem> on D registers.
1348 (define_insn "neon_vpumin<mode>"
1349 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1350 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1351 (match_operand:VDI 2 "s_register_operand" "w")]
1354 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1355 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum on the VDI mode iterator: emits
;; vpmax.<V_u_elem> on D registers.
1358 (define_insn "neon_vpumax<mode>"
1359 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1360 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1361 (match_operand:VDI 2 "s_register_operand" "w")]
1364 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1365 [(set_attr "type" "neon_reduc_minmax<q>")]
1368 ;; Saturating arithmetic
1370 ; NOTE: Neon supports many more saturating variants of instructions than the
1371 ; following, but these are all GCC currently understands.
1372 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1373 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) on the VD mode iterator: emits
;; vqadd.<V_s_elem> on D registers.
1376 (define_insn "*ss_add<mode>_neon"
1377 [(set (match_operand:VD 0 "s_register_operand" "=w")
1378 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1379 (match_operand:VD 2 "s_register_operand" "w")))]
1381 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1382 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) on the VD mode iterator: emits
;; vqadd.<V_u_elem> on D registers.
1385 (define_insn "*us_add<mode>_neon"
1386 [(set (match_operand:VD 0 "s_register_operand" "=w")
1387 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1388 (match_operand:VD 2 "s_register_operand" "w")))]
1390 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1391 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) on the VD mode iterator: emits
;; vqsub.<V_s_elem> on D registers.
1394 (define_insn "*ss_sub<mode>_neon"
1395 [(set (match_operand:VD 0 "s_register_operand" "=w")
1396 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1397 (match_operand:VD 2 "s_register_operand" "w")))]
1399 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1400 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) on the VD mode iterator: emits
;; vqsub.<V_u_elem> on D registers.
1403 (define_insn "*us_sub<mode>_neon"
1404 [(set (match_operand:VD 0 "s_register_operand" "=w")
1405 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1406 (match_operand:VD 2 "s_register_operand" "w")))]
1408 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1409 [(set_attr "type" "neon_qsub<q>")]
;; Vector comparison expander over the VDQWH mode iterator. Operand 1 is
;; the comparison operator applied to operand 2 (register) and operand 3
;; (register or zero); the <V_cmp_result> result goes to operand 0.
;; The condition admits float modes only when
;; flag_unsafe_math_optimizations is set. All work is delegated to
;; arm_expand_vector_compare, passing the rtx code of operands[1] and
;; false as the last argument.
1412 (define_expand "vec_cmp<mode><v_cmp_result>"
1413 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
1414 (match_operator:<V_cmp_result> 1 "comparison_operator"
1415 [(match_operand:VDQWH 2 "s_register_operand")
1416 (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
1418 && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1420 arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1421 operands[2], operands[3], false);
;; Unsigned vector comparison expander over the VDQIW mode iterator; the
;; result has the same mode as the inputs. Operand 1 is the comparison
;; operator applied to operand 2 (register) and operand 3 (register or
;; zero). All work is delegated to arm_expand_vector_compare, passing the
;; rtx code of operands[1] and false as the last argument.
1425 (define_expand "vec_cmpu<mode><mode>"
1426 [(set (match_operand:VDQIW 0 "s_register_operand")
1427 (match_operator:VDQIW 1 "comparison_operator"
1428 [(match_operand:VDQIW 2 "s_register_operand")
1429 (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
1432 arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1433 operands[2], operands[3], false);
;; Mask-driven select over the VDQWH mode iterator: operand 3 is the
;; <V_cmp_result> mask, operands 1 and 2 are the two value vectors, and
;; operand 0 receives the result. The condition admits float modes only
;; when flag_unsafe_math_optimizations is set. The body emits
;; neon_vbsl<mode>, passing the mask (operands[3]) ahead of operands[1].
1437 (define_expand "vcond_mask_<mode><v_cmp_result>"
1438 [(set (match_operand:VDQWH 0 "s_register_operand")
1440 (match_operand:<V_cmp_result> 3 "s_register_operand")
1441 (match_operand:VDQWH 1 "s_register_operand")
1442 (match_operand:VDQWH 2 "s_register_operand")))]
1444 && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1446 emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
1451 ;; Patterns for builtins.
1453 ; good for plain vadd, vaddq.
;; Builtin expander for vadd on the VCVTF mode iterator. When
;; ARM_HAVE_NEON_<MODE>_ARITH holds it emits the canonical add<mode>3;
;; the other emission is the neon_vadd<mode>_unspec form.
1455 (define_expand "neon_vadd<mode>"
1456 [(match_operand:VCVTF 0 "s_register_operand")
1457 (match_operand:VCVTF 1 "s_register_operand")
1458 (match_operand:VCVTF 2 "s_register_operand")]
1461 if (ARM_HAVE_NEON_<MODE>_ARITH)
1462 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1464 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; Builtin expander for vadd on the VH (half-precision) mode iterator.
;; Requires TARGET_NEON_FP16INST and always emits add<mode>3.
1469 (define_expand "neon_vadd<mode>"
1470 [(match_operand:VH 0 "s_register_operand")
1471 (match_operand:VH 1 "s_register_operand")
1472 (match_operand:VH 2 "s_register_operand")]
1473 "TARGET_NEON_FP16INST"
1475 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
;; Builtin expander for vsub on the VH (half-precision) mode iterator.
;; Requires TARGET_NEON_FP16INST and always emits sub<mode>3.
1479 (define_expand "neon_vsub<mode>"
1480 [(match_operand:VH 0 "s_register_operand")
1481 (match_operand:VH 1 "s_register_operand")
1482 (match_operand:VH 2 "s_register_operand")]
1483 "TARGET_NEON_FP16INST"
1485 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
1489 ; Note that NEON operations don't support the full IEEE 754 standard: in
1490 ; particular, denormal values are flushed to zero. This means that GCC cannot
1491 ; use those instructions for autovectorization, etc. unless
1492 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1493 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1494 ; header) must work in either case: if -funsafe-math-optimizations is given,
1495 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1496 ; expand to unspecs (which may potentially limit the extent to which they might
1497 ; be optimized by generic code).
1499 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd on the VCVTF mode iterator, emitted by the
;; neon_vadd<mode> expander. Emits vadd.<V_if_elem> with <V_reg>-sized
;; register operands. The scheduling type is neon_fp_addsub_s<q> for float
;; modes and neon_add<q> otherwise.
1501 (define_insn "neon_vadd<mode>_unspec"
1502 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1503 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1504 (match_operand:VCVTF 2 "s_register_operand" "w")]
1507 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1509 (if_then_else (match_test "<Is_float_mode>")
1510 (const_string "neon_fp_addsub_s<q>")
1511 (const_string "neon_add<q>")))]
;; Long add: two VDI-mode operands produce a <V_widen> result. Emits
;; vaddl.<sup><V_sz_elem> with a Q-register destination (%q0) and
;; D-register sources (%P1, %P2).
1514 (define_insn "neon_vaddl<sup><mode>"
1515 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1516 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1517 (match_operand:VDI 2 "s_register_operand" "w")]
1520 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1521 [(set_attr "type" "neon_add_long")]
;; Wide add: <V_widen> operand 1 plus VDI-mode operand 2 produces a
;; <V_widen> result. Emits vaddw.<sup><V_sz_elem> with Q registers for
;; operands 0 and 1 and a D register for operand 2.
1524 (define_insn "neon_vaddw<sup><mode>"
1525 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1526 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1527 (match_operand:VDI 2 "s_register_operand" "w")]
1530 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1531 [(set_attr "type" "neon_add_widen")]
;; Halving add (with the optional <r> prefix in the mnemonic) on the
;; VDQIW mode iterator: emits v<r>hadd.<sup><V_sz_elem> with
;; <V_reg>-sized register operands. The leading "@" in the name requests
;; a parameterized generator function.
1536 (define_insn "@neon_v<r>hadd<sup><mode>"
1537 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1538 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1539 (match_operand:VDQIW 2 "s_register_operand" "w")]
1542 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1543 [(set_attr "type" "neon_add_halve_q")]
;; Saturating add builtin on the VDQIX mode iterator, expressed as an
;; unspec: emits vqadd.<sup><V_sz_elem> with <V_reg>-sized register
;; operands.
1546 (define_insn "neon_vqadd<sup><mode>"
1547 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1548 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1549 (match_operand:VDQIX 2 "s_register_operand" "w")]
1552 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1553 [(set_attr "type" "neon_qadd<q>")]
;; Narrowing add (with the optional <r> prefix in the mnemonic): two
;; VN-mode operands produce a <V_narrow> result. Emits
;; v<r>addhn.<V_if_elem> with a D-register destination (%P0) and
;; Q-register sources (%q1, %q2).
1556 (define_insn "neon_v<r>addhn<mode>"
1557 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1558 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1559 (match_operand:VN 2 "s_register_operand" "w")]
1562 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1563 [(set_attr "type" "neon_add_halve_narrow_q")]
1566 ;; Polynomial and Float multiplication.
;; Multiply on the VPF mode iterator, expressed as an unspec: emits
;; vmul.<pf><V_sz_elem> with <V_reg>-sized register operands. The
;; scheduling type is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
1567 (define_insn "neon_vmul<pf><mode>"
1568 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1569 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1570 (match_operand:VPF 2 "s_register_operand" "w")]
1573 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1575 (if_then_else (match_test "<Is_float_mode>")
1576 (const_string "neon_fp_mul_s<q>")
1577 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Half-precision multiply on the VH mode iterator. Requires
;; TARGET_NEON_FP16INST and emits vmul.f16 with <V_reg>-sized register
;; operands.
1580 (define_insn "neon_vmulf<mode>"
1582 (match_operand:VH 0 "s_register_operand" "=w")
1584 (match_operand:VH 1 "s_register_operand" "w")
1585 (match_operand:VH 2 "s_register_operand" "w")))]
1586 "TARGET_NEON_FP16INST"
1587 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1588 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Builtin expander for vmla (multiply-accumulate) on the VDQW mode
;; iterator, taking four register operands. When
;; ARM_HAVE_NEON_<MODE>_ARITH holds it emits mul<mode>3add<mode>_neon;
;; the other emission is neon_vmla<mode>_unspec. Both receive operands
;; 0-3 in order.
1591 (define_expand "neon_vmla<mode>"
1592 [(match_operand:VDQW 0 "s_register_operand")
1593 (match_operand:VDQW 1 "s_register_operand")
1594 (match_operand:VDQW 2 "s_register_operand")
1595 (match_operand:VDQW 3 "s_register_operand")]
1598 if (ARM_HAVE_NEON_<MODE>_ARITH)
1599 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1600 operands[2], operands[3]));
1602 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1603 operands[2], operands[3]));
;; Builtin expander for fused multiply-add on the VCVTF mode iterator.
;; Requires TARGET_NEON && TARGET_FMA. Emits fma<mode>4_intrinsic with
;; the builtin's operands reordered: operands[2] and operands[3] are
;; passed immediately after the destination.
1607 (define_expand "neon_vfma<VCVTF:mode>"
1608 [(match_operand:VCVTF 0 "s_register_operand")
1609 (match_operand:VCVTF 1 "s_register_operand")
1610 (match_operand:VCVTF 2 "s_register_operand")
1611 (match_operand:VCVTF 3 "s_register_operand")]
1612 "TARGET_NEON && TARGET_FMA"
1614 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-add on the VH (half-precision)
;; mode iterator. Requires TARGET_NEON_FP16INST. Emits fma<mode>4 with
;; operands[2] and operands[3] passed immediately after the destination.
1619 (define_expand "neon_vfma<VH:mode>"
1620 [(match_operand:VH 0 "s_register_operand")
1621 (match_operand:VH 1 "s_register_operand")
1622 (match_operand:VH 2 "s_register_operand")
1623 (match_operand:VH 3 "s_register_operand")]
1624 "TARGET_NEON_FP16INST"
1626 emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-subtract on the VCVTF mode
;; iterator. Requires TARGET_NEON && TARGET_FMA. Emits
;; fmsub<mode>4_intrinsic with operands[2] and operands[3] passed
;; immediately after the destination.
1631 (define_expand "neon_vfms<VCVTF:mode>"
1632 [(match_operand:VCVTF 0 "s_register_operand")
1633 (match_operand:VCVTF 1 "s_register_operand")
1634 (match_operand:VCVTF 2 "s_register_operand")
1635 (match_operand:VCVTF 3 "s_register_operand")]
1636 "TARGET_NEON && TARGET_FMA"
1638 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-subtract on the VH
;; (half-precision) mode iterator. Requires TARGET_NEON_FP16INST. Emits
;; fmsub<mode>4_intrinsic with operands[2] and operands[3] passed
;; immediately after the destination.
1643 (define_expand "neon_vfms<VH:mode>"
1644 [(match_operand:VH 0 "s_register_operand")
1645 (match_operand:VH 1 "s_register_operand")
1646 (match_operand:VH 2 "s_register_operand")
1647 (match_operand:VH 3 "s_register_operand")]
1648 "TARGET_NEON_FP16INST"
1650 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1655 ;; The expand RTL structure here is not important.
1656 ;; We use the gen_* functions anyway.
1657 ;; We just need something to wrap the iterators around.
;; Builtin expander iterated over VFMLHALVES and the VCVTF modes.
;; The body builds the half-selector parallel for <VFML>mode with
;; arm_simd_vect_par_cnst_half (using <vfml_half_selector>) and emits the
;; matching vfm<vfml_op>l_<vfml_half><mode>_intrinsic insn, with
;; operands[0] as its first argument.
1659 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
1660 [(set (match_operand:VCVTF 0 "s_register_operand")
1662 [(match_operand:VCVTF 1 "s_register_operand")
1664 (match_operand:<VFML> 2 "s_register_operand")
1665 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
1668 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1669 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
;; vfmal on the low halves: operands 2 and 3 are <VFML> registers whose
;; <VFMLSEL> halves are picked by operands 4 and 5 (both must satisfy
;; vect_par_constant_low). Operand 1 is the accumulator, tied to
;; operand 0. Emits vfmal.f16 on %<V_lo>2 and %<V_lo>3.
1677 (define_insn "vfmal_low<mode>_intrinsic"
1678 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1681 (vec_select:<VFMLSEL>
1682 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1683 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1685 (vec_select:<VFMLSEL>
1686 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1687 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1688 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1690 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1691 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl on the high halves: operands 2 and 3 are <VFML> registers whose
;; <VFMLSEL> halves are picked by operands 4 and 5 (both must satisfy
;; vect_par_constant_high). Operand 1 is the accumulator, tied to
;; operand 0. Emits vfmsl.f16 on %<V_hi>2 and %<V_hi>3.
1694 (define_insn "vfmsl_high<mode>_intrinsic"
1695 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1699 (vec_select:<VFMLSEL>
1700 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1701 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
1703 (vec_select:<VFMLSEL>
1704 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1705 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1706 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1708 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1709 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal on the high halves: operands 2 and 3 are <VFML> registers whose
;; <VFMLSEL> halves are picked by operands 4 and 5 (both must satisfy
;; vect_par_constant_high). Operand 1 is the accumulator, tied to
;; operand 0. Emits vfmal.f16 on %<V_hi>2 and %<V_hi>3.
1712 (define_insn "vfmal_high<mode>_intrinsic"
1713 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1716 (vec_select:<VFMLSEL>
1717 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1718 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1720 (vec_select:<VFMLSEL>
1721 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1722 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1723 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1725 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1726 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl on the low halves: operands 2 and 3 are <VFML> registers whose
;; <VFMLSEL> halves are picked by operands 4 and 5 (both must satisfy
;; vect_par_constant_low). Operand 1 is the accumulator, tied to
;; operand 0. Emits vfmsl.f16 on %<V_lo>2 and %<V_lo>3.
1729 (define_insn "vfmsl_low<mode>_intrinsic"
1730 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1734 (vec_select:<VFMLSEL>
1735 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1736 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1738 (vec_select:<VFMLSEL>
1739 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1740 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1741 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1743 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1744 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Builtin expander for the by-lane vfmal/vfmsl forms where both
;; multiplicands have mode <VFML>; operand 4 is the constant lane number.
;; The body converts the lane with NEON_ENDIAN_LANE_N relative to
;; <VFML>mode, builds the half-selector parallel with
;; arm_simd_vect_par_cnst_half, and emits the matching
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic insn on operands 0-3.
1747 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
1748 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1750 [(match_operand:VCVTF 1 "s_register_operand")
1752 (match_operand:<VFML> 2 "s_register_operand")
1753 (match_operand:<VFML> 3 "s_register_operand"))
1754 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1757 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
1758 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1759 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
1760 (operands[0], operands[1],
1761 operands[2], operands[3],
;; By-lane vfmal using the low half of operand 2 (operand 4 must satisfy
;; vect_par_constant_low) and one lane of <VFML> operand 3 selected by
;; operand 5. Operand 1 is the accumulator, tied to operand 0.
;; The output code applies NEON_ENDIAN_LANE_N to the lane. If the lane
;; lies beyond the first <VFMLSEL>-sized half, it is rebased by
;; GET_MODE_NUNITS (<VFMLSEL>mode) and printed against %<V_hi>3;
;; otherwise it is printed against %<V_lo>3.
1766 (define_insn "vfmal_lane_low<mode>_intrinsic"
1767 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1770 (vec_select:<VFMLSEL>
1771 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1772 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1774 (vec_duplicate:<VFMLSEL>
1776 (match_operand:<VFML> 3 "s_register_operand" "x")
1777 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1778 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1781 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1782 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1784 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1785 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
1789 operands[5] = GEN_INT (lane);
1790 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
1793 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Builtin expander for the by-lane vfmal/vfmsl forms where the lane
;; source (operand 3) has mode <VFMLSEL2> rather than <VFML>; operand 4
;; is the constant lane number. The body converts the lane with
;; NEON_ENDIAN_LANE_N relative to <VFMLSEL2>mode, builds the half-selector
;; parallel for <VFML>mode, and emits the matching
;; vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic insn.
1796 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
1797 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1799 [(match_operand:VCVTF 1 "s_register_operand")
1801 (match_operand:<VFML> 2 "s_register_operand")
1802 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
1803 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1807 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
1808 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1809 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
1810 (operands[0], operands[1], operands[2], operands[3],
1815 ;; Used to implement the intrinsics:
1816 ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1817 ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1818 ;; Needs a bit of care to get the modes of the different sub-expressions right
1819 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1820 ;; S or D subregister to select the appropriate lane from.
;; Output code summary: the endian-adjusted lane (relative to
;; <VFMLSEL2>mode) is split into a register offset, lane / elts_per_reg,
;; and an in-register lane, lane % elts_per_reg, where elts_per_reg is
;; GET_MODE_NUNITS (<VFMLSEL>mode). operands[3] becomes a <VFMLSEL>mode
;; REG at REGNO + regdiff, and operands[2] is re-created in <VFMLSEL>mode
;; at its own REGNO. Both are printed with %<V_lane_reg>.
1822 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
1823 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1826 (vec_select:<VFMLSEL>
1827 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1828 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1830 (vec_duplicate:<VFMLSEL>
1832 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1833 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1834 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1837 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1838 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1839 int new_lane = lane % elts_per_reg;
1840 int regdiff = lane / elts_per_reg;
1841 operands[5] = GEN_INT (new_lane);
1842 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
1843 because we want the print_operand code to print the appropriate
1844 S or D register prefix. */
1845 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1846 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
1847 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
1849 [(set_attr "type" "neon_fp_mla_s<q>")]
1852 ;; Used to implement the intrinsics:
1853 ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1854 ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1855 ;; Needs a bit of care to get the modes of the different sub-expressions right
1856 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1857 ;; S or D subregister to select the appropriate lane from.
;; Output code summary: the endian-adjusted lane (relative to
;; <VFMLSEL2>mode) is split into a register offset, lane / elts_per_reg,
;; and an in-register lane, lane % elts_per_reg, where elts_per_reg is
;; GET_MODE_NUNITS (<VFMLSEL>mode). Only operands[3] is re-created (as a
;; <VFMLSEL>mode REG at REGNO + regdiff); operand 2 is printed via
;; %<V_hi>2 in its original mode.
1859 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
1860 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1863 (vec_select:<VFMLSEL>
1864 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1865 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1867 (vec_duplicate:<VFMLSEL>
1869 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1870 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1871 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1874 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1875 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1876 int new_lane = lane % elts_per_reg;
1877 int regdiff = lane / elts_per_reg;
1878 operands[5] = GEN_INT (new_lane);
1879 /* We re-create operands[3] in the halved VFMLSEL mode
1880 because we've calculated the correct half-width subreg to extract
1881 the lane from and we want to print *that* subreg instead. */
1882 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1883 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
1885 [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane vfmal using the high half of operand 2 (operand 4 must satisfy
;; vect_par_constant_high) and one lane of <VFML> operand 3 selected by
;; operand 5. Operand 1 is the accumulator, tied to operand 0.
;; The output code applies NEON_ENDIAN_LANE_N to the lane. If the lane
;; lies beyond the first <VFMLSEL>-sized half, it is rebased by
;; GET_MODE_NUNITS (<VFMLSEL>mode) and printed against %<V_hi>3;
;; otherwise it is printed against %<V_lo>3.
1888 (define_insn "vfmal_lane_high<mode>_intrinsic"
1889 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1892 (vec_select:<VFMLSEL>
1893 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1894 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1896 (vec_duplicate:<VFMLSEL>
1898 (match_operand:<VFML> 3 "s_register_operand" "x")
1899 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1900 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1903 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1904 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1906 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1907 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
1911 operands[5] = GEN_INT (lane);
1912 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
1915 [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane vfmsl using the low half of operand 2 (operand 4 must satisfy
;; vect_par_constant_low) and one lane of <VFML> operand 3 selected by
;; operand 5. Operand 1 is the accumulator, tied to operand 0.
;; The output code applies NEON_ENDIAN_LANE_N to the lane. If the lane
;; lies beyond the first <VFMLSEL>-sized half, it is rebased by
;; GET_MODE_NUNITS (<VFMLSEL>mode) and printed against %<V_hi>3;
;; otherwise it is printed against %<V_lo>3.
1918 (define_insn "vfmsl_lane_low<mode>_intrinsic"
1919 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1923 (vec_select:<VFMLSEL>
1924 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1925 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1927 (vec_duplicate:<VFMLSEL>
1929 (match_operand:<VFML> 3 "s_register_operand" "x")
1930 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1931 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1934 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1935 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1937 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1938 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
1942 operands[5] = GEN_INT (lane);
1943 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
1946 [(set_attr "type" "neon_fp_mla_s<q>")]
1949 ;; Used to implement the intrinsics:
1950 ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1951 ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1952 ;; Needs a bit of care to get the modes of the different sub-expressions right
1953 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1954 ;; S or D subregister to select the appropriate lane from.
;; Output code summary: the endian-adjusted lane (relative to
;; <VFMLSEL2>mode) is split into a register offset, lane / elts_per_reg,
;; and an in-register lane, lane % elts_per_reg, where elts_per_reg is
;; GET_MODE_NUNITS (<VFMLSEL>mode). operands[3] becomes a <VFMLSEL>mode
;; REG at REGNO + regdiff, and operands[2] is re-created in <VFMLSEL>mode
;; at its own REGNO. Both are printed with %<V_lane_reg>.
1956 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
1957 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1961 (vec_select:<VFMLSEL>
1962 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1963 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1965 (vec_duplicate:<VFMLSEL>
1967 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1968 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1969 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1972 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1973 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1974 int new_lane = lane % elts_per_reg;
1975 int regdiff = lane / elts_per_reg;
1976 operands[5] = GEN_INT (new_lane);
1977 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
1978 because we want the print_operand code to print the appropriate
1979 S or D register prefix. */
1980 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1981 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
1982 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
1984 [(set_attr "type" "neon_fp_mla_s<q>")]
1987 ;; Used to implement the intrinsics:
1988 ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1989 ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1990 ;; Needs a bit of care to get the modes of the different sub-expressions right
1991 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1992 ;; S or D subregister to select the appropriate lane from.
;; High-half VFMSL (operand 4 is a vect_par_constant_high parallel) where
;; the lane source (operand 3) has mode <VFMLSEL2>.  The lane is split into
;; a register offset and an in-register index as in the output code below;
;; only operand 3 is re-created in <VFMLSEL> mode, while operand 2 is
;; printed through %<V_hi>2.
1994 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
1995 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1999 (vec_select:<VFMLSEL>
2000 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2001 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2003 (vec_duplicate:<VFMLSEL>
2005 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2006 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2007 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2010 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2011 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2012 int new_lane = lane % elts_per_reg;
2013 int regdiff = lane / elts_per_reg;
2014 operands[5] = GEN_INT (new_lane);
2015 /* We re-create operands[3] in the halved VFMLSEL mode
2016 because we've calculated the correct half-width subreg to extract
2017 the lane from and we want to print *that* subreg instead. */
2018 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2019 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2021 [(set_attr "type" "neon_fp_mla_s<q>")]
;; VFMSL (by lane) using the high half of operand 2, as selected by the
;; vect_par_constant_high parallel in operand 4; operand 1 is tied to the
;; destination.  The lane index (operand 5) goes through
;; NEON_ENDIAN_LANE_N: an index past the last <VFMLSEL> element is rebased
;; and printed against %<V_hi>3, otherwise it is printed against %<V_lo>3.
2024 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2025 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2029 (vec_select:<VFMLSEL>
2030 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2031 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2033 (vec_duplicate:<VFMLSEL>
2035 (match_operand:<VFML> 3 "s_register_operand" "x")
2036 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2037 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2040 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2041 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2043 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2044 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2048 operands[5] = GEN_INT (lane);
2049 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2052 [(set_attr "type" "neon_fp_mla_s<q>")]
2055 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; UNSPEC form of multiply-accumulate for the VDQW modes: prints vmla with
;; operands 2 and 3 as sources and operand 0 as destination; operand 1 is
;; tied to operand 0 by the "0" constraint.  The trailing if_then_else
;; selects neon_fp_mla_s<q> or neon_mla_<V_elem_ch><q> on <Is_float_mode>.
2057 (define_insn "neon_vmla<mode>_unspec"
2058 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2059 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2060 (match_operand:VDQW 2 "s_register_operand" "w")
2061 (match_operand:VDQW 3 "s_register_operand" "w")]
2064 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2066 (if_then_else (match_test "<Is_float_mode>")
2067 (const_string "neon_fp_mla_s<q>")
2068 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Widening multiply-accumulate: VW sources (operands 2 and 3, printed with
;; %P) accumulate into a <V_widen> destination (printed with %q).
;; Operand 1 is the accumulator input, tied to operand 0.
2071 (define_insn "neon_vmlal<sup><mode>"
2072 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2073 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2074 (match_operand:VW 2 "s_register_operand" "w")
2075 (match_operand:VW 3 "s_register_operand" "w")]
2078 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2079 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Expander taking four VDQW register operands.  Under
;; ARM_HAVE_NEON_<MODE>_ARITH it emits the
;; mul<mode>3neg<mode>add<mode>_neon pattern; the neon_vmls<mode>_unspec
;; emit that follows is presumably the alternative branch.
;; NOTE(review): the line joining the two emits is not visible here --
;; confirm against the full source.
2082 (define_expand "neon_vmls<mode>"
2083 [(match_operand:VDQW 0 "s_register_operand")
2084 (match_operand:VDQW 1 "s_register_operand")
2085 (match_operand:VDQW 2 "s_register_operand")
2086 (match_operand:VDQW 3 "s_register_operand")]
2089 if (ARM_HAVE_NEON_<MODE>_ARITH)
2090 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2091 operands[1], operands[2], operands[3]));
2093 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2094 operands[2], operands[3]));
2098 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; UNSPEC form of multiply-subtract for the VDQW modes: prints vmls with
;; operands 2 and 3 as sources and operand 0 as destination; operand 1 is
;; tied to operand 0 by the "0" constraint.  The trailing if_then_else
;; selects neon_fp_mla_s<q> or neon_mla_<V_elem_ch><q> on <Is_float_mode>.
2100 (define_insn "neon_vmls<mode>_unspec"
2101 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2102 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2103 (match_operand:VDQW 2 "s_register_operand" "w")
2104 (match_operand:VDQW 3 "s_register_operand" "w")]
2107 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2109 (if_then_else (match_test "<Is_float_mode>")
2110 (const_string "neon_fp_mla_s<q>")
2111 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Widening multiply-subtract: VW sources (operands 2 and 3, printed with
;; %P) are applied to a <V_widen> destination (printed with %q).
;; Operand 1 is the accumulator input, tied to operand 0.
2114 (define_insn "neon_vmlsl<sup><mode>"
2115 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2116 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2117 (match_operand:VW 2 "s_register_operand" "w")
2118 (match_operand:VW 3 "s_register_operand" "w")]
2121 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2122 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2125 ;; vqdmulh, vqrdmulh
;; Two-source UNSPEC pattern for the VMDQI modes; the <r> substitution in
;; the name and mnemonic distinguishes the vqdmulh and vqrdmulh forms.
;; Neither source is tied to the destination.
2126 (define_insn "neon_vq<r>dmulh<mode>"
2127 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2128 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2129 (match_operand:VMDQI 2 "s_register_operand" "w")]
2132 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2133 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2136 ;; vqrdmlah, vqrdmlsh
;; Accumulating UNSPEC pattern for the VMDQI modes: operand 1 is tied to
;; the destination and operands 2 and 3 are the printed sources.  The
;; <VQRDMLH_AS:neon_rdma_as> substitution supplies the part of the
;; mnemonic that differs between vqrdmlah and vqrdmlsh.
2137 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2138 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2139 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2140 (match_operand:VMDQI 2 "s_register_operand" "w")
2141 (match_operand:VMDQI 3 "s_register_operand" "w")]
2144 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2145 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: VMDI sources (operands 2 and 3, printed with %P) accumulate
;; into a <V_widen> destination (printed with %q); operand 1 is tied to
;; operand 0.
2148 (define_insn "neon_vqdmlal<mode>"
2149 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2150 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2151 (match_operand:VMDI 2 "s_register_operand" "w")
2152 (match_operand:VMDI 3 "s_register_operand" "w")]
2155 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2156 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: VMDI sources (operands 2 and 3, printed with %P) are applied to
;; a <V_widen> destination (printed with %q); operand 1 is tied to
;; operand 0.
2159 (define_insn "neon_vqdmlsl<mode>"
2160 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2161 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2162 (match_operand:VMDI 2 "s_register_operand" "w")
2163 (match_operand:VMDI 3 "s_register_operand" "w")]
2166 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2167 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW sources (printed with %P) produce a <V_widen> result
;; (printed with %q).  No operand is tied to the destination.
2170 (define_insn "neon_vmull<sup><mode>"
2171 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2172 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2173 (match_operand:VW 2 "s_register_operand" "w")]
2176 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2177 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI sources (printed with %P) produce a <V_widen> result
;; (printed with %q).  No operand is tied to the destination.
2180 (define_insn "neon_vqdmull<mode>"
2181 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2182 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2183 (match_operand:VMDI 2 "s_register_operand" "w")]
2186 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2187 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander taking three VCVTF register operands.  Under
;; ARM_HAVE_NEON_<MODE>_ARITH it emits sub<mode>3; the
;; neon_vsub<mode>_unspec emit that follows is presumably the alternative
;; branch.  NOTE(review): the joining line and the tail of the second call
;; are not visible here -- confirm against the full source.
2190 (define_expand "neon_vsub<mode>"
2191 [(match_operand:VCVTF 0 "s_register_operand")
2192 (match_operand:VCVTF 1 "s_register_operand")
2193 (match_operand:VCVTF 2 "s_register_operand")]
2196 if (ARM_HAVE_NEON_<MODE>_ARITH)
2197 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2199 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2204 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; UNSPEC form of vector subtract for the VCVTF modes: prints vsub with
;; operands 1 and 2 as sources.  The trailing if_then_else selects
;; neon_fp_addsub_s<q> or neon_sub<q> on <Is_float_mode>.
2206 (define_insn "neon_vsub<mode>_unspec"
2207 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2208 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2209 (match_operand:VCVTF 2 "s_register_operand" "w")]
2212 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2214 (if_then_else (match_test "<Is_float_mode>")
2215 (const_string "neon_fp_addsub_s<q>")
2216 (const_string "neon_sub<q>")))]
;; vsubl: two VDI sources (printed with %P) produce a <V_widen> result
;; (printed with %q).
2219 (define_insn "neon_vsubl<sup><mode>"
2220 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2221 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2222 (match_operand:VDI 2 "s_register_operand" "w")]
2225 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2226 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 already has the <V_widen> mode (printed with %q) while
;; operand 2 is a VDI value (printed with %P); the result is <V_widen>.
2229 (define_insn "neon_vsubw<sup><mode>"
2230 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2231 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2232 (match_operand:VDI 2 "s_register_operand" "w")]
2235 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2236 [(set_attr "type" "neon_sub_widen")]
;; vqsub: two-source UNSPEC pattern for the VDQIX modes; <sup> supplies the
;; signedness letter in the mnemonic suffix.
2239 (define_insn "neon_vqsub<sup><mode>"
2240 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2241 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2242 (match_operand:VDQIX 2 "s_register_operand" "w")]
2245 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2246 [(set_attr "type" "neon_qsub<q>")]
;; vhsub: two-source UNSPEC pattern for the VDQIW modes; <sup> supplies the
;; signedness letter in the mnemonic suffix.
2249 (define_insn "neon_vhsub<sup><mode>"
2250 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2251 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2252 (match_operand:VDQIW 2 "s_register_operand" "w")]
2255 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2256 [(set_attr "type" "neon_sub_halve<q>")]
;; v<r>subhn: two VN sources (printed with %q) produce a <V_narrow> result
;; (printed with %P).  The <r> substitution selects between the vsubhn and
;; vrsubhn mnemonics.
2259 (define_insn "neon_v<r>subhn<mode>"
2260 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2261 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2262 (match_operand:VN 2 "s_register_operand" "w")]
2265 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2266 [(set_attr "type" "neon_sub_halve_narrow_q")]
2269 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2270 ;; without unsafe math optimizations.
;; Expander for the COMPARISONS codes over the VDQW modes; operand 2 may be
;; a register or zero.  For float vector modes without
;; flag_unsafe_math_optimizations the body calls the explicit v2sf / v4sf
;; *_insn_unspec generators; the last visible emit uses the non-UNSPEC
;; neon_vc<cmp_op><mode>_insn pattern.
;; NOTE(review): the lines choosing between the v2sf and v4sf calls are not
;; visible here -- confirm against the full source.
2271 (define_expand "@neon_vc<cmp_op><mode>"
2272 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2274 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2275 (match_operand:VDQW 2 "reg_or_zero_operand")))]
2278 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2280 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2281 && !flag_unsafe_math_optimizations)
2283 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2284 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2285 whereas this expander iterates over the integer modes as well,
2286 but we will never expand to UNSPECs for the integer comparisons. */
2290 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2295 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2304 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; Non-UNSPEC vector compare.  The condition excludes float vector modes
;; unless flag_unsafe_math_optimizations is set.  There are two
;; alternatives: a register second source ("w") or a zero constant ("Dz").
;; The template is built with sprintf: the type letter is "f" for float
;; vector modes and <cmp_type> otherwise, and the last operand is printed
;; as %<V_reg>2 for alternative 0 or as "#0" for the zero alternative.
;; The if_then_else at the end picks neon_compare_zero<q> when operand 2
;; matches zero_operand.
2311 (define_insn "@neon_vc<cmp_op><mode>_insn"
2312 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2314 (COMPARISONS:<V_cmp_result>
2315 (match_operand:VDQW 1 "s_register_operand" "w,w")
2316 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2317 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2318 && !flag_unsafe_math_optimizations)"
2321 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2323 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2324 ? "f" : "<cmp_type>",
2325 which_alternative == 0
2326 ? "%<V_reg>2" : "#0");
2327 output_asm_insn (pattern, operands);
2331 (if_then_else (match_operand 2 "zero_operand")
2332 (const_string "neon_compare_zero<q>")
2333 (const_string "neon_compare<q>")))]
;; UNSPEC vector compare for the VCVTF modes.  Two alternatives: register
;; second source ("w") or zero constant ("Dz").  The sprintf-built template
;; always uses the ".f" type letter and prints the last operand as
;; %<V_reg>2 for alternative 0 or "#0" otherwise.
2336 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2337 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2338 (unspec:<V_cmp_result>
2339 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2340 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2345 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2347 which_alternative == 0
2348 ? "%<V_reg>2" : "#0");
2349 output_asm_insn (pattern, operands);
2352 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for vector compares over the VH modes, enabled under
;; TARGET_NEON_FP16INST.  For float vector modes without
;; flag_unsafe_math_optimizations it uses the *_fp16insn_unspec generator;
;; the other visible call uses the *_fp16insn generator.
;; NOTE(review): the emit_insn wrappers and the branch separator are not
;; visible here -- confirm against the full source.
2355 (define_expand "@neon_vc<cmp_op><mode>"
2356 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2359 (match_operand:VH 1 "s_register_operand")
2360 (match_operand:VH 2 "reg_or_zero_operand")))]
2361 "TARGET_NEON_FP16INST"
2363 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2365 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2366 && !flag_unsafe_math_optimizations)
2368 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2369 (operands[0], operands[1], operands[2]));
2372 (gen_neon_vc<cmp_op><mode>_fp16insn
2373 (operands[0], operands[1], operands[2]));
;; Non-UNSPEC vector compare for the VH modes.  The condition requires
;; TARGET_NEON_FP16INST and excludes float vector modes unless
;; flag_unsafe_math_optimizations is set.  Two alternatives: register
;; second source ("w") or zero constant ("Dz").  The sprintf-built
;; template picks "f" or <cmp_type> as the type letter and prints the last
;; operand as %<V_reg>2 for alternative 0 or "#0" otherwise.
2377 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2378 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2380 (COMPARISONS:<V_cmp_result>
2381 (match_operand:VH 1 "s_register_operand" "w,w")
2382 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2383 "TARGET_NEON_FP16INST
2384 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2385 && !flag_unsafe_math_optimizations)"
2388 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2390 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2391 ? "f" : "<cmp_type>",
2392 which_alternative == 0
2393 ? "%<V_reg>2" : "#0");
2394 output_asm_insn (pattern, operands);
2398 (if_then_else (match_operand 2 "zero_operand")
2399 (const_string "neon_compare_zero<q>")
2400 (const_string "neon_compare<q>")))])
;; UNSPEC vector compare for the VH modes, enabled under
;; TARGET_NEON_FP16INST.  Two alternatives: register second source ("w")
;; or zero constant ("Dz").  The sprintf-built template always uses the
;; ".f" type letter and prints the last operand as %<V_reg>2 for
;; alternative 0 or "#0" otherwise.
2402 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2404 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2405 (unspec:<V_cmp_result>
2406 [(match_operand:VH 1 "s_register_operand" "w,w")
2407 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2409 "TARGET_NEON_FP16INST"
2412 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2414 which_alternative == 0
2415 ? "%<V_reg>2" : "#0");
2416 output_asm_insn (pattern, operands);
2419 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Compare pattern for the GTUGEU codes over the VDQIW modes; both sources
;; are registers and the mnemonic always carries the ".u" type letter.
2421 (define_insn "@neon_vc<code><mode>"
2422 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2424 (GTUGEU:<V_cmp_result>
2425 (match_operand:VDQIW 1 "s_register_operand" "w")
2426 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2428 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2429 [(set_attr "type" "neon_compare<q>")]
;; Expander for the GLTE comparisons of abs(operand 1) against
;; abs(operand 2) over the VCVTF modes.  With
;; flag_unsafe_math_optimizations it emits neon_vca<cmp_op><mode>_insn; the
;; neon_vca<cmp_op><mode>_insn_unspec emit that follows is presumably the
;; alternative branch.  NOTE(review): the branch separator and call tails
;; are not visible here -- confirm against the full source.
2432 (define_expand "neon_vca<cmp_op><mode>"
2433 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2435 (GLTE:<V_cmp_result>
2436 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2437 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2440 if (flag_unsafe_math_optimizations)
2441 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2444 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; Non-UNSPEC absolute compare: GLTE applied to abs(operand 1) and
;; abs(operand 2) over the VCVTF modes, printed as vac<cmp_op>.  Only
;; enabled when TARGET_NEON && flag_unsafe_math_optimizations.
2451 (define_insn "neon_vca<cmp_op><mode>_insn"
2452 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2454 (GLTE:<V_cmp_result>
2455 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2456 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2457 "TARGET_NEON && flag_unsafe_math_optimizations"
2458 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2459 [(set_attr "type" "neon_fp_compare_s<q>")]
;; UNSPEC absolute compare for the VCVTF modes: two register sources,
;; printed as vac<cmp_op_unsp>.
2462 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2463 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2464 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2465 (match_operand:VCVTF 2 "s_register_operand" "w")]
2468 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2469 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for the GLTE comparisons of abs(operand 1) against
;; abs(operand 2) over the VH modes, enabled under TARGET_NEON_FP16INST.
;; With flag_unsafe_math_optimizations it emits the *_fp16insn pattern; the
;; *_fp16insn_unspec emit that follows is presumably the alternative
;; branch.  NOTE(review): the branch separator is not visible here --
;; confirm against the full source.
2472 (define_expand "neon_vca<cmp_op><mode>"
2474 (match_operand:<V_cmp_result> 0 "s_register_operand")
2476 (GLTE:<V_cmp_result>
2477 (abs:VH (match_operand:VH 1 "s_register_operand"))
2478 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2479 "TARGET_NEON_FP16INST"
2481 if (flag_unsafe_math_optimizations)
2482 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2483 (operands[0], operands[1], operands[2]));
2485 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2486 (operands[0], operands[1], operands[2]));
;; Non-UNSPEC absolute compare for the VH modes: GLTE applied to
;; abs(operand 1) and abs(operand 2), printed as vac<cmp_op>.  Only enabled
;; when TARGET_NEON_FP16INST && flag_unsafe_math_optimizations.
2490 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2492 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2494 (GLTE:<V_cmp_result>
2495 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2496 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2497 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2498 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2499 [(set_attr "type" "neon_fp_compare_s<q>")]
;; UNSPEC absolute compare for the VH modes: two register sources, printed
;; as vac<cmp_op_unsp>.
2502 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2503 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2504 (unspec:<V_cmp_result>
2505 [(match_operand:VH 1 "s_register_operand" "w")
2506 (match_operand:VH 2 "s_register_operand" "w")]
2509 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2510 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for the VH modes, enabled under
;; TARGET_NEON_FP16INST: forwards to gen_neon_vc<cmp_op><mode> with
;; CONST0_RTX (<MODE>mode) supplied as the second comparison operand.
2513 (define_expand "neon_vc<cmp_op>z<mode>"
2515 (match_operand:<V_cmp_result> 0 "s_register_operand")
2516 (COMPARISONS:<V_cmp_result>
2517 (match_operand:VH 1 "s_register_operand")
2519 "TARGET_NEON_FP16INST"
2521 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2522 CONST0_RTX (<MODE>mode)));
;; vtst pattern for the VDQIW modes.  The visible RTL combines an AND of
;; operands 1 and 2 with a zero_operand (operand 3) and a
;; minus_one_operand (operand 4); only operands 0-2 appear in the printed
;; instruction.  NOTE(review): the enclosing rtx codes are not visible
;; here -- confirm the exact shape against the full source.
2526 (define_insn "neon_vtst_combine<mode>"
2527 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2530 (and:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
2531 (match_operand:VDQIW 2 "s_register_operand" "w"))
2532 (match_operand:VDQIW 3 "zero_operand" "i"))
2533 (match_operand:VDQIW 4 "minus_one_operand" "i")))]
2535 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2536 [(set_attr "type" "neon_tst<q>")]
;; vabd for the VDQIW modes: two register sources; <sup> supplies the
;; signedness letter in the mnemonic suffix.
2539 (define_insn "neon_vabd<sup><mode>"
2540 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2541 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2542 (match_operand:VDQIW 2 "s_register_operand" "w")]
2545 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2546 [(set_attr "type" "neon_abd<q>")]
;; vabd for the VH modes, enabled under TARGET_NEON_FP16INST: two register
;; sources, element suffix taken from <V_s_elem>.
2549 (define_insn "neon_vabd<mode>"
2550 [(set (match_operand:VH 0 "s_register_operand" "=w")
2551 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2552 (match_operand:VH 2 "s_register_operand" "w")]
2554 "TARGET_NEON_FP16INST"
2555 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2556 [(set_attr "type" "neon_abd<q>")]
;; vabd for the VCVTF modes: two register sources, element suffix taken
;; from <V_s_elem>, typed as neon_fp_abd_s<q>.
2559 (define_insn "neon_vabdf<mode>"
2560 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2561 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2562 (match_operand:VCVTF 2 "s_register_operand" "w")]
2565 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2566 [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: two VW sources (printed with %P) produce a <V_widen> result
;; (printed with %q).
2569 (define_insn "neon_vabdl<sup><mode>"
2570 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2571 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2572 (match_operand:VW 2 "s_register_operand" "w")]
2575 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2576 [(set_attr "type" "neon_abd_long")]
;; vaba for the VDQIW modes, expressed as a PLUS of an UNSPEC over
;; operands 2 and 3 and the accumulator operand 1, which is tied to the
;; destination by the "0" constraint.
2579 (define_insn "neon_vaba<sup><mode>"
2580 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2581 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2582 (match_operand:VDQIW 3 "s_register_operand" "w")]
2584 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2586 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2587 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal, expressed as a PLUS of a <V_widen> UNSPEC over the VW operands 2
;; and 3 and the <V_widen> accumulator operand 1, which is tied to the
;; destination by the "0" constraint.
2590 (define_insn "neon_vabal<sup><mode>"
2591 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2592 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2593 (match_operand:VW 3 "s_register_operand" "w")]
2595 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2597 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2598 [(set_attr "type" "neon_arith_acc<q>")]
;; Expander with V16QI inputs (operands 1 and 2) and V4SI operands 0 and 3.
;; Sequence emitted by the body:
;;   1. vabdl on the low V8QI halves of operands 1 and 2 into a fresh V8HI
;;      register (reduc);
;;   2. vget_high extracts the high V8QI halves into fresh registers;
;;   3. vabal accumulates those high halves into reduc;
;;   4. vpadal accumulates reduc into operand 3, which is written in place;
;;   5. operand 3 is then moved into operand 0.
2601 (define_expand "<sup>sadv16qi"
2602 [(use (match_operand:V4SI 0 "register_operand"))
2603 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
2604 (use (match_operand:V16QI 2 "register_operand"))] VABAL)
2605 (use (match_operand:V4SI 3 "register_operand"))]
2608 rtx reduc = gen_reg_rtx (V8HImode);
2609 rtx op1_highpart = gen_reg_rtx (V8QImode);
2610 rtx op2_highpart = gen_reg_rtx (V8QImode);
2612 emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
2613 gen_lowpart (V8QImode, operands[1]),
2614 gen_lowpart (V8QImode, operands[2])));
2616 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
2617 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
2618 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
2619 op1_highpart, op2_highpart));
2620 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
2622 emit_move_insn (operands[0], operands[3]);
;; v<maxmin> for the VDQIW modes: two register sources; <maxmin> supplies
;; the mnemonic and <sup> the signedness letter.
2627 (define_insn "neon_v<maxmin><sup><mode>"
2628 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2629 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2630 (match_operand:VDQIW 2 "s_register_operand" "w")]
2633 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2634 [(set_attr "type" "neon_minmax<q>")]
;; v<maxmin> for the VCVTF modes: two register sources, element suffix
;; taken from <V_s_elem>, typed as neon_fp_minmax_s<q>.
2637 (define_insn "neon_v<maxmin>f<mode>"
2638 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2639 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2640 (match_operand:VCVTF 2 "s_register_operand" "w")]
2643 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2644 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; v<maxmin> for the VH modes, enabled under TARGET_NEON_FP16INST: two
;; register sources, element suffix taken from <V_s_elem>.
2647 (define_insn "neon_v<maxmin>f<mode>"
2648 [(set (match_operand:VH 0 "s_register_operand" "=w")
2650 [(match_operand:VH 1 "s_register_operand" "w")
2651 (match_operand:VH 2 "s_register_operand" "w")]
2653 "TARGET_NEON_FP16INST"
2654 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2655 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vp<maxmin>.f16 on fixed V4HF operands, enabled under
;; TARGET_NEON_FP16INST; all three operands are printed with %P.
2658 (define_insn "neon_vp<maxmin>fv4hf"
2659 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2661 [(match_operand:V4HF 1 "s_register_operand" "w")
2662 (match_operand:V4HF 2 "s_register_operand" "w")]
2664 "TARGET_NEON_FP16INST"
2665 "vp<maxmin>.f16\t%P0, %P1, %P2"
2666 [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> for the VH modes, enabled under TARGET_NEON_FP16INST: two
;; register sources; the mnemonic comes entirely from <fmaxmin_op>.
2669 (define_insn "neon_<fmaxmin_op><mode>"
2671 (match_operand:VH 0 "s_register_operand" "=w")
2673 [(match_operand:VH 1 "s_register_operand" "w")
2674 (match_operand:VH 2 "s_register_operand" "w")]
2676 "TARGET_NEON_FP16INST"
2677 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2678 [(set_attr "type" "neon_fp_minmax_s<q>")]
2681 ;; v<maxmin>nm intrinsics.
;; <fmaxmin_op> for the VCVTF modes, enabled under
;; TARGET_NEON && TARGET_VFP5: two register sources; the mnemonic comes
;; entirely from <fmaxmin_op>.
2682 (define_insn "neon_<fmaxmin_op><mode>"
2683 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2684 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2685 (match_operand:VCVTF 2 "s_register_operand" "w")]
2687 "TARGET_NEON && TARGET_VFP5"
2688 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2689 [(set_attr "type" "neon_fp_minmax_s<q>")]
2692 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Named <fmaxmin><mode>3 pattern for the VCVTF modes, enabled under
;; TARGET_NEON && TARGET_VFP5.  It prints the same <fmaxmin_op> instruction
;; as the neon_<fmaxmin_op><mode> pattern for VCVTF.
2693 (define_insn "<fmaxmin><mode>3"
2694 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2695 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2696 (match_operand:VCVTF 2 "s_register_operand" "w")]
2698 "TARGET_NEON && TARGET_VFP5"
2699 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2700 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander taking three VD register operands; it forwards to the
;; neon_vpadd_internal<mode> pattern.
2703 (define_expand "neon_vpadd<mode>"
2704 [(match_operand:VD 0 "s_register_operand")
2705 (match_operand:VD 1 "s_register_operand")
2706 (match_operand:VD 2 "s_register_operand")]
2709 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: a single VDQIW source produces a <V_double_width> result.
2714 (define_insn "neon_vpaddl<sup><mode>"
2715 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2716 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2719 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2720 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: the VDQIW source (operand 2) accumulates into a
;; <V_double_width> destination; operand 1 is the accumulator input, tied
;; to operand 0, so only operands 0 and 2 are printed.
2723 (define_insn "neon_vpadal<sup><mode>"
2724 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2725 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2726 (match_operand:VDQIW 2 "s_register_operand" "w")]
2729 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2730 [(set_attr "type" "neon_reduc_add_acc")]
;; vp<maxmin> for the VDI modes: two register sources; <maxmin> supplies
;; the mnemonic and <sup> the signedness letter.
2733 (define_insn "neon_vp<maxmin><sup><mode>"
2734 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2735 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2736 (match_operand:VDI 2 "s_register_operand" "w")]
2739 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2740 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vp<maxmin> for the VCVTF modes: two register sources, element suffix
;; taken from <V_s_elem>, typed as neon_fp_reduc_minmax_s<q>.
2743 (define_insn "neon_vp<maxmin>f<mode>"
2744 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2745 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2746 (match_operand:VCVTF 2 "s_register_operand" "w")]
2749 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2750 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps for the VCVTF modes: two-source UNSPEC pattern, typed as
;; neon_fp_recps_s<q>.
2753 (define_insn "neon_vrecps<mode>"
2754 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2755 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2756 (match_operand:VCVTF 2 "s_register_operand" "w")]
2759 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2760 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps for the VH modes, enabled under TARGET_NEON_FP16INST: two-source
;; UNSPEC pattern.
2763 (define_insn "neon_vrecps<mode>"
2765 (match_operand:VH 0 "s_register_operand" "=w")
2766 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2767 (match_operand:VH 2 "s_register_operand" "w")]
2769 "TARGET_NEON_FP16INST"
2770 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2771 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts for the VCVTF modes: two-source UNSPEC pattern, typed as
;; neon_fp_rsqrts_s<q>.
2774 (define_insn "neon_vrsqrts<mode>"
2775 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2776 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2777 (match_operand:VCVTF 2 "s_register_operand" "w")]
2780 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2781 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts for the VH modes, enabled under TARGET_NEON_FP16INST:
;; two-source UNSPEC pattern.
2784 (define_insn "neon_vrsqrts<mode>"
2786 (match_operand:VH 0 "s_register_operand" "=w")
2787 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2788 (match_operand:VH 2 "s_register_operand" "w")]
2790 "TARGET_NEON_FP16INST"
2791 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2792 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander taking two VDQW register operands; it forwards directly to the
;; abs<mode>2 pattern.
2795 (define_expand "neon_vabs<mode>"
2796 [(match_operand:VDQW 0 "s_register_operand")
2797 (match_operand:VDQW 1 "s_register_operand")]
2800 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs for the VDQIW modes: single-source UNSPEC pattern.
2804 (define_insn "neon_vqabs<mode>"
2805 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2806 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2809 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2810 [(set_attr "type" "neon_qabs<q>")]
;; bswap of each element for the VDQHSD modes, printed as
;; vrev<V_sz_elem>.8 (the element size selects the vrev width; the data
;; size is fixed at 8).
2813 (define_insn "neon_bswap<mode>"
2814 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2815 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2817 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2818 [(set_attr "type" "neon_rev<q>")]
;; Expander taking two VDQW register operands; it forwards directly to the
;; neon_neg<mode>2 pattern.
2821 (define_expand "neon_vneg<mode>"
2822 [(match_operand:VDQW 0 "s_register_operand")
2823 (match_operand:VDQW 1 "s_register_operand")]
2826 emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1]));
2831 ;; The vcadd and vcmla patterns are made UNSPEC explicitly due to the
2832 ;; fact that their usage needs to guarantee that the source vectors are
2833 ;; contiguous. It would be wrong to describe the operation without being able
2834 ;; to describe the permute that is also required, but even if that is done
2835 ;; the permute would have been created as a LOAD_LANES which means the values
2836 ;; in the registers are in the wrong order.
;; vcadd for the VF modes: two register sources, with the rotation printed
;; as the immediate #<rot> taken from the pattern name substitution.
2837 (define_insn "neon_vcadd<rot><mode>"
2838 [(set (match_operand:VF 0 "register_operand" "=w")
2839 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
2840 (match_operand:VF 2 "register_operand" "w")]
2843 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
2844 [(set_attr "type" "neon_fcadd")]
;; vcmla for the VF modes, expressed as a PLUS of the accumulator
;; (operand 1, tied to the destination) and an UNSPEC over operands 2 and
;; 3; the rotation is printed as the immediate #<rot>.
2847 (define_insn "neon_vcmla<rot><mode>"
2848 [(set (match_operand:VF 0 "register_operand" "=w")
2849 (plus:VF (match_operand:VF 1 "register_operand" "0")
2850 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
2851 (match_operand:VF 3 "register_operand" "w")]
2854 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
2855 [(set_attr "type" "neon_fcmla")]
;; Lane form of vcmla for the VF modes; operand 3 has the same mode as the
;; other vector operands and operand 4 is the constant lane index.  The
;; output code replaces the operand array with the result of
;; neon_vcmla_lane_prepare_operands and then prints operand 3 as
;; "d%c3[%c4]".
;; NOTE(review): presumably the helper turns operand 3 into a D-register
;; number and adjusts the lane -- its definition is not visible here.
2858 (define_insn "neon_vcmla_lane<rot><mode>"
2859 [(set (match_operand:VF 0 "s_register_operand" "=w")
2860 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
2861 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
2862 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
2863 (match_operand:SI 4 "const_int_operand" "n")]
2867 operands = neon_vcmla_lane_prepare_operands (operands);
2868 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2870 [(set_attr "type" "neon_fcmla")]
;; Lane form of vcmla for the VDF modes where the lane source (operand 3)
;; has the <V_DOUBLE> mode; operand 4 is the constant lane index.  The
;; output code replaces the operand array with the result of
;; neon_vcmla_lane_prepare_operands and then prints operand 3 as
;; "d%c3[%c4]".
2873 (define_insn "neon_vcmla_laneq<rot><mode>"
2874 [(set (match_operand:VDF 0 "s_register_operand" "=w")
2875 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
2876 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
2877 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
2878 (match_operand:SI 4 "const_int_operand" "n")]
2882 operands = neon_vcmla_lane_prepare_operands (operands);
2883 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2885 [(set_attr "type" "neon_fcmla")]
;; Lane form of vcmla for the VQ_HSF modes where the lane source
;; (operand 3) has the <V_HALF> mode; operand 4 is the constant lane index.
;; The output code replaces the operand array with the result of
;; neon_vcmla_lane_prepare_operands and then prints operand 3 as
;; "d%c3[%c4]".
2888 (define_insn "neon_vcmlaq_lane<rot><mode>"
2889 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
2890 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
2891 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
2892 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
2893 (match_operand:SI 4 "const_int_operand" "n")]
2897 operands = neon_vcmla_lane_prepare_operands (operands);
2898 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2900 [(set_attr "type" "neon_fcmla")]
2903 ;; The complex mul operations always need to expand to two instructions.
2904 ;; The first operation does half the computation and the second does the
2905 ;; remainder. Because of this, expand early.
;; Enabled under TARGET_COMPLEX && !BYTES_BIG_ENDIAN.  The body emits two
;; vcmla instructions: the first (<rotsplit1>) accumulates onto a zero
;; vector forced into a register and writes a fresh temporary; the second
;; (<rotsplit2>) accumulates onto that temporary and writes operand 0.
;; Both calls pass operands[2] before operands[1].
2906 (define_expand "cmul<conj_op><mode>3"
2907 [(set (match_operand:VDF 0 "register_operand")
2908 (unspec:VDF [(match_operand:VDF 1 "register_operand")
2909 (match_operand:VDF 2 "register_operand")]
2911 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
2913 rtx res1 = gen_reg_rtx (<MODE>mode);
2914 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
2915 emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
2916 operands[2], operands[1]));
2917 emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
2918 operands[2], operands[1]));
2923 ;; These map to the auto-vectorizer Dot Product optab.
2924 ;; The auto-vectorizer expects a dot product builtin that also does an
2925 ;; accumulation into the provided register.
2926 ;; Given the following pattern
2928 ;; for (i=0; i<len; i++) {
2934 ;; This can be auto-vectorized to
2935 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
2937 ;; given enough iterations. However the vectorizer can keep unrolling the loop
2938 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
2939 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
2942 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Operand layout: operands 1 and 2 are the <VSI2QI> multiplicands and
;; operand 3 is the VCVTI accumulator, tied to the destination by the "0"
;; constraint, so only operands 0-2 are printed.
2943 (define_insn "<sup>dot_prod<vsi2qi>"
2944 [(set (match_operand:VCVTI 0 "register_operand" "=w")
2946 (unspec:VCVTI [(match_operand:<VSI2QI> 1 "register_operand" "w")
2947 (match_operand:<VSI2QI> 2 "register_operand" "w")]
2949 (match_operand:VCVTI 3 "register_operand" "0")))]
2951 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2952 [(set_attr "type" "neon_dot<q>")]
2955 ;; These instructions map to the __builtins for the Dot Product operations
;; Builtin-facing expander for the dot product.  Operand numbering differs
;; from the optab pattern: the accumulator is operand 1 and the two
;; <VSI2QI> inputs are operands 2 and 3.
;; NOTE(review): operand 0 carries a "=w" constraint although the other
;; operands of this expander have none -- confirm whether it is needed.
2956 (define_expand "neon_<sup>dot<vsi2qi>"
2957 [(set (match_operand:VCVTI 0 "register_operand" "=w")
2959 (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand")
2960 (match_operand:<VSI2QI> 3 "register_operand")]
2962 (match_operand:VCVTI 1 "register_operand")))]
2966 ;; These instructions map to the __builtins for the Dot Product operations.
;; VUSDOT builtin pattern.  Operand 1 is the accumulator (tied to the
;; destination by "0"); operands 2 and 3 are the <VSI2QI> inputs.  The
;; mnemonic suffix is fixed at ".s8".
2967 (define_insn "neon_usdot<vsi2qi>"
2968 [(set (match_operand:VCVTI 0 "register_operand" "=w")
2971 [(match_operand:<VSI2QI> 2 "register_operand" "w")
2972 (match_operand:<VSI2QI> 3 "register_operand" "w")]
2974 (match_operand:VCVTI 1 "register_operand" "0")))]
2976 "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2977 [(set_attr "type" "neon_dot<q>")]
2980 ;; These instructions map to the __builtins for the Dot Product
2981 ;; indexed operations.
;; By-lane dot product builtin pattern.  Operand 1 is the accumulator (tied
;; to the destination by constraint "0"), operand 2 the <VSI2QI> input and
;; operand 3 the V8QI register from which lane operand 4 is selected.
;; The output template is a plain string; it must not be followed by ';',
;; which would start a comment in machine-description syntax.
2982 (define_insn "neon_<sup>dot_lane<vsi2qi>"
2983 [(set (match_operand:VCVTI 0 "register_operand" "=w")
2985 (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
2986 (match_operand:V8QI 3 "register_operand" "t")
2987 (match_operand:SI 4 "immediate_operand" "i")]
2989 (match_operand:VCVTI 1 "register_operand" "0")))]
2991 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
2992 [(set_attr "type" "neon_dot<q>")]
2995 ;; These instructions map to the __builtins for the Dot Product
2996 ;; indexed operations.
;; By-lane dot product where the lane source (operand 3) is a V16QI
;; register.  When the lane index exceeds GET_MODE_NUNITS (V2SImode) - 1 the
;; index is reduced by GET_MODE_NUNITS (V2SImode) and the "%f3" register
;; form is printed; otherwise the index is used as is with "%e3".
;; Operand 1 is the accumulator, tied to the destination.
2997 (define_insn "neon_<sup>dot_laneq<vsi2qi>"
2998 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3000 (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
3001 (match_operand:V16QI 3 "register_operand" "t")
3002 (match_operand:SI 4 "immediate_operand" "i")]
3004 (match_operand:VCVTI 1 "register_operand" "0")))]
3007 int lane = INTVAL (operands[4]);
3008 if (lane > GET_MODE_NUNITS (V2SImode) - 1)
3010 operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
3011 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
3015 operands[4] = GEN_INT (lane);
3016 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
3019 [(set_attr "type" "neon_dot<q>")]
3022 ;; These instructions map to the __builtins for the Dot Product
3023 ;; indexed operations in the v8.6 I8MM extension.
;; I8MM by-lane dot product (see the comment above).  Same operand layout
;; as the other by-lane dot patterns: operand 1 is the tied accumulator,
;; operand 2 the <VSI2QI> input, operand 3 the V8QI lane source and
;; operand 4 the lane index.
3024 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3025 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3028 [(match_operand:<VSI2QI> 2 "register_operand" "w")
3029 (match_operand:V8QI 3 "register_operand" "t")
3030 (match_operand:SI 4 "immediate_operand" "i")]
3032 (match_operand:VCVTI 1 "register_operand" "0")))]
3034 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
3035 [(set_attr "type" "neon_dot<q>")]
3038 ;; These instructions map to the __builtins for the Dot Product
3039 ;; indexed operations in the v8.6 I8MM extension.
;; I8MM by-lane dot product with a V16QI lane source.  Lane indices above
;; GET_MODE_NUNITS (V2SImode) - 1 are rebased by that count and printed
;; against "%f3"; lower indices are printed unchanged against "%e3".
;; Operand 1 is the accumulator, tied to the destination.
3040 (define_insn "neon_<sup>dot_laneq<vsi2qi>"
3041 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3043 (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
3044 (match_operand:V16QI 3 "register_operand" "t")
3045 (match_operand:SI 4 "immediate_operand" "i")]
3047 (match_operand:VCVTI 1 "register_operand" "0")))]
3050 int lane = INTVAL (operands[4]);
3051 if (lane > GET_MODE_NUNITS (V2SImode) - 1)
3053 operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
3054 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
3058 operands[4] = GEN_INT (lane);
3059 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
3062 [(set_attr "type" "neon_dot<q>")]
3065 ;; Auto-vectorizer pattern for usdot
;; Expander with the optab operand order: operands 1 and 2 are the
;; <VSI2QI> inputs and operand 3 is the VCVTI accumulator added to the
;; unspec result.
3066 (define_expand "usdot_prod<vsi2qi>"
3067 [(set (match_operand:VCVTI 0 "register_operand")
3068 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3070 (match_operand:<VSI2QI> 2
3071 "register_operand")]
3073 (match_operand:VCVTI 3 "register_operand")))]
;; Vector copysign expander.  It materialises a <V_cmp_result> vector whose
;; every element is 0x80000000 (built as an SImode constant), copies
;; operand 2 into operand 0, views the mask in <MODE>mode through a subreg
;; at offset 0, and then emits neon_vbsl with operand 0 as destination and
;; the mask and operand 0 as its first inputs.
3077 (define_expand "copysign<mode>3"
3078 [(match_operand:VCVTF 0 "register_operand")
3079 (match_operand:VCVTF 1 "register_operand")
3080 (match_operand:VCVTF 2 "register_operand")]
3084 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3085 rtx c = gen_int_mode (0x80000000, SImode);
3087 emit_move_insn (v_bitmask,
3088 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3089 emit_move_insn (operands[0], operands[2]);
3090 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3091 <VCVTF:V_cmp_result>mode, 0);
3092 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; VQNEG on VDQIW modes, modelled as an unspec of the single input.
3099 (define_insn "neon_vqneg<mode>"
3100 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3101 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3104 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3105 [(set_attr "type" "neon_qneg<q>")]
;; VCLS on VDQIW modes, modelled as an unspec of the single input.
3108 (define_insn "neon_vcls<mode>"
3109 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3110 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3113 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3114 [(set_attr "type" "neon_cls<q>")]
;; VCLZ on VDQIW modes.  Unlike vqneg/vcls this uses the generic clz RTL
;; code rather than an unspec.
3117 (define_insn "neon_vclz<mode>"
3118 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3119 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3121 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3122 [(set_attr "type" "neon_cnt<q>")]
;; Standard-named popcount pattern for VE modes, emitted as VCNT.
3125 (define_insn "popcount<mode>2"
3126 [(set (match_operand:VE 0 "s_register_operand" "=w")
3127 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3129 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3130 [(set_attr "type" "neon_cnt<q>")]
;; Builtin expander for vcnt: forwards both operands to popcount<mode>2.
3133 (define_expand "neon_vcnt<mode>"
3134 [(match_operand:VE 0 "s_register_operand")
3135 (match_operand:VE 1 "s_register_operand")]
3138 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; VRECPE for the VH (half-precision) modes; requires TARGET_NEON_FP16INST
;; and always prints the ".f16" suffix.
3142 (define_insn "neon_vrecpe<mode>"
3143 [(set (match_operand:VH 0 "s_register_operand" "=w")
3144 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3146 "TARGET_NEON_FP16INST"
3147 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3148 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; VRECPE for the V32 modes; the element suffix comes from <V_u_elem>.
3151 (define_insn "neon_vrecpe<mode>"
3152 [(set (match_operand:V32 0 "s_register_operand" "=w")
3153 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3156 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3157 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; VRSQRTE for the V32 modes; the element suffix comes from <V_u_elem>.
3160 (define_insn "neon_vrsqrte<mode>"
3161 [(set (match_operand:V32 0 "s_register_operand" "=w")
3162 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3165 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3166 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Builtin expander for vmvn: forwards both operands to
;; one_cmpl<mode>2_neon.
3169 (define_expand "neon_vmvn<mode>"
3170 [(match_operand:VDQIW 0 "s_register_operand")
3171 (match_operand:VDQIW 1 "s_register_operand")]
3174 emit_insn (gen_one_cmpl<mode>2_neon (operands[0], operands[1]));
;; Extract one lane of a VD vector into a core register with the signed
;; "vmov.s" form.  On big-endian targets the lane index is mirrored
;; (NUNITS - 1 - elt) before printing.
3178 (define_insn "neon_vget_lane<mode>_sext_internal"
3179 [(set (match_operand:SI 0 "s_register_operand" "=r")
3181 (vec_select:<V_elem>
3182 (match_operand:VD 1 "s_register_operand" "w")
3183 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3186 if (BYTES_BIG_ENDIAN)
3188 int elt = INTVAL (operands[2]);
3189 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3190 operands[2] = GEN_INT (elt);
3192 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3194 [(set_attr "type" "neon_to_gp")]
;; Extract one lane of a VD vector into a core register with the unsigned
;; "vmov.u" form.  On big-endian targets the lane index is mirrored
;; (NUNITS - 1 - elt) before printing.
3197 (define_insn "neon_vget_lane<mode>_zext_internal"
3198 [(set (match_operand:SI 0 "s_register_operand" "=r")
3200 (vec_select:<V_elem>
3201 (match_operand:VD 1 "s_register_operand" "w")
3202 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3205 if (BYTES_BIG_ENDIAN)
3207 int elt = INTVAL (operands[2]);
3208 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3209 operands[2] = GEN_INT (elt);
3211 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3213 [(set_attr "type" "neon_to_gp")]
;; Signed lane extract from a VQ2 vector.  The lane index is split into a
;; half selector (elt / halfelts) and an index within that half
;; (elt % halfelts, mirrored on big-endian).  A <V_HALF> register rtx is
;; built at regno + 2 * half and the instruction is printed through
;; output_asm_insn on a local operand array.
3216 (define_insn "neon_vget_lane<mode>_sext_internal"
3217 [(set (match_operand:SI 0 "s_register_operand" "=r")
3219 (vec_select:<V_elem>
3220 (match_operand:VQ2 1 "s_register_operand" "w")
3221 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3225 int regno = REGNO (operands[1]);
3226 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3227 unsigned int elt = INTVAL (operands[2]);
3228 unsigned int elt_adj = elt % halfelts;
3230 if (BYTES_BIG_ENDIAN)
3231 elt_adj = halfelts - 1 - elt_adj;
3233 ops[0] = operands[0];
3234 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3235 ops[2] = GEN_INT (elt_adj);
3236 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3240 [(set_attr "type" "neon_to_gp_q")]
;; Unsigned lane extract from a VQ2 vector.  The lane index is split into a
;; half selector (elt / halfelts) and an index within that half
;; (elt % halfelts, mirrored on big-endian).  A <V_HALF> register rtx is
;; built at regno + 2 * half and the instruction is printed through
;; output_asm_insn on a local operand array.
3243 (define_insn "neon_vget_lane<mode>_zext_internal"
3244 [(set (match_operand:SI 0 "s_register_operand" "=r")
3246 (vec_select:<V_elem>
3247 (match_operand:VQ2 1 "s_register_operand" "w")
3248 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3252 int regno = REGNO (operands[1]);
3253 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3254 unsigned int elt = INTVAL (operands[2]);
3255 unsigned int elt_adj = elt % halfelts;
3257 if (BYTES_BIG_ENDIAN)
3258 elt_adj = halfelts - 1 - elt_adj;
3260 ops[0] = operands[0];
3261 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3262 ops[2] = GEN_INT (elt_adj);
3263 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3267 [(set_attr "type" "neon_to_gp_q")]
;; Builtin expander for vget_lane.  On big-endian targets the lane index is
;; XORed with (64 / element-bits - 1), i.e. flipped within a 64-bit
;; register.  Modes with 32-bit elements go through vec_extract; the
;; sign-extending internal pattern is emitted for the remaining modes.
3270 (define_expand "neon_vget_lane<mode>"
3271 [(match_operand:<V_ext> 0 "s_register_operand")
3272 (match_operand:VDQW 1 "s_register_operand")
3273 (match_operand:SI 2 "immediate_operand")]
3276 if (BYTES_BIG_ENDIAN)
3278 /* The intrinsics are defined in terms of a model where the
3279 element ordering in memory is vldm order, whereas the generic
3280 RTL is defined in terms of a model where the element ordering
3281 in memory is array order. Convert the lane number to conform
3283 unsigned int elt = INTVAL (operands[2]);
3284 unsigned int reg_nelts
3285 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3286 elt ^= reg_nelts - 1;
3287 operands[2] = GEN_INT (elt);
3290 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3291 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3294 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Builtin expander for the unsigned vget_lane.  On big-endian targets the
;; lane index is XORed with (64 / element-bits - 1), i.e. flipped within a
;; 64-bit register.  Modes with 32-bit elements go through vec_extract; the
;; zero-extending internal pattern is emitted for the remaining modes.
3300 (define_expand "neon_vget_laneu<mode>"
3301 [(match_operand:<V_ext> 0 "s_register_operand")
3302 (match_operand:VDQIW 1 "s_register_operand")
3303 (match_operand:SI 2 "immediate_operand")]
3306 if (BYTES_BIG_ENDIAN)
3308 /* The intrinsics are defined in terms of a model where the
3309 element ordering in memory is vldm order, whereas the generic
3310 RTL is defined in terms of a model where the element ordering
3311 in memory is array order. Convert the lane number to conform
3313 unsigned int elt = INTVAL (operands[2]);
3314 unsigned int reg_nelts
3315 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3316 elt ^= reg_nelts - 1;
3317 operands[2] = GEN_INT (elt);
3320 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3321 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3324 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; vget_lane on a DI "vector": the lane operand (2) is not used and the
;; expansion is a plain move of operand 1 into operand 0.
3330 (define_expand "neon_vget_lanedi"
3331 [(match_operand:DI 0 "s_register_operand")
3332 (match_operand:DI 1 "s_register_operand")
3333 (match_operand:SI 2 "immediate_operand")]
3336 emit_move_insn (operands[0], operands[1]);
;; vget_lane on V2DI.  On big-endian targets the lane index is flipped
;; (XOR with 1).  The lane is asserted to be 0 or 1; lane 0 moves the
;; DImode low part of operand 1 into operand 0, lane 1 the high part.
3340 (define_expand "neon_vget_lanev2di"
3341 [(match_operand:DI 0 "s_register_operand")
3342 (match_operand:V2DI 1 "s_register_operand")
3343 (match_operand:SI 2 "immediate_operand")]
3348 if (BYTES_BIG_ENDIAN)
3350 /* The intrinsics are defined in terms of a model where the
3351 element ordering in memory is vldm order, whereas the generic
3352 RTL is defined in terms of a model where the element ordering
3353 in memory is array order. Convert the lane number to conform
3355 unsigned int elt = INTVAL (operands[2]);
3356 unsigned int reg_nelts = 2;
3357 elt ^= reg_nelts - 1;
3358 operands[2] = GEN_INT (elt);
3361 lane = INTVAL (operands[2]);
3362 gcc_assert ((lane ==0) || (lane == 1));
3363 emit_move_insn (operands[0], lane == 0
3364 ? gen_lowpart (DImode, operands[1])
3365 : gen_highpart (DImode, operands[1]));
;; Builtin expander for vset_lane.  Operand 1 is the new element, operand 2
;; the source vector and operand 3 the lane.  On big-endian targets the
;; lane is flipped within a 64-bit register.  The lane is handed to
;; vec_set<mode>_internal as a one-hot mask (1 << elt).
3369 (define_expand "neon_vset_lane<mode>"
3370 [(match_operand:VDQ 0 "s_register_operand")
3371 (match_operand:<V_elem> 1 "s_register_operand")
3372 (match_operand:VDQ 2 "s_register_operand")
3373 (match_operand:SI 3 "immediate_operand")]
3376 unsigned int elt = INTVAL (operands[3]);
3378 if (BYTES_BIG_ENDIAN)
3380 unsigned int reg_nelts
3381 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3382 elt ^= reg_nelts - 1;
3385 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3386 GEN_INT (1 << elt), operands[2]));
3390 ; As with the lane operand of neon_vget_lanedi, operands 2 & 3 are ignored: a DI value has a single lane, so the new element replaces the whole register.
;; vset_lane on a DI "vector": expands to a plain move of operand 1 (the
;; new element) into operand 0; operands 2 and 3 are not referenced.
3392 (define_expand "neon_vset_lanedi"
3393 [(match_operand:DI 0 "s_register_operand")
3394 (match_operand:DI 1 "s_register_operand")
3395 (match_operand:DI 2 "s_register_operand")
3396 (match_operand:SI 3 "immediate_operand")]
3399 emit_move_insn (operands[0], operands[1]);
;; vcreate: reinterprets the DI operand in <MODE>mode with gen_lowpart and
;; moves the result into operand 0.
3403 (define_expand "neon_vcreate<mode>"
3404 [(match_operand:VD_RE 0 "s_register_operand")
3405 (match_operand:DI 1 "general_operand")]
3408 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3409 emit_move_insn (operands[0], src);
;; VDUP from a core register ("r") into every element of a VX vector.
3413 (define_insn "neon_vdup_n<mode>"
3414 [(set (match_operand:VX 0 "s_register_operand" "=w")
3415 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3417 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3418 [(set_attr "type" "neon_from_gp<q>")]
;; V4HF duplicate of an HF value held in a core register ("r").
3421 (define_insn "neon_vdup_nv4hf"
3422 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3423 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3426 [(set_attr "type" "neon_from_gp")]
;; V8HF duplicate of an HF value held in a core register ("r").
3429 (define_insn "neon_vdup_nv8hf"
3430 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3431 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3434 [(set_attr "type" "neon_from_gp_q")]
;; V4BF duplicate of a BF value held in a core register ("r").
3437 (define_insn "neon_vdup_nv4bf"
3438 [(set (match_operand:V4BF 0 "s_register_operand" "=w")
3439 (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))]
3442 [(set_attr "type" "neon_from_gp")]
;; V8BF duplicate of a BF value held in a core register ("r").
3445 (define_insn "neon_vdup_nv8bf"
3446 [(set (match_operand:V8BF 0 "s_register_operand" "=w")
3447 (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))]
3450 [(set_attr "type" "neon_from_gp_q")]
;; VDUP for V32 modes with two alternatives: the source element in a core
;; register ("r", printed as %1) or in a "t"-class register (printed with
;; the %y1 modifier).  The type attribute lists one value per alternative.
3453 (define_insn "neon_vdup_n<mode>"
3454 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3455 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3458 vdup.<V_sz_elem>\t%<V_reg>0, %1
3459 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3460 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; vdup_n on DI: with a single element this is just a move of operand 1
;; into operand 0.
3463 (define_expand "neon_vdup_ndi"
3464 [(match_operand:DI 0 "s_register_operand")
3465 (match_operand:DI 1 "s_register_operand")]
3468 emit_move_insn (operands[0], operands[1]);
;; V2DI duplicate of a DI value.  Each alternative prints two VMOVs, one
;; writing %e0 and one writing %f0 of the destination: from the core
;; register pair %Q1/%R1 ("r") or from %P1 ("w").  Length is 8 because two
;; instructions are emitted.
3473 (define_insn "neon_vdup_nv2di"
3474 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3475 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3478 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3479 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3480 [(set_attr "length" "8")
3481 (set_attr "type" "multiple")]
;; Duplicate one lane of a <V_double_vector_mode> register across a VDQW
;; destination.  On big-endian targets the lane index is mirrored
;; (NUNITS - 1 - elt).  Two output templates exist, differing only in the
;; destination modifier (%P0 or %q0).
3484 (define_insn "neon_vdup_lane<mode>_internal"
3485 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3487 (vec_select:<V_elem>
3488 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3489 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3492 if (BYTES_BIG_ENDIAN)
3494 int elt = INTVAL (operands[2]);
3495 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3496 operands[2] = GEN_INT (elt);
3499 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3501 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3503 [(set_attr "type" "neon_dup<q>")]
;; Lane duplicate for the VHFBF (half-float / bfloat) modes; requires NEON
;; plus either FP16 or BF16 SIMD support.  On big-endian targets the lane
;; index is mirrored (NUNITS - 1 - elt).  Two output templates exist,
;; differing only in the destination modifier (%P0 or %q0).
3506 (define_insn "neon_vdup_lane<mode>_internal"
3507 [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
3508 (vec_duplicate:VHFBF
3509 (vec_select:<V_elem>
3510 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3511 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3512 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3514 if (BYTES_BIG_ENDIAN)
3516 int elt = INTVAL (operands[2]);
3517 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3518 operands[2] = GEN_INT (elt);
3521 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3523 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3525 [(set_attr "type" "neon_dup<q>")]
;; Builtin expander for vdup_lane on VDQW modes.  On big-endian targets the
;; lane index is XORed with (64 / element-bits - 1) before being handed to
;; neon_vdup_lane<mode>_internal.
3528 (define_expand "neon_vdup_lane<mode>"
3529 [(match_operand:VDQW 0 "s_register_operand")
3530 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3531 (match_operand:SI 2 "immediate_operand")]
3534 if (BYTES_BIG_ENDIAN)
3536 unsigned int elt = INTVAL (operands[2]);
3537 unsigned int reg_nelts
3538 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3539 elt ^= reg_nelts - 1;
3540 operands[2] = GEN_INT (elt);
3542 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; Builtin expander for vdup_lane on the VHFBF modes; requires NEON plus
;; either FP16 or BF16 SIMD support.  On big-endian targets the lane index
;; is XORed with (64 / element-bits - 1) before being handed to
;; neon_vdup_lane<mode>_internal.
3547 (define_expand "neon_vdup_lane<mode>"
3548 [(match_operand:VHFBF 0 "s_register_operand")
3549 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3550 (match_operand:SI 2 "immediate_operand")]
3551 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3553 if (BYTES_BIG_ENDIAN)
3555 unsigned int elt = INTVAL (operands[2]);
3556 unsigned int reg_nelts
3557 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3558 elt ^= reg_nelts - 1;
3559 operands[2] = GEN_INT (elt);
3561 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3566 ; Scalar index is ignored, since only zero is valid here.
;; vdup_lane on DI: operand 2 is not referenced and the expansion is a
;; plain move of operand 1 into operand 0.
3567 (define_expand "neon_vdup_lanedi"
3568 [(match_operand:DI 0 "s_register_operand")
3569 (match_operand:DI 1 "s_register_operand")
3570 (match_operand:SI 2 "immediate_operand")]
3573 emit_move_insn (operands[0], operands[1]);
3577 ; Likewise for v2di, as the DImode second operand has only a single element.
;; vdup_lane producing V2DI from a DI source: operand 2 is not referenced
;; and the work is delegated to neon_vdup_nv2di.
3578 (define_expand "neon_vdup_lanev2di"
3579 [(match_operand:V2DI 0 "s_register_operand")
3580 (match_operand:DI 1 "s_register_operand")
3581 (match_operand:SI 2 "immediate_operand")]
3584 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3588 ; Disabled before reload because we don't want combine doing something silly,
3589 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap: two sets exchanging operands 0 and 1, both read-write
;; ("+w").  Only matches once reload has completed.
3590 (define_insn "*neon_vswp<mode>"
3591 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3592 (match_operand:VDQX 1 "s_register_operand" "+w"))
3593 (set (match_dup 1) (match_dup 0))]
3594 "TARGET_NEON && reload_completed"
3595 "vswp\t%<V_reg>0, %<V_reg>1"
3596 [(set_attr "type" "neon_permute<q>")]
3599 ;; In this insn, operand 1 should be low, and operand 2 the high part of the destination vector.
3601 ;; FIXME: A different implementation of this builtin could make it much
3602 ;; more likely that we wouldn't actually need to output anything (we could make
3603 ;; it so that the reg allocator puts things in the right places magically
3604 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vcombine as a vec_concat of two VDX registers into a <V_DOUBLE>
;; destination.  The pattern is split after reload, where
;; neon_split_vcombine emits the actual instruction sequence.
3606 (define_insn_and_split "neon_vcombine<mode>"
3607 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3608 (vec_concat:<V_DOUBLE>
3609 (match_operand:VDX 1 "s_register_operand" "w")
3610 (match_operand:VDX 2 "s_register_operand" "w")))]
3613 "&& reload_completed"
3616 neon_split_vcombine (operands);
3619 [(set_attr "type" "multiple")]
;; vget_high: moves into operand 0 the <V_HALF> subreg of operand 1 that
;; starts at byte offset GET_MODE_SIZE (<V_HALF>mode).
3622 (define_expand "neon_vget_high<mode>"
3623 [(match_operand:<V_HALF> 0 "s_register_operand")
3624 (match_operand:VQXBF 1 "s_register_operand")]
3627 emit_move_insn (operands[0],
3628 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3629 GET_MODE_SIZE (<V_HALF>mode)));
;; vget_low: moves a <V_HALF> subreg of operand 1 into operand 0.
;; Note this iterates over VQX whereas neon_vget_high uses VQXBF.
3633 (define_expand "neon_vget_low<mode>"
3634 [(match_operand:<V_HALF> 0 "s_register_operand")
3635 (match_operand:VQX 1 "s_register_operand")]
3638 emit_move_insn (operands[0],
3639 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named signed integer to float conversion for VCVTI modes.
;; Disabled under -frounding-math (flag_rounding_math).
3644 (define_insn "float<mode><V_cvtto>2"
3645 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3646 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3647 "TARGET_NEON && !flag_rounding_math"
3648 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3649 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned integer to float conversion for VCVTI modes.
;; Disabled under -frounding-math (flag_rounding_math).
3652 (define_insn "floatuns<mode><V_cvtto>2"
3653 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3654 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3655 "TARGET_NEON && !flag_rounding_math"
3656 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3657 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named float to signed integer conversion (RTL code "fix") for
;; VCVTF modes.
3660 (define_insn "fix_trunc<mode><V_cvtto>2"
3661 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3662 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3664 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3665 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named float to unsigned integer conversion (RTL code
;; "unsigned_fix") for VCVTF modes.
3668 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3669 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3670 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3672 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3673 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Builtin vcvt, float source (VCVTF) to 32-bit integer; signedness comes
;; from the <sup> iterator attribute.
3676 (define_insn "neon_vcvt<sup><mode>"
3677 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3678 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3681 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3682 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Builtin vcvt, 32-bit integer source (VCVTI) to float; signedness comes
;; from the <sup> iterator attribute.
3685 (define_insn "neon_vcvt<sup><mode>"
3686 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3687 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3690 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3691 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion V4HF -> V4SF (quad destination %q0, double source
;; %P1).  Requires NEON and FP16.
3694 (define_insn "neon_vcvtv4sfv4hf"
3695 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3696 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3698 "TARGET_NEON && TARGET_FP16"
3699 "vcvt.f32.f16\t%q0, %P1"
3700 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion V4SF -> V4HF (double destination %P0, quad source
;; %q1).  Requires NEON and FP16.
3703 (define_insn "neon_vcvtv4hfv4sf"
3704 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3705 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3707 "TARGET_NEON && TARGET_FP16"
3708 "vcvt.f16.f32\t%P0, %q1"
3709 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; FP16 vcvt, 16-bit integer source (VCVTHI) to f16; requires
;; TARGET_NEON_FP16INST.
3712 (define_insn "neon_vcvt<sup><mode>"
3714 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3716 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
3718 "TARGET_NEON_FP16INST"
3719 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
3720 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; FP16 vcvt, f16 source (VH) to 16-bit integer; requires
;; TARGET_NEON_FP16INST.
3723 (define_insn "neon_vcvt<sup><mode>"
3725 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3727 [(match_operand:VH 1 "s_register_operand" "w")]
3729 "TARGET_NEON_FP16INST"
3730 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3731 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; vcvt_n (conversion with an immediate operand), f32 source to 32-bit
;; integer.  Operand 2 is checked with arm_const_bounds (1, 33) before the
;; instruction is printed with it as the trailing immediate.
3734 (define_insn "neon_vcvt<sup>_n<mode>"
3735 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3736 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3737 (match_operand:SI 2 "immediate_operand" "i")]
3741 arm_const_bounds (operands[2], 1, 33);
3742 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3744 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt_n (conversion with an immediate operand), f16 source to 16-bit
;; integer; requires TARGET_NEON_FP16INST.  Operand 2 is checked with
;; arm_const_bounds (0, 17).
;; NOTE(review): the lower bound here is 0 whereas the 32-bit variants of
;; this pattern pass 1 -- confirm that accepting 0 is intended.
3747 (define_insn "neon_vcvt<sup>_n<mode>"
3748 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3750 [(match_operand:VH 1 "s_register_operand" "w")
3751 (match_operand:SI 2 "immediate_operand" "i")]
3753 "TARGET_NEON_FP16INST"
3755 arm_const_bounds (operands[2], 0, 17);
3756 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
3758 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; vcvt_n (conversion with an immediate operand), 32-bit integer source to
;; f32.  Operand 2 is checked with arm_const_bounds (1, 33) before the
;; instruction is printed with it as the trailing immediate.
3761 (define_insn "neon_vcvt<sup>_n<mode>"
3762 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3763 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3764 (match_operand:SI 2 "immediate_operand" "i")]
3768 arm_const_bounds (operands[2], 1, 33);
3769 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3771 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; vcvt_n (conversion with an immediate operand), 16-bit integer source to
;; f16; requires TARGET_NEON_FP16INST.  Operand 2 is checked with
;; arm_const_bounds (0, 17).
;; NOTE(review): the lower bound here is 0 whereas the 32-bit variants of
;; this pattern pass 1 -- confirm that accepting 0 is intended.
3774 (define_insn "neon_vcvt<sup>_n<mode>"
3775 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3777 [(match_operand:VCVTHI 1 "s_register_operand" "w")
3778 (match_operand:SI 2 "immediate_operand" "i")]
3780 "TARGET_NEON_FP16INST"
3782 arm_const_bounds (operands[2], 0, 17);
3783 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3785 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; FP16 to 16-bit integer conversions whose mnemonic carries the
;; <vcvth_op> suffix; requires TARGET_NEON_FP16INST.
3788 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3790 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3792 [(match_operand:VH 1 "s_register_operand" "w")]
3794 "TARGET_NEON_FP16INST"
3795 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3796 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; VMOVN: narrows a VN source (%q1) into a <V_narrow> destination (%P0).
3799 (define_insn "neon_vmovn<mode>"
3800 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3801 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3804 "vmovn.<V_if_elem>\t%P0, %q1"
3805 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; VQMOVN: narrows a VN source (%q1) into a <V_narrow> destination (%P0);
;; signedness comes from <sup>.
3808 (define_insn "neon_vqmovn<sup><mode>"
3809 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3810 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3813 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3814 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; VQMOVUN: narrows a VN source (%q1) into a <V_narrow> destination (%P0).
3817 (define_insn "neon_vqmovun<mode>"
3818 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3819 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3822 "vqmovun.<V_s_elem>\t%P0, %q1"
3823 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; VMOVL: widens a VW source (%P1) into a <V_widen> destination (%q0);
;; signedness comes from <sup>.
3826 (define_insn "neon_vmovl<sup><mode>"
3827 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3828 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3831 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3832 [(set_attr "type" "neon_shift_imm_long")]
;; VMUL by scalar lane, VMD modes.  Operand 2 holds the scalar vector
;; (register class restricted by <scalar_mul_constraint>) and operand 3 the
;; lane index.  The type attribute distinguishes float and integer modes.
3835 (define_insn "neon_vmul_lane<mode>"
3836 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3837 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3838 (match_operand:VMD 2 "s_register_operand"
3839 "<scalar_mul_constraint>")
3840 (match_operand:SI 3 "immediate_operand" "i")]
3844 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3847 (if_then_else (match_test "<Is_float_mode>")
3848 (const_string "neon_fp_mul_s_scalar<q>")
3849 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; VMUL by scalar lane, VMQ modes.  The scalar vector (operand 2) is a
;; <V_HALF> register printed with %P; destination and first source are
;; printed with %q.  The type attribute distinguishes float and integer
;; modes.
3852 (define_insn "neon_vmul_lane<mode>"
3853 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3854 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3855 (match_operand:<V_HALF> 2 "s_register_operand"
3856 "<scalar_mul_constraint>")
3857 (match_operand:SI 3 "immediate_operand" "i")]
3861 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3864 (if_then_else (match_test "<Is_float_mode>")
3865 (const_string "neon_fp_mul_s_scalar<q>")
3866 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; VMUL by scalar lane for the VH (half-precision) modes; requires
;; TARGET_NEON_FP16INST.  The scalar vector (operand 2) is always V4HF.
3869 (define_insn "neon_vmul_lane<mode>"
3870 [(set (match_operand:VH 0 "s_register_operand" "=w")
3871 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3872 (match_operand:V4HF 2 "s_register_operand"
3873 "<scalar_mul_constraint>")
3874 (match_operand:SI 3 "immediate_operand" "i")]
3876 "TARGET_NEON_FP16INST"
3877 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3878 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; VMULL by scalar lane: VMDI sources (%P1, %P2[lane]) produce a <V_widen>
;; destination (%q0); signedness comes from <sup>.
3881 (define_insn "neon_vmull<sup>_lane<mode>"
3882 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3883 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3884 (match_operand:VMDI 2 "s_register_operand"
3885 "<scalar_mul_constraint>")
3886 (match_operand:SI 3 "immediate_operand" "i")]
3890 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3892 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; VQDMULL by scalar lane (UNSPEC_VQDMULL_LANE): VMDI sources produce a
;; <V_widen> destination printed with %q0.
3895 (define_insn "neon_vqdmull_lane<mode>"
3896 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3897 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3898 (match_operand:VMDI 2 "s_register_operand"
3899 "<scalar_mul_constraint>")
3900 (match_operand:SI 3 "immediate_operand" "i")]
3901 UNSPEC_VQDMULL_LANE))]
3904 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3906 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; VQDMULH / VQRDMULH by scalar lane, VMQI modes.  The scalar vector
;; (operand 2) is a <V_HALF> register printed with %P; destination and
;; first source are printed with %q.
3909 (define_insn "neon_vq<r>dmulh_lane<mode>"
3910 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3911 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3912 (match_operand:<V_HALF> 2 "s_register_operand"
3913 "<scalar_mul_constraint>")
3914 (match_operand:SI 3 "immediate_operand" "i")]
3918 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3920 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; VQDMULH / VQRDMULH by scalar lane, VMDI modes.  All three vector
;; operands are printed with %P.  Because this is the non-quad form, the
;; scheduling type is the plain "_scalar" one rather than "_scalar_q",
;; matching how the VMDI vqrdmlah/vqrdmlsh lane pattern is typed.
3923 (define_insn "neon_vq<r>dmulh_lane<mode>"
3924 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3925 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3926 (match_operand:VMDI 2 "s_register_operand"
3927 "<scalar_mul_constraint>")
3928 (match_operand:SI 3 "immediate_operand" "i")]
3932 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3934 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
3937 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VQRDMLAH / VQRDMLSH by scalar lane, VMQI modes.  Operand 1 is the
;; accumulator tied to the destination ("0"); operand 3 is the <V_HALF>
;; scalar vector and operand 4 the lane index.
3938 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3939 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3940 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3941 (match_operand:VMQI 2 "s_register_operand" "w")
3942 (match_operand:<V_HALF> 3 "s_register_operand"
3943 "<scalar_mul_constraint>")
3944 (match_operand:SI 4 "immediate_operand" "i")]
3949 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3951 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VQRDMLAH / VQRDMLSH by scalar lane, VMDI modes.  Operand 1 is the
;; accumulator tied to the destination ("0"); operand 3 is the scalar
;; vector and operand 4 the lane index.  All vector operands print as %P.
3954 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3955 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3956 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3957 (match_operand:VMDI 2 "s_register_operand" "w")
3958 (match_operand:VMDI 3 "s_register_operand"
3959 "<scalar_mul_constraint>")
3960 (match_operand:SI 4 "immediate_operand" "i")]
3965 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3967 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; VMLA by scalar lane, VMD modes.  Operand 1 is the accumulator tied to
;; the destination ("0"); operand 3 is the scalar vector and operand 4 the
;; lane index.  The type attribute distinguishes float and integer modes.
3970 (define_insn "neon_vmla_lane<mode>"
3971 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3972 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3973 (match_operand:VMD 2 "s_register_operand" "w")
3974 (match_operand:VMD 3 "s_register_operand"
3975 "<scalar_mul_constraint>")
3976 (match_operand:SI 4 "immediate_operand" "i")]
3980 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3983 (if_then_else (match_test "<Is_float_mode>")
3984 (const_string "neon_fp_mla_s_scalar<q>")
3985 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; VMLA by scalar lane, VMQ modes.  Operand 1 is the accumulator tied to
;; the destination ("0"); operand 3 is the <V_HALF> scalar vector (%P) and
;; operand 4 the lane index.  The type attribute distinguishes float and
;; integer modes.
3988 (define_insn "neon_vmla_lane<mode>"
3989 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3990 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3991 (match_operand:VMQ 2 "s_register_operand" "w")
3992 (match_operand:<V_HALF> 3 "s_register_operand"
3993 "<scalar_mul_constraint>")
3994 (match_operand:SI 4 "immediate_operand" "i")]
3998 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4001 (if_then_else (match_test "<Is_float_mode>")
4002 (const_string "neon_fp_mla_s_scalar<q>")
4003 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; VMLAL by scalar lane.  Operand 1 is the <V_widen> accumulator tied to
;; the destination ("0"); operands 2 and 3 are VMDI and operand 4 is the
;; lane index.  Signedness comes from <sup>.
4006 (define_insn "neon_vmlal<sup>_lane<mode>"
4007 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4008 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4009 (match_operand:VMDI 2 "s_register_operand" "w")
4010 (match_operand:VMDI 3 "s_register_operand"
4011 "<scalar_mul_constraint>")
4012 (match_operand:SI 4 "immediate_operand" "i")]
4016 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4018 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; VQDMLAL by scalar lane (UNSPEC_VQDMLAL_LANE).  Operand 1 is the
;; <V_widen> accumulator tied to the destination ("0"); operands 2 and 3
;; are VMDI and operand 4 is the lane index.
4021 (define_insn "neon_vqdmlal_lane<mode>"
4022 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4023 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4024 (match_operand:VMDI 2 "s_register_operand" "w")
4025 (match_operand:VMDI 3 "s_register_operand"
4026 "<scalar_mul_constraint>")
4027 (match_operand:SI 4 "immediate_operand" "i")]
4028 UNSPEC_VQDMLAL_LANE))]
4031 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4033 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; vmls by scalar lane for the VMD modes.  Same operand layout as the vmla
;; lane pattern: operand 1 tied to the destination, operand 3 the scalar
;; vector, operand 4 the immediate lane index.
4036 (define_insn "neon_vmls_lane<mode>"
4037 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4038 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4039 (match_operand:VMD 2 "s_register_operand" "w")
4040 (match_operand:VMD 3 "s_register_operand"
4041 "<scalar_mul_constraint>")
4042 (match_operand:SI 4 "immediate_operand" "i")]
4046 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4049 (if_then_else (match_test "<Is_float_mode>")
4050 (const_string "neon_fp_mla_s_scalar<q>")
4051 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmls by scalar lane for the VMQ modes.  The scalar vector (operand 3) is
;; in <V_HALF> mode and printed with %P; operands 0 and 2 are printed with
;; %q.  Operand 1 is tied to the destination.
4054 (define_insn "neon_vmls_lane<mode>"
4055 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4056 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4057 (match_operand:VMQ 2 "s_register_operand" "w")
4058 (match_operand:<V_HALF> 3 "s_register_operand"
4059 "<scalar_mul_constraint>")
4060 (match_operand:SI 4 "immediate_operand" "i")]
4064 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4067 (if_then_else (match_test "<Is_float_mode>")
4068 (const_string "neon_fp_mla_s_scalar<q>")
4069 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlsl by scalar lane: <V_widen> result with operand 1 tied to it, VMDI
;; multiplicands, immediate lane index in operand 4.
4072 (define_insn "neon_vmlsl<sup>_lane<mode>"
4073 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4074 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4075 (match_operand:VMDI 2 "s_register_operand" "w")
4076 (match_operand:VMDI 3 "s_register_operand"
4077 "<scalar_mul_constraint>")
4078 (match_operand:SI 4 "immediate_operand" "i")]
4082 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4084 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by scalar lane (UNSPEC_VQDMLSL_LANE): <V_widen> result with
;; operand 1 tied to it, VMDI multiplicands, immediate lane index in
;; operand 4.
4087 (define_insn "neon_vqdmlsl_lane<mode>"
4088 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4089 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4090 (match_operand:VMDI 2 "s_register_operand" "w")
4091 (match_operand:VMDI 3 "s_register_operand"
4092 "<scalar_mul_constraint>")
4093 (match_operand:SI 4 "immediate_operand" "i")]
4094 UNSPEC_VQDMLSL_LANE))]
4097 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4099 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4102 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4103 ; core register into a temp register, then use a scalar taken from that. This
4104 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4105 ; or extracted from another vector. In the latter case it's currently better to
4106 ; use the "_lane" variant, and the former case can probably be implemented
4107 ; using vld1_lane, but that hasn't been done yet.
;; Multiply a VMD vector (operand 1) by the scalar in operand 2.  The
;; expansion allocates a fresh <MODE>mode temporary, inserts the scalar at
;; lane index 0 with neon_vset_lane, and then emits neon_vmul_lane with the
;; temporary as the scalar vector.
4109 (define_expand "neon_vmul_n<mode>"
4110 [(match_operand:VMD 0 "s_register_operand")
4111 (match_operand:VMD 1 "s_register_operand")
4112 (match_operand:<V_elem> 2 "s_register_operand")]
4115 rtx tmp = gen_reg_rtx (<MODE>mode);
4116 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4117 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Multiply a VMQ vector (operand 1) by the scalar in operand 2.  The
;; temporary that carries the scalar is allocated in <V_HALF> mode, filled
;; at lane index 0 with neon_vset_lane<V_half>, and passed to
;; neon_vmul_lane.
4122 (define_expand "neon_vmul_n<mode>"
4123 [(match_operand:VMQ 0 "s_register_operand")
4124 (match_operand:VMQ 1 "s_register_operand")
4125 (match_operand:<V_elem> 2 "s_register_operand")]
4128 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4129 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4130 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Multiply a VH vector (operand 1) by the scalar in operand 2; only
;; available when TARGET_NEON_FP16INST holds.  The scalar is always staged
;; in a V4HFmode temporary (lane index 0) regardless of which VH mode is
;; being expanded.
4135 (define_expand "neon_vmul_n<mode>"
4136 [(match_operand:VH 0 "s_register_operand")
4137 (match_operand:VH 1 "s_register_operand")
4138 (match_operand:<V_elem> 2 "s_register_operand")]
4139 "TARGET_NEON_FP16INST"
4141 rtx tmp = gen_reg_rtx (V4HFmode);
4142 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4143 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander producing a <V_widen> result from a VMDI vector (operand 1) and
;; the scalar in operand 2.  The scalar is inserted at lane index 0 of a
;; fresh <MODE>mode temporary, which is then handed to neon_vmulls_lane.
4148 (define_expand "neon_vmulls_n<mode>"
4149 [(match_operand:<V_widen> 0 "s_register_operand")
4150 (match_operand:VMDI 1 "s_register_operand")
4151 (match_operand:<V_elem> 2 "s_register_operand")]
4154 rtx tmp = gen_reg_rtx (<MODE>mode);
4155 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4156 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Expander producing a <V_widen> result from a VMDI vector (operand 1) and
;; the scalar in operand 2.  The scalar is inserted at lane index 0 of a
;; fresh <MODE>mode temporary, which is then handed to neon_vmullu_lane.
4161 (define_expand "neon_vmullu_n<mode>"
4162 [(match_operand:<V_widen> 0 "s_register_operand")
4163 (match_operand:VMDI 1 "s_register_operand")
4164 (match_operand:<V_elem> 2 "s_register_operand")]
4167 rtx tmp = gen_reg_rtx (<MODE>mode);
4168 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4169 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Expander producing a <V_widen> result from a VMDI vector (operand 1) and
;; the scalar in operand 2.  The scalar is inserted at lane index 0 of a
;; fresh <MODE>mode temporary, which is then handed to neon_vqdmull_lane.
4174 (define_expand "neon_vqdmull_n<mode>"
4175 [(match_operand:<V_widen> 0 "s_register_operand")
4176 (match_operand:VMDI 1 "s_register_operand")
4177 (match_operand:<V_elem> 2 "s_register_operand")]
4180 rtx tmp = gen_reg_rtx (<MODE>mode);
4181 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4182 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; VMDI expander: stages the scalar in operand 2 at lane index 0 of a fresh
;; <MODE>mode temporary and emits neon_vqdmulh_lane on operand 1 and that
;; temporary.
4187 (define_expand "neon_vqdmulh_n<mode>"
4188 [(match_operand:VMDI 0 "s_register_operand")
4189 (match_operand:VMDI 1 "s_register_operand")
4190 (match_operand:<V_elem> 2 "s_register_operand")]
4193 rtx tmp = gen_reg_rtx (<MODE>mode);
4194 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4195 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; VMDI expander: stages the scalar in operand 2 at lane index 0 of a fresh
;; <MODE>mode temporary and emits neon_vqrdmulh_lane on operand 1 and that
;; temporary.
4200 (define_expand "neon_vqrdmulh_n<mode>"
4201 [(match_operand:VMDI 0 "s_register_operand")
4202 (match_operand:VMDI 1 "s_register_operand")
4203 (match_operand:<V_elem> 2 "s_register_operand")]
4206 rtx tmp = gen_reg_rtx (<MODE>mode);
4207 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4208 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; VMQI expander: the scalar in operand 2 is staged at lane index 0 of a
;; <V_HALF> mode temporary, which neon_vqdmulh_lane then uses as its scalar
;; vector.
4213 (define_expand "neon_vqdmulh_n<mode>"
4214 [(match_operand:VMQI 0 "s_register_operand")
4215 (match_operand:VMQI 1 "s_register_operand")
4216 (match_operand:<V_elem> 2 "s_register_operand")]
4219 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4220 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4221 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; VMQI expander: the scalar in operand 2 is staged at lane index 0 of a
;; <V_HALF> mode temporary, which neon_vqrdmulh_lane then uses as its scalar
;; vector.
4226 (define_expand "neon_vqrdmulh_n<mode>"
4227 [(match_operand:VMQI 0 "s_register_operand")
4228 (match_operand:VMQI 1 "s_register_operand")
4229 (match_operand:<V_elem> 2 "s_register_operand")]
4232 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4233 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4234 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; VMD multiply-accumulate by scalar.  Operand 3 is the scalar; it is
;; inserted at lane index 0 of a fresh <MODE>mode temporary, and
;; neon_vmla_lane is emitted with operands 0, 1 and 2 passed through.
4239 (define_expand "neon_vmla_n<mode>"
4240 [(match_operand:VMD 0 "s_register_operand")
4241 (match_operand:VMD 1 "s_register_operand")
4242 (match_operand:VMD 2 "s_register_operand")
4243 (match_operand:<V_elem> 3 "s_register_operand")]
4246 rtx tmp = gen_reg_rtx (<MODE>mode);
4247 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4248 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; VMQ multiply-accumulate by scalar.  Operand 3 is the scalar; it is
;; inserted at lane index 0 of a <V_HALF> mode temporary, and
;; neon_vmla_lane is emitted with operands 0, 1 and 2 passed through.
4253 (define_expand "neon_vmla_n<mode>"
4254 [(match_operand:VMQ 0 "s_register_operand")
4255 (match_operand:VMQ 1 "s_register_operand")
4256 (match_operand:VMQ 2 "s_register_operand")
4257 (match_operand:<V_elem> 3 "s_register_operand")]
4260 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4261 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4262 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander with <V_widen> operands 0 and 1, a VMDI operand 2 and a scalar
;; operand 3.  The scalar is staged at lane index 0 of a fresh <MODE>mode
;; temporary and neon_vmlals_lane is emitted.
4267 (define_expand "neon_vmlals_n<mode>"
4268 [(match_operand:<V_widen> 0 "s_register_operand")
4269 (match_operand:<V_widen> 1 "s_register_operand")
4270 (match_operand:VMDI 2 "s_register_operand")
4271 (match_operand:<V_elem> 3 "s_register_operand")]
4274 rtx tmp = gen_reg_rtx (<MODE>mode);
4275 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4276 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Expander with <V_widen> operands 0 and 1, a VMDI operand 2 and a scalar
;; operand 3.  The scalar is staged at lane index 0 of a fresh <MODE>mode
;; temporary and neon_vmlalu_lane is emitted.
4281 (define_expand "neon_vmlalu_n<mode>"
4282 [(match_operand:<V_widen> 0 "s_register_operand")
4283 (match_operand:<V_widen> 1 "s_register_operand")
4284 (match_operand:VMDI 2 "s_register_operand")
4285 (match_operand:<V_elem> 3 "s_register_operand")]
4288 rtx tmp = gen_reg_rtx (<MODE>mode);
4289 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4290 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander with <V_widen> operands 0 and 1, a VMDI operand 2 and a scalar
;; operand 3.  The scalar is staged at lane index 0 of a fresh <MODE>mode
;; temporary and neon_vqdmlal_lane is emitted.
4295 (define_expand "neon_vqdmlal_n<mode>"
4296 [(match_operand:<V_widen> 0 "s_register_operand")
4297 (match_operand:<V_widen> 1 "s_register_operand")
4298 (match_operand:VMDI 2 "s_register_operand")
4299 (match_operand:<V_elem> 3 "s_register_operand")]
4302 rtx tmp = gen_reg_rtx (<MODE>mode);
4303 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4304 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; VMD multiply-subtract by scalar.  Operand 3 is the scalar; it is
;; inserted at lane index 0 of a fresh <MODE>mode temporary, and
;; neon_vmls_lane is emitted with operands 0, 1 and 2 passed through.
4309 (define_expand "neon_vmls_n<mode>"
4310 [(match_operand:VMD 0 "s_register_operand")
4311 (match_operand:VMD 1 "s_register_operand")
4312 (match_operand:VMD 2 "s_register_operand")
4313 (match_operand:<V_elem> 3 "s_register_operand")]
4316 rtx tmp = gen_reg_rtx (<MODE>mode);
4317 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4318 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; VMQ multiply-subtract by scalar.  Operand 3 is the scalar; it is
;; inserted at lane index 0 of a <V_HALF> mode temporary, and
;; neon_vmls_lane is emitted with operands 0, 1 and 2 passed through.
4323 (define_expand "neon_vmls_n<mode>"
4324 [(match_operand:VMQ 0 "s_register_operand")
4325 (match_operand:VMQ 1 "s_register_operand")
4326 (match_operand:VMQ 2 "s_register_operand")
4327 (match_operand:<V_elem> 3 "s_register_operand")]
4330 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4331 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4332 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander with <V_widen> operands 0 and 1, a VMDI operand 2 and a scalar
;; operand 3.  The scalar is staged at lane index 0 of a fresh <MODE>mode
;; temporary and neon_vmlsls_lane is emitted.
4337 (define_expand "neon_vmlsls_n<mode>"
4338 [(match_operand:<V_widen> 0 "s_register_operand")
4339 (match_operand:<V_widen> 1 "s_register_operand")
4340 (match_operand:VMDI 2 "s_register_operand")
4341 (match_operand:<V_elem> 3 "s_register_operand")]
4344 rtx tmp = gen_reg_rtx (<MODE>mode);
4345 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4346 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander with <V_widen> operands 0 and 1, a VMDI operand 2 and a scalar
;; operand 3.  The scalar is staged at lane index 0 of a fresh <MODE>mode
;; temporary and neon_vmlslu_lane is emitted.
4351 (define_expand "neon_vmlslu_n<mode>"
4352 [(match_operand:<V_widen> 0 "s_register_operand")
4353 (match_operand:<V_widen> 1 "s_register_operand")
4354 (match_operand:VMDI 2 "s_register_operand")
4355 (match_operand:<V_elem> 3 "s_register_operand")]
4358 rtx tmp = gen_reg_rtx (<MODE>mode);
4359 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4360 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander with <V_widen> operands 0 and 1, a VMDI operand 2 and a scalar
;; operand 3.  The scalar is staged at lane index 0 of a fresh <MODE>mode
;; temporary and neon_vqdmlsl_lane is emitted.
4365 (define_expand "neon_vqdmlsl_n<mode>"
4366 [(match_operand:<V_widen> 0 "s_register_operand")
4367 (match_operand:<V_widen> 1 "s_register_operand")
4368 (match_operand:VMDI 2 "s_register_operand")
4369 (match_operand:<V_elem> 3 "s_register_operand")]
4372 rtx tmp = gen_reg_rtx (<MODE>mode);
4373 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4374 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on two VDQX registers with an immediate in operand 3.  The immediate
;; is checked with arm_const_bounds against 0 and the number of units in the
;; mode before the template is returned.
4379 (define_insn "@neon_vext<mode>"
4380 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4381 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4382 (match_operand:VDQX 2 "s_register_operand" "w")
4383 (match_operand:SI 3 "immediate_operand" "i")]
4387 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4388 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4390 [(set_attr "type" "neon_ext<q>")]
;; Single-input vrev64 on the VDQ modes; the element size in the mnemonic
;; comes from <V_sz_elem>.
4393 (define_insn "@neon_vrev64<mode>"
4394 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4395 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4398 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4399 [(set_attr "type" "neon_rev<q>")]
;; Single-input vrev32 on the VX modes; the element size in the mnemonic
;; comes from <V_sz_elem>.
4402 (define_insn "@neon_vrev32<mode>"
4403 [(set (match_operand:VX 0 "s_register_operand" "=w")
4404 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4407 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4408 [(set_attr "type" "neon_rev<q>")]
;; Single-input vrev16 on the VE modes; the element size in the mnemonic
;; comes from <V_sz_elem>.
4411 (define_insn "@neon_vrev16<mode>"
4412 [(set (match_operand:VE 0 "s_register_operand" "=w")
4413 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4416 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4417 [(set_attr "type" "neon_rev<q>")]
4420 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4421 ; allocation. For an intrinsic of form:
4422 ; rD = vbsl_* (rS, rN, rM)
4423 ; We can use any of:
4424 ; vbsl rS, rN, rM (if D = S)
4425 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4426 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Three alternatives, one per input that may be tied to the output:
;;   alt 0: operand 1 tied -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied -> vbit %0, %2, %1
;;   alt 2: operand 2 tied -> vbif %0, %3, %1
4428 (define_insn "neon_vbsl<mode>_internal"
4429 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4430 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4431 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4432 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4436 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4437 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4438 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4439 [(set_attr "type" "neon_bsl<q>")]
;; Expander for vbsl.  Operand 1 arrives in <V_cmp_result> mode and is
;; rewritten with gen_lowpart to <MODE>mode, so that all three inputs share
;; one mode.
4442 (define_expand "@neon_vbsl<mode>"
4443 [(set (match_operand:VDQX 0 "s_register_operand")
4444 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4445 (match_operand:VDQX 2 "s_register_operand")
4446 (match_operand:VDQX 3 "s_register_operand")]
4450 /* We can't alias operands together if they have different modes. */
4451 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift pattern whose second input (operand 2) is a VDQIX vector register
;; rather than an immediate.  Scheduled as neon_shift_imm<q>.
4455 (define_insn "neon_v<shift_op><sup><mode>"
4456 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4457 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4458 (match_operand:VDQIX 2 "s_register_operand" "w")]
4461 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4462 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift pattern whose second input (operand 2) is a VDQIX vector register
;; rather than an immediate.  Scheduled as neon_sat_shift_imm<q>.
4466 (define_insn "neon_v<shift_op><sup><mode>"
4467 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4468 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4469 (match_operand:VDQIX 2 "s_register_operand" "w")]
4472 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4473 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift by immediate (operand 2).  The immediate is checked with
;; arm_const_bounds using 1 and element-bits + 1 as the bound arguments.
4477 (define_insn "neon_v<shift_op><sup>_n<mode>"
4478 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4479 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4480 (match_operand:SI 2 "immediate_operand" "i")]
4484 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4485 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4487 [(set_attr "type" "neon_shift_imm<q>")]
4490 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: the VN input is printed with %q and the
;; <V_narrow> result with %P.  The immediate is checked with
;; arm_const_bounds using 1 and element-bits / 2 + 1 as the bound arguments.
4491 (define_insn "neon_v<shift_op>_n<mode>"
4492 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4493 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4494 (match_operand:SI 2 "immediate_operand" "i")]
4498 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4499 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4501 [(set_attr "type" "neon_shift_imm_narrow_q")]
4504 ;; vqshrn_n, vqrshrn_n
;; Narrowing shift by immediate from VN to <V_narrow>, with the <sup>
;; prefix in the data-type suffix.  Immediate bound arguments are 1 and
;; element-bits / 2 + 1.
4505 (define_insn "neon_v<shift_op><sup>_n<mode>"
4506 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4507 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4508 (match_operand:SI 2 "immediate_operand" "i")]
4512 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4513 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4515 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4518 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate from VN to <V_narrow>; the data-type suffix
;; is <V_s_elem>.  Immediate bound arguments are 1 and
;; element-bits / 2 + 1.
4519 (define_insn "neon_v<shift_op>_n<mode>"
4520 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4521 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4522 (match_operand:SI 2 "immediate_operand" "i")]
4526 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4527 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4529 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate (operand 2).  The immediate is checked with
;; arm_const_bounds using 0 and element-bits as the bound arguments.
4532 (define_insn "neon_vshl_n<mode>"
4533 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4534 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4535 (match_operand:SI 2 "immediate_operand" "i")]
4539 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4540 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4542 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate (operand 2), with the <sup> prefix in the data-type
;; suffix.  Immediate bound arguments are 0 and element-bits.
4545 (define_insn "neon_vqshl_<sup>_n<mode>"
4546 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4547 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4548 (match_operand:SI 2 "immediate_operand" "i")]
4552 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4553 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4555 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate (operand 2); the data-type suffix is <V_s_elem>.
;; Immediate bound arguments are 0 and element-bits.
4558 (define_insn "neon_vqshlu_n<mode>"
4559 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4560 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4561 (match_operand:SI 2 "immediate_operand" "i")]
4565 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4566 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4568 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vshll by immediate: VW input printed with %P, <V_widen> result printed
;; with %q.
;; NOTE(review): the in-body comment states "0 < imm", but the lower bound
;; passed to arm_const_bounds is 0 -- confirm whether that bound is
;; inclusive and whether an immediate of 0 is meant to be accepted.
4571 (define_insn "neon_vshll<sup>_n<mode>"
4572 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4573 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4574 (match_operand:SI 2 "immediate_operand" "i")]
4578 /* The boundaries are: 0 < imm <= size. */
4579 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4580 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4582 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate pattern with an extra input: operand 1 is tied to the
;; destination, operand 2 is the shifted vector, operand 3 the immediate
;; (bound arguments 1 and element-bits + 1).  Scheduled as
;; neon_shift_acc<q>.
4586 (define_insn "neon_v<shift_op><sup>_n<mode>"
4587 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4588 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4589 (match_operand:VDQIX 2 "s_register_operand" "w")
4590 (match_operand:SI 3 "immediate_operand" "i")]
4594 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4595 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4597 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate.  Operand 1 is tied to the destination, operand 2 is
;; the source vector, operand 3 the immediate (bound arguments 1 and
;; element-bits + 1).
4600 (define_insn "neon_vsri_n<mode>"
4601 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4602 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4603 (match_operand:VDQIX 2 "s_register_operand" "w")
4604 (match_operand:SI 3 "immediate_operand" "i")]
4608 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4609 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4611 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate.  Operand 1 is tied to the destination, operand 2 is
;; the source vector, operand 3 the immediate (bound arguments 0 and
;; element-bits).
4614 (define_insn "neon_vsli_n<mode>"
4615 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4616 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4617 (match_operand:VDQIX 2 "s_register_operand" "w")
4618 (match_operand:SI 3 "immediate_operand" "i")]
4622 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4623 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4625 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table: operand 1 is the V8QI table (printed
;; inside braces) and operand 2 the V8QI index vector.
4628 (define_insn "neon_vtbl1v8qi"
4629 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4630 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4631 (match_operand:V8QI 2 "s_register_operand" "w")]
4634 "vtbl.8\t%P0, {%P1}, %P2"
4635 [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in a TImode operand 1.  The output
;; code builds V8QI register rtxes at REGNO (operand 1) and REGNO + 2 for
;; the table list and prints the instruction through output_asm_insn.
4638 (define_insn "neon_vtbl2v8qi"
4639 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4640 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4641 (match_operand:V8QI 2 "s_register_operand" "w")]
4646 int tabbase = REGNO (operands[1]);
4648 ops[0] = operands[0];
4649 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4650 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4651 ops[3] = operands[2];
4652 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4656 [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in an EImode operand 1.  The
;; table list is built from V8QI register rtxes at REGNO (operand 1),
;; + 2 and + 4, and printed through output_asm_insn.
4659 (define_insn "neon_vtbl3v8qi"
4660 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4661 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4662 (match_operand:V8QI 2 "s_register_operand" "w")]
4667 int tabbase = REGNO (operands[1]);
4669 ops[0] = operands[0];
4670 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4671 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4672 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4673 ops[4] = operands[2];
4674 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4678 [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in an OImode operand 1.  The table
;; list is built from V8QI register rtxes at REGNO (operand 1), + 2, + 4
;; and + 6, and printed through output_asm_insn.
4681 (define_insn "neon_vtbl4v8qi"
4682 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4683 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4684 (match_operand:V8QI 2 "s_register_operand" "w")]
4689 int tabbase = REGNO (operands[1]);
4691 ops[0] = operands[0];
4692 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4693 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4694 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4695 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4696 ops[5] = operands[2];
4697 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4701 [(set_attr "type" "neon_tbl4")]
4704 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup that is split once reload has completed.  The table
;; (operand 1) is re-viewed as TImode, and one neon_vtbl2v8qi is emitted for
;; the low V8QI halves of the output and index vector, then one for the high
;; halves.  The output is early-clobber ("=&w").
4705 (define_insn_and_split "neon_vtbl1v16qi"
4706 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4707 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4708 (match_operand:V16QI 2 "s_register_operand" "w")]
4712 "&& reload_completed"
4715 rtx op0, op1, op2, part0, part2;
4719 op1 = gen_lowpart (TImode, operands[1]);
4722 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4723 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4724 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4725 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4727 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4728 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4729 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4730 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4733 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table (operand 1), split once reload
;; has completed into one lookup for the low V8QI halves of the output and
;; index vector and one for the high halves.  The output is early-clobber.
;; NOTE(review): the split calls gen_neon_vtbl2v8qi, whose pattern declares
;; a TImode table operand, while operand 1 here is OImode (the mode
;; neon_vtbl4v8qi declares) -- confirm the intended generator.
4736 (define_insn_and_split "neon_vtbl2v16qi"
4737 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4738 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4739 (match_operand:V16QI 2 "s_register_operand" "w")]
4743 "&& reload_completed"
4746 rtx op0, op1, op2, part0, part2;
4753 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4754 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4755 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4756 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4758 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4759 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4760 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4761 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4764 [(set_attr "type" "multiple")]
4767 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4768 ;; handle quad-word input modes, producing octa-word output modes. But
4769 ;; that requires us to add support for octa-word vector modes in moves.
4770 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers into one OImode value.  The pattern is
;; split after reload by calling neon_split_vcombine on the operands.
4771 (define_insn_and_split "neon_vcombinev16qi"
4772 [(set (match_operand:OI 0 "s_register_operand" "=w")
4773 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4774 (match_operand:V16QI 2 "s_register_operand" "w")]
4778 "&& reload_completed"
4781 neon_split_vcombine (operands);
4784 [(set_attr "type" "multiple")]
;; vtbx with a one-register table.  Operand 1 is tied to the destination,
;; operand 2 is the V8QI table and operand 3 the V8QI index vector.
4787 (define_insn "neon_vtbx1v8qi"
4788 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4789 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4790 (match_operand:V8QI 2 "s_register_operand" "w")
4791 (match_operand:V8QI 3 "s_register_operand" "w")]
4794 "vtbx.8\t%P0, {%P2}, %P3"
4795 [(set_attr "type" "neon_tbl1")]
;; vtbx with a two-register table held in a TImode operand 2; operand 1 is
;; tied to the destination.  The table list is built from V8QI register
;; rtxes at REGNO (operand 2) and + 2 and printed through output_asm_insn.
4798 (define_insn "neon_vtbx2v8qi"
4799 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4800 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4801 (match_operand:TI 2 "s_register_operand" "w")
4802 (match_operand:V8QI 3 "s_register_operand" "w")]
4807 int tabbase = REGNO (operands[2]);
4809 ops[0] = operands[0];
4810 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4811 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4812 ops[3] = operands[3];
4813 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4817 [(set_attr "type" "neon_tbl2")]
;; vtbx with a three-register table held in an EImode operand 2; operand 1
;; is tied to the destination.  The table list is built from V8QI register
;; rtxes at REGNO (operand 2), + 2 and + 4.
4820 (define_insn "neon_vtbx3v8qi"
4821 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4822 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4823 (match_operand:EI 2 "s_register_operand" "w")
4824 (match_operand:V8QI 3 "s_register_operand" "w")]
4829 int tabbase = REGNO (operands[2]);
4831 ops[0] = operands[0];
4832 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4833 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4834 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4835 ops[4] = operands[3];
4836 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4840 [(set_attr "type" "neon_tbl3")]
;; vtbx with a four-register table held in an OImode operand 2; operand 1
;; is tied to the destination.  The table list is built from V8QI register
;; rtxes at REGNO (operand 2), + 2, + 4 and + 6.
4843 (define_insn "neon_vtbx4v8qi"
4844 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4845 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4846 (match_operand:OI 2 "s_register_operand" "w")
4847 (match_operand:V8QI 3 "s_register_operand" "w")]
4852 int tabbase = REGNO (operands[2]);
4854 ops[0] = operands[0];
4855 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4856 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4857 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4858 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4859 ops[5] = operands[3];
4860 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4864 [(set_attr "type" "neon_tbl4")]
;; Expander for a two-result vtrn: operands 0 and 3 are the two outputs,
;; both computed from inputs 1 and 2 (the second set uses UNSPEC_VTRN2).
4867 (define_expand "@neon_vtrn<mode>_internal"
4869 [(set (match_operand:VDQWH 0 "s_register_operand")
4870 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4871 (match_operand:VDQWH 2 "s_register_operand")]
4873 (set (match_operand:VDQWH 3 "s_register_operand")
4874 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4879 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 and the inputs operands 1 and 3:
;; input 1 is tied to output 0 and input 3 to output 2, so the template
;; names only %0 and %2.  Both outputs are early-clobber.
4880 (define_insn "*neon_vtrn<mode>_insn"
4881 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4882 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4883 (match_operand:VDQWH 3 "s_register_operand" "2")]
4885 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4886 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4889 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4890 [(set_attr "type" "neon_permute<q>")]
;; Expander for a two-result vzip: operands 0 and 3 are the two outputs,
;; both computed from inputs 1 and 2 (the second set uses UNSPEC_VZIP2).
4893 (define_expand "@neon_vzip<mode>_internal"
4895 [(set (match_operand:VDQWH 0 "s_register_operand")
4896 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4897 (match_operand:VDQWH 2 "s_register_operand")]
4899 (set (match_operand:VDQWH 3 "s_register_operand")
4900 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4905 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 and the inputs operands 1 and 3:
;; input 1 is tied to output 0 and input 3 to output 2, so the template
;; names only %0 and %2.  Both outputs are early-clobber.
4906 (define_insn "*neon_vzip<mode>_insn"
4907 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4908 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4909 (match_operand:VDQWH 3 "s_register_operand" "2")]
4911 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4912 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4915 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4916 [(set_attr "type" "neon_zip<q>")]
;; Expander for a two-result vuzp: operands 0 and 3 are the two outputs,
;; both computed from inputs 1 and 2 (the second set uses UNSPEC_VUZP2).
4919 (define_expand "@neon_vuzp<mode>_internal"
4921 [(set (match_operand:VDQWH 0 "s_register_operand")
4922 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4923 (match_operand:VDQWH 2 "s_register_operand")]
4925 (set (match_operand:VDQWH 3 "s_register_operand")
4926 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4931 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 and the inputs operands 1 and 3:
;; input 1 is tied to output 0 and input 3 to output 2, so the template
;; names only %0 and %2.  Both outputs are early-clobber.
4932 (define_insn "*neon_vuzp<mode>_insn"
4933 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4934 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4935 (match_operand:VDQWH 3 "s_register_operand" "2")]
4937 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4938 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4941 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4942 [(set_attr "type" "neon_zip<q>")]
;; Named expander for loading a whole VDQX vector register (operand 0) from
;; a neon_struct_operand memory reference (operand 1).
4945 (define_expand "vec_load_lanes<mode><mode>"
4946 [(set (match_operand:VDQX 0 "s_register_operand")
4947 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; vld1 of a whole VDQX register from a "Um" memory operand; the register
;; list is printed with %h and the address with %A.
4951 (define_insn "neon_vld1<mode>"
4952 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4953 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4956 "vld1.<V_sz_elem>\t%h0, %A1"
4957 [(set_attr "type" "neon_load1_1reg<q>")]
4960 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4961 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
4963 (define_insn "neon_vld1_lane<mode>"
;; VDX form: loads one <V_elem> from memory (operand 1) into the vector in
;; operand 2, which is tied to the destination.  Operand 3 is the lane; it
;; is remapped through NEON_ENDIAN_LANE_N and written back to operands[3]
;; before one of the two vld1 templates is returned.
4964 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4965 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4966 (match_operand:VDX 2 "s_register_operand" "0")
4967 (match_operand:SI 3 "immediate_operand" "i")]
4971 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4972 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4973 operands[3] = GEN_INT (lane);
4975 return "vld1.<V_sz_elem>\t%P0, %A1";
4977 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4979 [(set_attr "type" "neon_load1_one_lane<q>")]
4982 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4983 ;; here on big endian targets.
;; VQX form: operand 2 is tied to the destination and operand 3 is the
;; lane, remapped through NEON_ENDIAN_LANE_N.  Lanes at or above max / 2
;; take the `if' branch, and operand 0 is re-expressed as a <V_HALF> mode
;; register before the template (printed with %P) is returned.
4984 (define_insn "neon_vld1_lane<mode>"
4985 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4986 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4987 (match_operand:VQX 2 "s_register_operand" "0")
4988 (match_operand:SI 3 "immediate_operand" "i")]
4992 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4993 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4994 operands[3] = GEN_INT (lane);
4995 int regno = REGNO (operands[0]);
4996 if (lane >= max / 2)
5000 operands[3] = GEN_INT (lane);
5002 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5004 return "vld1.<V_sz_elem>\t%P0, %A1";
5006 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5008 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one <V_elem> from memory and duplicate it across a VD_LANE vector
;; (vec_duplicate), using the all-lanes "[]" form of vld1.
5011 (define_insn "neon_vld1_dup<mode>"
5012 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5013 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5015 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5016 [(set_attr "type" "neon_load1_all_lanes<q>")]
5019 ;; Special case for DImode. Treat it exactly like a simple load.
;; Operand 0 is a DI register and operand 1 a DI neon_struct_operand.
5020 (define_expand "neon_vld1_dupdi"
5021 [(set (match_operand:DI 0 "s_register_operand")
5022 (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
;; Load one <V_elem> from memory and duplicate it across a VQ2 vector.  The
;; template names both halves of the destination (%e0 and %f0) in the
;; all-lanes register list.
5028 (define_insn "neon_vld1_dup<mode>"
5029 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5030 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5033 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5035 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate-load, split once reload has completed: the low DImode part
;; of the destination is loaded with neon_vld1_dupdi and then copied to the
;; high DImode part.  The "length" attribute is 8.
5038 (define_insn_and_split "neon_vld1_dupv2di"
5039 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5040 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5043 "&& reload_completed"
5046 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5047 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5048 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5051 [(set_attr "length" "8")
5052 (set_attr "type" "neon_load1_all_lanes_q")]
;; Named expander for storing a whole VDQX vector register (operand 1) to a
;; neon_struct_operand memory reference (operand 0).
5055 (define_expand "vec_store_lanes<mode><mode>"
5056 [(set (match_operand:VDQX 0 "neon_struct_operand")
5057 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; vst1 of a whole VDQX register to a "Um" memory operand; the register
;; list is printed with %h and the address with %A.
5061 (define_insn "neon_vst1<mode>"
5062 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5063 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5066 "vst1.<V_sz_elem>\t%h1, %A0"
5067 [(set_attr "type" "neon_store1_1reg<q>")])
5069 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5070 ;; here on big endian targets.
;; VDX form: stores one <V_elem> from the vector in operand 1 to memory
;; (operand 0).  Operand 2 is the lane; it is remapped through
;; NEON_ENDIAN_LANE_N and written back to operands[2] before one of the two
;; vst1 templates is returned.
5071 (define_insn "neon_vst1_lane<mode>"
5072 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5074 [(match_operand:VDX 1 "s_register_operand" "w")
5075 (match_operand:SI 2 "immediate_operand" "i")]
5079 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5080 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5081 operands[2] = GEN_INT (lane);
5083 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5085 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5087 [(set_attr "type" "neon_store1_one_lane<q>")]
5090 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5091 ;; here on big endian targets.
;; VQX variant of the single-lane store.  The lane index is endian-adjusted
;; and compared against half the element count (`lane >= max / 2`); operand 1
;; is then rebuilt as a <V_HALF>mode register from `regno`, so the emitted
;; vst1 names one half-width register (%P1) rather than the full register.
5092 (define_insn "neon_vst1_lane<mode>"
5093 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5095 [(match_operand:VQX 1 "s_register_operand" "w")
5096 (match_operand:SI 2 "immediate_operand" "i")]
5100 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5101 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5102 int regno = REGNO (operands[1]);
5103 if (lane >= max / 2)
5108 operands[2] = GEN_INT (lane);
5109 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5111 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5113 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5115 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander loading a TI-mode register (operand 0) from a TI memory operand.
;; The (const_int 0) UNSPEC_VSTRUCTDUMMY unspec carries no data; it appears to
;; exist only to attach the VDX vector mode to the pattern.
5118 (define_expand "vec_load_lanesti<mode>"
5119 [(set (match_operand:TI 0 "s_register_operand")
5120 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5121 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure load into a TI-mode register for VDXBF modes.
;; 64-bit elements are emitted as vld1.64; all other element sizes use
;; vld2.<V_sz_elem>.  The "type" attribute makes the same 64-bit distinction
;; (neon_load1_2reg vs. neon_load2_2reg).
5125 (define_insn "neon_vld2<mode>"
5126 [(set (match_operand:TI 0 "s_register_operand" "=w")
5127 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5128 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5132 if (<V_sz_elem> == 64)
5133 return "vld1.64\t%h0, %A1";
5135 return "vld2.<V_sz_elem>\t%h0, %A1";
5138 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5139 (const_string "neon_load1_2reg<q>")
5140 (const_string "neon_load2_2reg<q>")))]
;; VQ2BF variant of the two-element structure load: an OI-mode register is
;; loaded from OI memory with a single vld2.<V_sz_elem>.
5143 (define_insn "neon_vld2<mode>"
5144 [(set (match_operand:OI 0 "s_register_operand" "=w")
5145 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5146 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5149 "vld2.<V_sz_elem>\t%h0, %A1"
5150 [(set_attr "type" "neon_load2_2reg_q")])
5152 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5153 ;; here on big endian targets.
;; Load a two-element structure from memory into one lane of a TI-mode
;; register (VD_LANE modes).  Operand 2 is the incoming TI value and is tied
;; to the output by constraint "0".  The lane index (operand 3) is
;; endian-adjusted, and the register list is built from DImode registers at
;; regno and regno + 2.
5154 (define_insn "neon_vld2_lane<mode>"
5155 [(set (match_operand:TI 0 "s_register_operand" "=w")
5156 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5157 (match_operand:TI 2 "s_register_operand" "0")
5158 (match_operand:SI 3 "immediate_operand" "i")
5159 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5163 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5164 int regno = REGNO (operands[0]);
5166 ops[0] = gen_rtx_REG (DImode, regno);
5167 ops[1] = gen_rtx_REG (DImode, regno + 2);
5168 ops[2] = operands[1];
5169 ops[3] = GEN_INT (lane);
5170 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5173 [(set_attr "type" "neon_load2_one_lane<q>")]
5176 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5177 ;; here on big endian targets.
;; VQ_HS variant of the two-element lane load, operating on an OI-mode
;; register.  The endian-adjusted lane is tested against half the element
;; count (`lane >= max / 2`), and the register list uses DImode registers at
;; regno and regno + 4.  Operand 2 is tied to the output by constraint "0".
5178 (define_insn "neon_vld2_lane<mode>"
5179 [(set (match_operand:OI 0 "s_register_operand" "=w")
5180 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5181 (match_operand:OI 2 "s_register_operand" "0")
5182 (match_operand:SI 3 "immediate_operand" "i")
5183 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5187 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5188 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5189 int regno = REGNO (operands[0]);
5191 if (lane >= max / 2)
5196 ops[0] = gen_rtx_REG (DImode, regno);
5197 ops[1] = gen_rtx_REG (DImode, regno + 4);
5198 ops[2] = operands[1];
5199 ops[3] = GEN_INT (lane);
5200 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5203 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load a two-element structure and replicate it to all lanes of a TI-mode
;; register (VDXBF modes).  Modes with more than one element use the
;; all-lanes vld2 form on %e0 and %f0; single-element modes fall back to a
;; plain vld1.  The "type" attribute follows the same split via
;; <V_mode_nunits>.
5206 (define_insn "neon_vld2_dup<mode>"
5207 [(set (match_operand:TI 0 "s_register_operand" "=w")
5208 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5209 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5213 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5214 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5216 return "vld1.<V_sz_elem>\t%h0, %A1";
5219 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5220 (const_string "neon_load2_all_lanes<q>")
5221 (const_string "neon_load1_1reg<q>")))]
;; V8BF form of the two-element duplicating load into an OI-mode register.
;; Four V4BF registers at tabbase, tabbase + 2, tabbase + 4 and tabbase + 6
;; are listed in one vld2.16; operand 1 (V2BF memory) is the address.
;; NOTE(review): the register list here carries no "[]" all-lanes suffix,
;; whereas neon_vld3_dupv8bf and neon_vld4_dupv8bf use "[]" -- confirm this
;; difference is intentional.
5224 (define_insn "neon_vld2_dupv8bf"
5225 [(set (match_operand:OI 0 "s_register_operand" "=w")
5226 (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5227 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5232 int tabbase = REGNO (operands[0]);
5234 ops[4] = operands[1];
5235 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5236 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5237 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5238 ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5239 output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
5242 [(set_attr "type" "neon_load2_all_lanes_q")]
;; Expander storing a TI-mode register (operand 1) to a TI memory operand.
;; The (const_int 0) UNSPEC_VSTRUCTDUMMY unspec carries no data; it appears to
;; exist only to attach the VDX vector mode to the pattern.
5245 (define_expand "vec_store_lanesti<mode>"
5246 [(set (match_operand:TI 0 "neon_struct_operand")
5247 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5248 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure store from a TI-mode register for VDXBF modes.
;; 64-bit elements are emitted as vst1.64; all other element sizes use
;; vst2.<V_sz_elem>.
;; NOTE(review): the non-64-bit "type" is neon_store2_one_lane<q> although
;; this stores whole registers (the matching load uses neon_load2_2reg<q>) --
;; confirm the scheduling type is the intended one.
5252 (define_insn "neon_vst2<mode>"
5253 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5254 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5255 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5259 if (<V_sz_elem> == 64)
5260 return "vst1.64\t%h1, %A0";
5262 return "vst2.<V_sz_elem>\t%h1, %A0";
5265 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5266 (const_string "neon_store1_2reg<q>")
5267 (const_string "neon_store2_one_lane<q>")))]
;; VQ2BF variant of the two-element structure store: an OI-mode register is
;; written to OI memory with a single vst2.<V_sz_elem>.
5270 (define_insn "neon_vst2<mode>"
5271 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5272 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5273 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5276 "vst2.<V_sz_elem>\t%h1, %A0"
5277 [(set_attr "type" "neon_store2_4reg<q>")]
5280 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5281 ;; here on big endian targets.
;; Store one lane of a two-register structure (TI-mode operand 1, VD_LANE
;; modes) to a <V_two_elem> memory location.  The lane index (operand 2) is
;; endian-adjusted; the register list uses DImode registers at regno and
;; regno + 2.
5282 (define_insn "neon_vst2_lane<mode>"
5283 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5284 (unspec:<V_two_elem>
5285 [(match_operand:TI 1 "s_register_operand" "w")
5286 (match_operand:SI 2 "immediate_operand" "i")
5287 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5291 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5292 int regno = REGNO (operands[1]);
5294 ops[0] = operands[0];
5295 ops[1] = gen_rtx_REG (DImode, regno);
5296 ops[2] = gen_rtx_REG (DImode, regno + 2);
5297 ops[3] = GEN_INT (lane);
5298 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5301 [(set_attr "type" "neon_store2_one_lane<q>")]
5304 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5305 ;; here on big endian targets.
;; VQ_HS variant of the two-element lane store, reading an OI-mode register.
;; The endian-adjusted lane is tested against half the element count
;; (`lane >= max / 2`), and the register list uses DImode registers at regno
;; and regno + 4.
5306 (define_insn "neon_vst2_lane<mode>"
5307 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5308 (unspec:<V_two_elem>
5309 [(match_operand:OI 1 "s_register_operand" "w")
5310 (match_operand:SI 2 "immediate_operand" "i")
5311 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5315 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5316 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5317 int regno = REGNO (operands[1]);
5319 if (lane >= max / 2)
5324 ops[0] = operands[0];
5325 ops[1] = gen_rtx_REG (DImode, regno);
5326 ops[2] = gen_rtx_REG (DImode, regno + 4);
5327 ops[3] = GEN_INT (lane);
5328 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5331 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander loading an EI-mode register (operand 0) from an EI memory operand,
;; with a VDX UNSPEC_VSTRUCTDUMMY marker alongside the memory operand.
5334 (define_expand "vec_load_lanesei<mode>"
5335 [(set (match_operand:EI 0 "s_register_operand")
5336 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5337 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-register structure load into an EI-mode register for VDXBF modes.
;; 64-bit elements are emitted as vld1.64; all other element sizes use
;; vld3.<V_sz_elem>.  The "type" attribute makes the same 64-bit distinction.
5341 (define_insn "neon_vld3<mode>"
5342 [(set (match_operand:EI 0 "s_register_operand" "=w")
5343 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5344 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5348 if (<V_sz_elem> == 64)
5349 return "vld1.64\t%h0, %A1";
5351 return "vld3.<V_sz_elem>\t%h0, %A1";
5354 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5355 (const_string "neon_load1_3reg<q>")
5356 (const_string "neon_load3_3reg<q>")))]
;; Expander for a CI-mode structure load (VQ2 modes); it forwards operands 0
;; and 1 unchanged to neon_vld3<mode>.
5359 (define_expand "vec_load_lanesci<mode>"
5360 [(match_operand:CI 0 "s_register_operand")
5361 (match_operand:CI 1 "neon_struct_operand")
5362 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5365 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; VQ2BF three-element structure load into a CI-mode register, done as two
;; EImode halves: neon_vld3qa<mode> loads from the start of the memory
;; operand, then neon_vld3qb<mode> loads from GET_MODE_SIZE (EImode) bytes
;; further on, taking operand 0 as both output and tied input.
5369 (define_expand "neon_vld3<mode>"
5370 [(match_operand:CI 0 "s_register_operand")
5371 (match_operand:CI 1 "neon_struct_operand")
5372 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5377 mem = adjust_address (operands[1], EImode, 0);
5378 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5379 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5380 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of the VQ2BF vld3 sequence: one vld3 from EI memory into the
;; DImode registers at regno, regno + 4 and regno + 8 of the CI-mode
;; destination.
5384 (define_insn "neon_vld3qa<mode>"
5385 [(set (match_operand:CI 0 "s_register_operand" "=w")
5386 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5387 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5391 int regno = REGNO (operands[0]);
5393 ops[0] = gen_rtx_REG (DImode, regno);
5394 ops[1] = gen_rtx_REG (DImode, regno + 4);
5395 ops[2] = gen_rtx_REG (DImode, regno + 8);
5396 ops[3] = operands[1];
5397 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5400 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the VQ2BF vld3 sequence: one vld3 from EI memory into the
;; DImode registers at regno + 2, regno + 6 and regno + 10.  Operand 2 is the
;; incoming CI value, tied to the output by constraint "0".
5403 (define_insn "neon_vld3qb<mode>"
5404 [(set (match_operand:CI 0 "s_register_operand" "=w")
5405 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5406 (match_operand:CI 2 "s_register_operand" "0")
5407 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5411 int regno = REGNO (operands[0]);
5413 ops[0] = gen_rtx_REG (DImode, regno + 2);
5414 ops[1] = gen_rtx_REG (DImode, regno + 6);
5415 ops[2] = gen_rtx_REG (DImode, regno + 10);
5416 ops[3] = operands[1];
5417 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5420 [(set_attr "type" "neon_load3_3reg<q>")]
5423 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5424 ;; here on big endian targets.
;; Load a three-element structure into one lane of an EI-mode register
;; (VD_LANE modes).  Operand 2 is tied to the output by constraint "0"; the
;; lane index (operand 3) is endian-adjusted.  The register list uses DImode
;; registers at regno, regno + 2 and regno + 4, and the address is printed
;; with plain %3 (no "A" modifier).
5425 (define_insn "neon_vld3_lane<mode>"
5426 [(set (match_operand:EI 0 "s_register_operand" "=w")
5427 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5428 (match_operand:EI 2 "s_register_operand" "0")
5429 (match_operand:SI 3 "immediate_operand" "i")
5430 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5434 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5435 int regno = REGNO (operands[0]);
5437 ops[0] = gen_rtx_REG (DImode, regno);
5438 ops[1] = gen_rtx_REG (DImode, regno + 2);
5439 ops[2] = gen_rtx_REG (DImode, regno + 4);
5440 ops[3] = operands[1];
5441 ops[4] = GEN_INT (lane);
5442 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5446 [(set_attr "type" "neon_load3_one_lane<q>")]
5449 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5450 ;; here on big endian targets.
;; VQ_HS variant of the three-element lane load, operating on a CI-mode
;; register.  The endian-adjusted lane is tested against half the element
;; count (`lane >= max / 2`); the register list uses DImode registers at
;; regno, regno + 4 and regno + 8, and the address is printed with plain %3.
5451 (define_insn "neon_vld3_lane<mode>"
5452 [(set (match_operand:CI 0 "s_register_operand" "=w")
5453 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5454 (match_operand:CI 2 "s_register_operand" "0")
5455 (match_operand:SI 3 "immediate_operand" "i")
5456 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5460 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5461 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5462 int regno = REGNO (operands[0]);
5464 if (lane >= max / 2)
5469 ops[0] = gen_rtx_REG (DImode, regno);
5470 ops[1] = gen_rtx_REG (DImode, regno + 4);
5471 ops[2] = gen_rtx_REG (DImode, regno + 8);
5472 ops[3] = operands[1];
5473 ops[4] = GEN_INT (lane);
5474 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5478 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a three-element structure and replicate it to all lanes of an
;; EI-mode register (VDXBF modes).  Modes with more than one element emit an
;; all-lanes vld3 on the DImode registers at regno, regno + 2 and regno + 4;
;; single-element modes fall back to a plain vld1.  The "type" attribute
;; follows the same split via <V_mode_nunits>.
5481 (define_insn "neon_vld3_dup<mode>"
5482 [(set (match_operand:EI 0 "s_register_operand" "=w")
5483 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5484 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5488 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5490 int regno = REGNO (operands[0]);
5492 ops[0] = gen_rtx_REG (DImode, regno);
5493 ops[1] = gen_rtx_REG (DImode, regno + 2);
5494 ops[2] = gen_rtx_REG (DImode, regno + 4);
5495 ops[3] = operands[1];
5496 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5500 return "vld1.<V_sz_elem>\t%h0, %A1";
5503 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5504 (const_string "neon_load3_all_lanes<q>")
5505 (const_string "neon_load1_1reg<q>")))])
;; V8BF form of the three-element duplicating load into a CI-mode register.
;; Three V4BF registers at tabbase, tabbase + 2 and tabbase + 4 are listed
;; with the "[]" all-lanes suffix in one vld3.16; operand 1 (V2BF memory) is
;; the address.
5507 (define_insn "neon_vld3_dupv8bf"
5508 [(set (match_operand:CI 0 "s_register_operand" "=w")
5509 (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5510 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5515 int tabbase = REGNO (operands[0]);
5517 ops[3] = operands[1];
5518 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5519 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5520 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5521 output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
5524 [(set_attr "type" "neon_load3_all_lanes_q")]
;; Expander storing an EI-mode register (operand 1) to an EI memory operand,
;; with a VDX UNSPEC_VSTRUCTDUMMY marker alongside the register operand.
5527 (define_expand "vec_store_lanesei<mode>"
5528 [(set (match_operand:EI 0 "neon_struct_operand")
5529 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5530 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-register structure store from an EI-mode register for VDXBF modes.
;; 64-bit elements are emitted as vst1.64; all other element sizes use
;; vst3.<V_sz_elem>.
;; NOTE(review): the non-64-bit "type" is neon_store3_one_lane<q> although
;; this stores whole registers (the matching load uses neon_load3_3reg<q>) --
;; confirm the scheduling type is the intended one.
5534 (define_insn "neon_vst3<mode>"
5535 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5536 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5537 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5541 if (<V_sz_elem> == 64)
5542 return "vst1.64\t%h1, %A0";
5544 return "vst3.<V_sz_elem>\t%h1, %A0";
5547 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5548 (const_string "neon_store1_3reg<q>")
5549 (const_string "neon_store3_one_lane<q>")))])
;; Expander for a CI-mode structure store (VQ2 modes); it forwards operands 0
;; and 1 unchanged to neon_vst3<mode>.
5551 (define_expand "vec_store_lanesci<mode>"
5552 [(match_operand:CI 0 "neon_struct_operand")
5553 (match_operand:CI 1 "s_register_operand")
5554 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5557 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; VQ2BF three-element structure store from a CI-mode register, done as two
;; EImode halves: neon_vst3qa<mode> writes at the start of the memory
;; operand, then neon_vst3qb<mode> writes GET_MODE_SIZE (EImode) bytes
;; further on.  Both receive the full CI register operand 1.
5561 (define_expand "neon_vst3<mode>"
5562 [(match_operand:CI 0 "neon_struct_operand")
5563 (match_operand:CI 1 "s_register_operand")
5564 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5569 mem = adjust_address (operands[0], EImode, 0);
5570 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5571 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5572 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of the VQ2BF vst3 sequence: one vst3 to EI memory from the
;; DImode registers at regno, regno + 4 and regno + 8 of the CI-mode source.
5576 (define_insn "neon_vst3qa<mode>"
5577 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5578 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5579 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5583 int regno = REGNO (operands[1]);
5585 ops[0] = operands[0];
5586 ops[1] = gen_rtx_REG (DImode, regno);
5587 ops[2] = gen_rtx_REG (DImode, regno + 4);
5588 ops[3] = gen_rtx_REG (DImode, regno + 8);
5589 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5592 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the VQ2BF vst3 sequence: one vst3 to EI memory from the
;; DImode registers at regno + 2, regno + 6 and regno + 10 of the CI-mode
;; source.
5595 (define_insn "neon_vst3qb<mode>"
5596 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5597 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5598 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5602 int regno = REGNO (operands[1]);
5604 ops[0] = operands[0];
5605 ops[1] = gen_rtx_REG (DImode, regno + 2);
5606 ops[2] = gen_rtx_REG (DImode, regno + 6);
5607 ops[3] = gen_rtx_REG (DImode, regno + 10);
5608 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5611 [(set_attr "type" "neon_store3_3reg<q>")]
5614 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5615 ;; here on big endian targets.
;; Store one lane of a three-register structure (EI-mode operand 1, VD_LANE
;; modes) to a <V_three_elem> memory location.  The lane index (operand 2) is
;; endian-adjusted; the register list uses DImode registers at regno,
;; regno + 2 and regno + 4, and the address is printed with plain %0.
5616 (define_insn "neon_vst3_lane<mode>"
5617 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5618 (unspec:<V_three_elem>
5619 [(match_operand:EI 1 "s_register_operand" "w")
5620 (match_operand:SI 2 "immediate_operand" "i")
5621 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5625 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5626 int regno = REGNO (operands[1]);
5628 ops[0] = operands[0];
5629 ops[1] = gen_rtx_REG (DImode, regno);
5630 ops[2] = gen_rtx_REG (DImode, regno + 2);
5631 ops[3] = gen_rtx_REG (DImode, regno + 4);
5632 ops[4] = GEN_INT (lane);
5633 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5637 [(set_attr "type" "neon_store3_one_lane<q>")]
5640 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5641 ;; here on big endian targets.
;; VQ_HS variant of the three-element lane store, reading a CI-mode register.
;; The endian-adjusted lane is tested against half the element count
;; (`lane >= max / 2`); the register list uses DImode registers at regno,
;; regno + 4 and regno + 8, and the address is printed with plain %0.
5642 (define_insn "neon_vst3_lane<mode>"
5643 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5644 (unspec:<V_three_elem>
5645 [(match_operand:CI 1 "s_register_operand" "w")
5646 (match_operand:SI 2 "immediate_operand" "i")
5647 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5651 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5652 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5653 int regno = REGNO (operands[1]);
5655 if (lane >= max / 2)
5660 ops[0] = operands[0];
5661 ops[1] = gen_rtx_REG (DImode, regno);
5662 ops[2] = gen_rtx_REG (DImode, regno + 4);
5663 ops[3] = gen_rtx_REG (DImode, regno + 8);
5664 ops[4] = GEN_INT (lane);
5665 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5669 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander loading an OI-mode register (operand 0) from an OI memory operand,
;; with a VDX UNSPEC_VSTRUCTDUMMY marker alongside the memory operand.
5672 (define_expand "vec_load_lanesoi<mode>"
5673 [(set (match_operand:OI 0 "s_register_operand")
5674 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5675 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-register structure load into an OI-mode register for VDXBF modes.
;; 64-bit elements are emitted as vld1.64; all other element sizes use
;; vld4.<V_sz_elem>.  The "type" attribute makes the same 64-bit distinction.
5679 (define_insn "neon_vld4<mode>"
5680 [(set (match_operand:OI 0 "s_register_operand" "=w")
5681 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5682 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5686 if (<V_sz_elem> == 64)
5687 return "vld1.64\t%h0, %A1";
5689 return "vld4.<V_sz_elem>\t%h0, %A1";
5692 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5693 (const_string "neon_load1_4reg<q>")
5694 (const_string "neon_load4_4reg<q>")))]
;; VQ2BF four-element structure load into an XI-mode register, done as two
;; OImode halves: neon_vld4qa<mode> loads from the start of the memory
;; operand, then neon_vld4qb<mode> loads from GET_MODE_SIZE (OImode) bytes
;; further on, taking operand 0 as both output and tied input.
5697 (define_expand "neon_vld4<mode>"
5698 [(match_operand:XI 0 "s_register_operand")
5699 (match_operand:XI 1 "neon_struct_operand")
5700 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5705 mem = adjust_address (operands[1], OImode, 0);
5706 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5707 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5708 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of the VQ2BF vld4 sequence: one vld4 from OI memory into the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 of the
;; XI-mode destination.
5712 (define_insn "neon_vld4qa<mode>"
5713 [(set (match_operand:XI 0 "s_register_operand" "=w")
5714 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5715 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5719 int regno = REGNO (operands[0]);
5721 ops[0] = gen_rtx_REG (DImode, regno);
5722 ops[1] = gen_rtx_REG (DImode, regno + 4);
5723 ops[2] = gen_rtx_REG (DImode, regno + 8);
5724 ops[3] = gen_rtx_REG (DImode, regno + 12);
5725 ops[4] = operands[1];
5726 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5729 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the VQ2BF vld4 sequence: one vld4 from OI memory into the
;; DImode registers at regno + 2, regno + 6, regno + 10 and regno + 14.
;; Operand 2 is the incoming XI value, tied to the output by constraint "0".
5732 (define_insn "neon_vld4qb<mode>"
5733 [(set (match_operand:XI 0 "s_register_operand" "=w")
5734 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5735 (match_operand:XI 2 "s_register_operand" "0")
5736 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5740 int regno = REGNO (operands[0]);
5742 ops[0] = gen_rtx_REG (DImode, regno + 2);
5743 ops[1] = gen_rtx_REG (DImode, regno + 6);
5744 ops[2] = gen_rtx_REG (DImode, regno + 10);
5745 ops[3] = gen_rtx_REG (DImode, regno + 14);
5746 ops[4] = operands[1];
5747 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5750 [(set_attr "type" "neon_load4_4reg<q>")]
5753 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5754 ;; here on big endian targets.
;; Load a four-element structure into one lane of an OI-mode register
;; (VD_LANE modes).  Operand 2 is tied to the output by constraint "0"; the
;; lane index (operand 3) is endian-adjusted.  The register list uses DImode
;; registers at regno, regno + 2, regno + 4 and regno + 6.
5755 (define_insn "neon_vld4_lane<mode>"
5756 [(set (match_operand:OI 0 "s_register_operand" "=w")
5757 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5758 (match_operand:OI 2 "s_register_operand" "0")
5759 (match_operand:SI 3 "immediate_operand" "i")
5760 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5764 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5765 int regno = REGNO (operands[0]);
5767 ops[0] = gen_rtx_REG (DImode, regno);
5768 ops[1] = gen_rtx_REG (DImode, regno + 2);
5769 ops[2] = gen_rtx_REG (DImode, regno + 4);
5770 ops[3] = gen_rtx_REG (DImode, regno + 6);
5771 ops[4] = operands[1];
5772 ops[5] = GEN_INT (lane);
5773 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5777 [(set_attr "type" "neon_load4_one_lane<q>")]
5780 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5781 ;; here on big endian targets.
;; VQ_HS variant of the four-element lane load, operating on an XI-mode
;; register.  The endian-adjusted lane is tested against half the element
;; count (`lane >= max / 2`); the register list uses DImode registers at
;; regno, regno + 4, regno + 8 and regno + 12.
5782 (define_insn "neon_vld4_lane<mode>"
5783 [(set (match_operand:XI 0 "s_register_operand" "=w")
5784 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5785 (match_operand:XI 2 "s_register_operand" "0")
5786 (match_operand:SI 3 "immediate_operand" "i")
5787 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5791 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5792 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5793 int regno = REGNO (operands[0]);
5795 if (lane >= max / 2)
5800 ops[0] = gen_rtx_REG (DImode, regno);
5801 ops[1] = gen_rtx_REG (DImode, regno + 4);
5802 ops[2] = gen_rtx_REG (DImode, regno + 8);
5803 ops[3] = gen_rtx_REG (DImode, regno + 12);
5804 ops[4] = operands[1];
5805 ops[5] = GEN_INT (lane);
5806 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5810 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load a four-element structure and replicate it to all lanes of an OI-mode
;; register (VDXBF modes).  Modes with more than one element emit an
;; all-lanes vld4 on the DImode registers at regno, regno + 2, regno + 4 and
;; regno + 6; single-element modes fall back to a plain vld1.  The "type"
;; attribute follows the same split via <V_mode_nunits>.
5813 (define_insn "neon_vld4_dup<mode>"
5814 [(set (match_operand:OI 0 "s_register_operand" "=w")
5815 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5816 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5820 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5822 int regno = REGNO (operands[0]);
5824 ops[0] = gen_rtx_REG (DImode, regno);
5825 ops[1] = gen_rtx_REG (DImode, regno + 2);
5826 ops[2] = gen_rtx_REG (DImode, regno + 4);
5827 ops[3] = gen_rtx_REG (DImode, regno + 6);
5828 ops[4] = operands[1];
5829 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5834 return "vld1.<V_sz_elem>\t%h0, %A1";
5837 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5838 (const_string "neon_load4_all_lanes<q>")
5839 (const_string "neon_load1_1reg<q>")))]
;; V8BF form of the four-element duplicating load into an XI-mode register.
;; Four V4BF registers at tabbase, tabbase + 2, tabbase + 4 and tabbase + 6
;; are listed with the "[]" all-lanes suffix in one vld4.16; operand 1 (V2BF
;; memory) is the address.
5842 (define_insn "neon_vld4_dupv8bf"
5843 [(set (match_operand:XI 0 "s_register_operand" "=w")
5844 (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5845 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5850 int tabbase = REGNO (operands[0]);
5852 ops[4] = operands[1];
5853 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5854 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5855 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5856 ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5857 output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
5860 [(set_attr "type" "neon_load4_all_lanes_q")]
;; Expander storing an OI-mode register (operand 1) to an OI memory operand,
;; with a VDX UNSPEC_VSTRUCTDUMMY marker alongside the register operand.
5863 (define_expand "vec_store_lanesoi<mode>"
5864 [(set (match_operand:OI 0 "neon_struct_operand")
5865 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5866 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-register structure store from an OI-mode register for VDXBF modes.
;; 64-bit elements are emitted as vst1.64; all other element sizes use
;; vst4.<V_sz_elem>.  The "type" attribute makes the same 64-bit distinction
;; (neon_store1_4reg vs. neon_store4_4reg).
5870 (define_insn "neon_vst4<mode>"
5871 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5872 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5873 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5877 if (<V_sz_elem> == 64)
5878 return "vst1.64\t%h1, %A0";
5880 return "vst4.<V_sz_elem>\t%h1, %A0";
5883 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5884 (const_string "neon_store1_4reg<q>")
5885 (const_string "neon_store4_4reg<q>")))]
;; VQ2BF four-element structure store from an XI-mode register, done as two
;; OImode halves: neon_vst4qa<mode> writes at the start of the memory
;; operand, then neon_vst4qb<mode> writes GET_MODE_SIZE (OImode) bytes
;; further on.  Both receive the full XI register operand 1.
5888 (define_expand "neon_vst4<mode>"
5889 [(match_operand:XI 0 "neon_struct_operand")
5890 (match_operand:XI 1 "s_register_operand")
5891 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5896 mem = adjust_address (operands[0], OImode, 0);
5897 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5898 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5899 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of the VQ2BF vst4 sequence: one vst4 to OI memory from the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 of the
;; XI-mode source.
5903 (define_insn "neon_vst4qa<mode>"
5904 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5905 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5906 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5910 int regno = REGNO (operands[1]);
5912 ops[0] = operands[0];
5913 ops[1] = gen_rtx_REG (DImode, regno);
5914 ops[2] = gen_rtx_REG (DImode, regno + 4);
5915 ops[3] = gen_rtx_REG (DImode, regno + 8);
5916 ops[4] = gen_rtx_REG (DImode, regno + 12);
5917 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5920 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of the VQ2BF vst4 sequence: one vst4 to OI memory from the
;; DImode registers at regno + 2, regno + 6, regno + 10 and regno + 14 of the
;; XI-mode source.
5923 (define_insn "neon_vst4qb<mode>"
5924 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5925 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5926 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5930 int regno = REGNO (operands[1]);
5932 ops[0] = operands[0];
5933 ops[1] = gen_rtx_REG (DImode, regno + 2);
5934 ops[2] = gen_rtx_REG (DImode, regno + 6);
5935 ops[3] = gen_rtx_REG (DImode, regno + 10);
5936 ops[4] = gen_rtx_REG (DImode, regno + 14);
5937 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5940 [(set_attr "type" "neon_store4_4reg<q>")]
5943 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5944 ;; here on big endian targets.
;; Store one lane of a four-register structure (OI-mode operand 1, VD_LANE
;; modes) to a <V_four_elem> memory location.  The lane index (operand 2) is
;; endian-adjusted; the register list uses DImode registers at regno,
;; regno + 2, regno + 4 and regno + 6.
5945 (define_insn "neon_vst4_lane<mode>"
5946 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5947 (unspec:<V_four_elem>
5948 [(match_operand:OI 1 "s_register_operand" "w")
5949 (match_operand:SI 2 "immediate_operand" "i")
5950 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5954 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5955 int regno = REGNO (operands[1]);
5957 ops[0] = operands[0];
5958 ops[1] = gen_rtx_REG (DImode, regno);
5959 ops[2] = gen_rtx_REG (DImode, regno + 2);
5960 ops[3] = gen_rtx_REG (DImode, regno + 4);
5961 ops[4] = gen_rtx_REG (DImode, regno + 6);
5962 ops[5] = GEN_INT (lane);
5963 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5967 [(set_attr "type" "neon_store4_one_lane<q>")]
5970 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5971 ;; here on big endian targets.
;; VQ_HS variant of the four-element lane store, reading an XI-mode register.
;; The endian-adjusted lane is tested against half the element count
;; (`lane >= max / 2`); the register list uses DImode registers at regno,
;; regno + 4, regno + 8 and regno + 12.
;; NOTE(review): the "type" here is neon_store4_4reg<q>, while the VD_LANE
;; variant of this lane store uses neon_store4_one_lane<q> -- confirm which
;; scheduling type is intended.
5972 (define_insn "neon_vst4_lane<mode>"
5973 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5974 (unspec:<V_four_elem>
5975 [(match_operand:XI 1 "s_register_operand" "w")
5976 (match_operand:SI 2 "immediate_operand" "i")
5977 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5981 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5982 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5983 int regno = REGNO (operands[1]);
5985 if (lane >= max / 2)
5990 ops[0] = operands[0];
5991 ops[1] = gen_rtx_REG (DImode, regno);
5992 ops[2] = gen_rtx_REG (DImode, regno + 4);
5993 ops[3] = gen_rtx_REG (DImode, regno + 8);
5994 ops[4] = gen_rtx_REG (DImode, regno + 12);
5995 ops[5] = GEN_INT (lane);
5996 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6000 [(set_attr "type" "neon_store4_4reg<q>")]
;; Widen the low half of the VU register operand 1: the SE extension is
;; applied to a vec_select whose lane list (operand 2) must satisfy
;; vect_par_constant_low.  Emitted as vmovl on %e1.  Only enabled for
;; TARGET_NEON on little-endian (!BYTES_BIG_ENDIAN).
6003 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6004 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6005 (SE:<V_unpack> (vec_select:<V_HALF>
6006 (match_operand:VU 1 "register_operand" "w")
6007 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6008 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6009 "vmovl.<US><V_sz_elem> %q0, %e1"
6010 [(set_attr "type" "neon_shift_imm_long")]
;; Widen the high half of the VU register operand 1: the SE extension is
;; applied to a vec_select whose lane list (operand 2) must satisfy
;; vect_par_constant_high.  Emitted as vmovl on %f1.  Only enabled for
;; TARGET_NEON on little-endian (!BYTES_BIG_ENDIAN).
6013 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6014 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6015 (SE:<V_unpack> (vec_select:<V_HALF>
6016 (match_operand:VU 1 "register_operand" "w")
6017 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6018 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6019 "vmovl.<US><V_sz_elem> %q0, %f1"
6020 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for unpacking the high half of a VU vector (little-endian NEON
;; only).  It builds t1, a PARALLEL holding the lane indices
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1, and emits
;; neon_vec_unpack<US>_hi_<mode> for operand 0 (t1 is presumably the
;; lane-selection operand of that insn).
6023 (define_expand "vec_unpack<US>_hi_<mode>"
6024 [(match_operand:<V_unpack> 0 "register_operand")
6025 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6026 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6028 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6031 for (i = 0; i < (<V_mode_nunits>/2); i++)
6032 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6034 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6035 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for unpacking the low half of a VU vector (little-endian NEON
;; only).  It builds t1, a PARALLEL holding the lane indices
;; 0 .. <V_mode_nunits>/2 - 1, and emits neon_vec_unpack<US>_lo_<mode> for
;; operand 0 (t1 is presumably the lane-selection operand of that insn).
6042 (define_expand "vec_unpack<US>_lo_<mode>"
6043 [(match_operand:<V_unpack> 0 "register_operand")
6044 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6045 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6047 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6050 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6051 RTVEC_ELT (v, i) = GEN_INT (i);
6052 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6053 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the low halves of two VU registers (operands 1 and
;; 3): each half is selected with vec_select, extended with SE, and the
;; results multiplied in <V_unpack> mode.  Emitted as vmull on %e1 and %e3.
;; Only enabled for TARGET_NEON on little-endian.
6060 (define_insn "neon_vec_<US>mult_lo_<mode>"
6061 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6062 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6063 (match_operand:VU 1 "register_operand" "w")
6064 (match_operand:VU 2 "vect_par_constant_low" "")))
6065 (SE:<V_unpack> (vec_select:<V_HALF>
6066 (match_operand:VU 3 "register_operand" "w")
6068 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6069 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6070 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the low halves of two VU vectors
;; (little-endian NEON only).  It builds t1, a PARALLEL holding the lane
;; indices 0 .. <V_mode_nunits>/2 - 1, and emits neon_vec_<US>mult_lo_<mode>
;; for operand 0 (t1 is presumably the lane-selection operand).
6073 (define_expand "vec_widen_<US>mult_lo_<mode>"
6074 [(match_operand:<V_unpack> 0 "register_operand")
6075 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6076 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6077 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6079 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6082 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6083 RTVEC_ELT (v, i) = GEN_INT (i);
6084 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6086 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves of two VU registers (operands 1 and
;; 3): each half is selected with vec_select, extended with SE, and the
;; results multiplied in <V_unpack> mode.  Emitted as vmull on %f1 and %f3.
;; Only enabled for TARGET_NEON on little-endian.
6094 (define_insn "neon_vec_<US>mult_hi_<mode>"
6095 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6096 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6097 (match_operand:VU 1 "register_operand" "w")
6098 (match_operand:VU 2 "vect_par_constant_high" "")))
6099 (SE:<V_unpack> (vec_select:<V_HALF>
6100 (match_operand:VU 3 "register_operand" "w")
6102 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6103 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6104 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the high halves of two VU vectors
;; (little-endian NEON only).  It builds t1, a PARALLEL holding the lane
;; indices <V_mode_nunits>/2 .. <V_mode_nunits>-1, and emits
;; neon_vec_<US>mult_hi_<mode> for operand 0 (t1 is presumably the
;; lane-selection operand).
6107 (define_expand "vec_widen_<US>mult_hi_<mode>"
6108 [(match_operand:<V_unpack> 0 "register_operand")
6109 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6110 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6111 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6113 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6116 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6117 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6118 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6120 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening left shift: the VW register operand 1 is shifted left by the
;; constant operand 2 (const_neon_scalar_shift_amount_operand) and the result
;; extended with SE to <V_widen>.  Emitted as vshll.
6129 (define_insn "neon_vec_<US>shiftl_<mode>"
6130 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6131 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6132 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6135 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6137 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for a widening left shift of the low half of a VU vector
;; (little-endian NEON only).  The half is taken as a <V_HALF>mode subreg of
;; operand 1 at byte offset 0 and handed to neon_vec_<US>shiftl_<V_half>.
6140 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6141 [(match_operand:<V_unpack> 0 "register_operand")
6142 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6143 (match_operand:SI 2 "immediate_operand")]
6144 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6146 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6147 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for the high-half widening shift on VU modes.
;; Takes the <V_HALF> subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and passes it to
;; neon_vec_<US>shiftl_<V_half>, with operand 0 as the destination.
;; Only enabled when TARGET_NEON && !BYTES_BIG_ENDIAN.
6153 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6154 [(match_operand:<V_unpack> 0 "register_operand")
6155 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6156 (match_operand:SI 2 "immediate_operand")]
6157 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6159 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6160 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6161 GET_MODE_SIZE (<V_HALF>mode)),
6167 ;; Vectorize for non-neon-quad case
;; Widening move insn over the VDI mode iterator: operand 0 (<V_widen>) is
;; the SE extension of operand 1, output as
;; vmovl.<US><V_sz_elem> %q0, %P1.
6168 (define_insn "neon_unpack<US>_<mode>"
6169 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6170 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6172 "vmovl.<US><V_sz_elem> %q0, %P1"
6173 [(set_attr "type" "neon_move")]
;; Expander for unpacking the low part of a VDI vector.
;; Extends the whole of operand 1 into a fresh <V_widen> pseudo with
;; neon_unpack<US>_<mode>, then uses neon_vget_low<V_widen_l> to produce
;; operand 0 (<V_double_width>) from that pseudo.
6176 (define_expand "vec_unpack<US>_lo_<mode>"
6177 [(match_operand:<V_double_width> 0 "register_operand")
6178 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6181 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6182 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6183 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for unpacking the high part of a VDI vector.
;; Extends the whole of operand 1 into a fresh <V_widen> pseudo with
;; neon_unpack<US>_<mode>, then uses neon_vget_high<V_widen_l> to produce
;; operand 0 (<V_double_width>) from that pseudo.
6189 (define_expand "vec_unpack<US>_hi_<mode>"
6190 [(match_operand:<V_double_width> 0 "register_operand")
6191 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6194 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6195 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6196 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply insn over the VDI mode iterator: operand 0 (<V_widen>)
;; is a mult whose inputs are VDI operands 1 and 2, with operand 1 visibly
;; wrapped in an SE extension.  Output is
;; vmull.<US><V_sz_elem> %q0, %P1, %P2.
6202 (define_insn "neon_vec_<US>mult_<mode>"
6203 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6204 (mult:<V_widen> (SE:<V_widen>
6205 (match_operand:VDI 1 "register_operand" "w"))
6207 (match_operand:VDI 2 "register_operand" "w"))))]
6209 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6210 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the high-part widening multiply on VDI modes.
;; Computes the full widening product of operands 1 and 2 into a fresh
;; <V_widen> pseudo with neon_vec_<US>mult_<mode>, then uses
;; neon_vget_high<V_widen_l> to produce operand 0 from that pseudo.
6213 (define_expand "vec_widen_<US>mult_hi_<mode>"
6214 [(match_operand:<V_double_width> 0 "register_operand")
6215 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6216 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6219 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6220 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6221 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for the low-part widening multiply on VDI modes.
;; Computes the full widening product of operands 1 and 2 into a fresh
;; <V_widen> pseudo with neon_vec_<US>mult_<mode>, then uses
;; neon_vget_low<V_widen_l> to produce operand 0 from that pseudo.
6228 (define_expand "vec_widen_<US>mult_lo_<mode>"
6229 [(match_operand:<V_double_width> 0 "register_operand")
6230 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6231 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6234 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6235 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6236 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for the high-part widening shift on VDI modes.
;; Shifts all of operand 1 by operand 2 into a fresh <V_widen> pseudo with
;; neon_vec_<US>shiftl_<mode>, then uses neon_vget_high<V_widen_l> to
;; produce operand 0 from that pseudo.
6243 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6244 [(match_operand:<V_double_width> 0 "register_operand")
6245 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6246 (match_operand:SI 2 "immediate_operand")]
6249 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6250 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6251 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for the low-part widening shift on VDI modes.
;; Shifts all of operand 1 by operand 2 into a fresh <V_widen> pseudo with
;; neon_vec_<US>shiftl_<mode>, then uses neon_vget_low<V_widen_l> to
;; produce operand 0 from that pseudo.
6257 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6258 [(match_operand:<V_double_width> 0 "register_operand")
6259 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6260 (match_operand:SI 2 "immediate_operand")]
6263 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6264 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6265 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6271 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6272 ; because the ordering of vector elements in Q registers is different from what
6273 ; the semantics of the instructions require.
;; Pack two VN vectors into one <V_narrow_pack> vector: operand 0 is the
;; vec_concat of the truncations of operands 1 and 2.
;; The output is two vmovn instructions, writing %e0 from operand 1 and then
;; %f0 from operand 2, hence length 8 and type "multiple".  The destination
;; is earlyclobber ("=&w") because %e0 is written before operand 2 is read.
6275 (define_insn "vec_pack_trunc_<mode>"
6276 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6277 (vec_concat:<V_narrow_pack>
6278 (truncate:<V_narrow>
6279 (match_operand:VN 1 "register_operand" "w"))
6280 (truncate:<V_narrow>
6281 (match_operand:VN 2 "register_operand" "w"))))]
6282 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6283 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6284 [(set_attr "type" "multiple")
6285 (set_attr "length" "8")]
6288 ;; For the non-quad case.
;; Single-instruction narrowing: operand 0 (<V_narrow>) is the truncation of
;; the VN operand 1, output as vmovn.i<V_sz_elem> %P0, %q1.
;; Only enabled when TARGET_NEON && !BYTES_BIG_ENDIAN.
6289 (define_insn "neon_vec_pack_trunc_<mode>"
6290 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6291 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6292 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6293 "vmovn.i<V_sz_elem>\t%P0, %q1"
6294 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for packing two VSHFT vectors into one <V_narrow_pack> vector.
;; Places operand 1 and operand 2 into a fresh <V_DOUBLE> pseudo using
;; move_lo_quad_<V_double> and move_hi_quad_<V_double> respectively, then
;; narrows that pseudo into operand 0 with neon_vec_pack_trunc_<V_double>.
;; Only enabled when TARGET_NEON && !BYTES_BIG_ENDIAN.
6297 (define_expand "vec_pack_trunc_<mode>"
6298 [(match_operand:<V_narrow_pack> 0 "register_operand")
6299 (match_operand:VSHFT 1 "register_operand")
6300 (match_operand:VSHFT 2 "register_operand")]
6301 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6303 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6305 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6306 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6307 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference over the VF mode iterator, expressed in plain RTL as
;; abs (operand 1 - operand 2).  Output is
;; vabd.<V_s_elem> on the <V_reg> form of operands 0, 1 and 2.
;; Enabled under ARM_HAVE_NEON_<MODE>_ARITH.
6311 (define_insn "neon_vabd<mode>_2"
6312 [(set (match_operand:VF 0 "s_register_operand" "=w")
6313 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6314 (match_operand:VF 2 "s_register_operand" "w"))))]
6315 "ARM_HAVE_NEON_<MODE>_ARITH"
6316 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6317 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Absolute difference over the VF mode iterator, expressed as the abs of an
;; unspec taking operands 1 and 2.  Output is
;; vabd.<V_if_elem> on the <V_reg> form of operands 0, 1 and 2.
;; Enabled under ARM_HAVE_NEON_<MODE>_ARITH.
6320 (define_insn "neon_vabd<mode>_3"
6321 [(set (match_operand:VF 0 "s_register_operand" "=w")
6322 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6323 (match_operand:VF 2 "s_register_operand" "w")]
6325 "ARM_HAVE_NEON_<MODE>_ARITH"
6326 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6327 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Matrix multiply-accumulate on V16QI inputs with a V4SI result.
;; The MATMUL unspec of operands 2 and 3 is combined with operand 1, which is
;; tied to the destination by the "0" constraint.  Output is
;; v<sup>mmla.<mmla_sfx> %q0, %q2, %q3.
6330 (define_insn "neon_<sup>mmlav16qi"
6331 [(set (match_operand:V4SI 0 "register_operand" "=w")
6333 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
6334 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
6335 (match_operand:V4SI 1 "register_operand" "0")))]
6337 "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
6338 [(set_attr "type" "neon_mla_s_q")]
;; BFloat16 dot product with accumulate, over the VCVTF mode iterator.
;; Operand 1 is the addend and is tied to the destination ("0"); operands 2
;; and 3 have mode <VSF2BF>.  Output is vdot.bf16 on the <V_reg> form of
;; operands 0, 2 and 3.
6341 (define_insn "neon_vbfdot<VCVTF:mode>"
6342 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6343 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6345 (match_operand:<VSF2BF> 2 "register_operand" "w")
6346 (match_operand:<VSF2BF> 3 "register_operand" "w")]
6349 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
6350 [(set_attr "type" "neon_dot<q>")]
;; BFloat16 dot product by lane, with a V4BF lane source.
;; Operand 1 is the addend, tied to the destination ("0"); operand 2 has mode
;; <VSF2BF>; operand 3 is the V4BF vector (constraint "x") indexed by the
;; immediate in operand 4.  Output is
;; vdot.bf16 %<V_reg>0, %<V_reg>2, %P3[%c4].
6353 (define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
6354 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6355 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6357 (match_operand:<VSF2BF> 2 "register_operand" "w")
6358 (match_operand:V4BF 3 "register_operand" "x")
6359 (match_operand:SI 4 "immediate_operand" "i")]
6362 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
6363 [(set_attr "type" "neon_dot<q>")]
;; BFloat16 dot product by lane, with a V8BF lane source.
;; Same operand layout as the V4BF variant, but operand 3 is a V8BF vector,
;; so the output code picks one part of it:
;;   - `half' is GET_MODE_NUNITS of operand 3's mode divided by 4;
;;   - one path indexes %e3 with the lane unchanged;
;;   - the other path rewrites operand 4 to lane - half and indexes %f3.
;; NOTE(review): the condition choosing between the two paths is not visible
;; here; presumably it is lane < half -- confirm.
6366 (define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
6367 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6368 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6370 (match_operand:<VSF2BF> 2 "register_operand" "w")
6371 (match_operand:V8BF 3 "register_operand" "x")
6372 (match_operand:SI 4 "immediate_operand" "i")]
6376 int lane = INTVAL (operands[4]);
6377 int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
6379 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
6382 operands[4] = GEN_INT (lane - half);
6383 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
6386 [(set_attr "type" "neon_dot<q>")]
;; Convert a V4SF vector (operand 1) to BFloat16, over the VBFCVT mode
;; iterator for the destination.  Output is
;; vcvt.bf16.f32 %<V_bf_low>0, %q1.
6389 (define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
6390 [(set (match_operand:VBFCVT 0 "register_operand" "=w")
6391 (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")]
6394 "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
6395 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Convert a V4SF vector (operand 2) to BFloat16 and write it to the %f part
;; of the V8BF destination.  Operand 1 is the existing V8BF value and is tied
;; to the destination ("0"), so the part not written by the instruction comes
;; from it.  Modelled as UNSPEC_BFCVT_HIGH.
6398 (define_insn "neon_vbfcvtv4sf_highv8bf"
6399 [(set (match_operand:V8BF 0 "register_operand" "=w")
6400 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
6401 (match_operand:V4SF 2 "register_operand" "w")]
6402 UNSPEC_BFCVT_HIGH))]
6404 "vcvt.bf16.f32\\t%f0, %q2"
6405 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Scalar conversion from SF (operand 1) to BF (operand 0); both operands use
;; the "t" register constraint.  Output is vcvtb.bf16.f32 %0, %1.
6408 (define_insn "neon_vbfcvtsf"
6409 [(set (match_operand:BF 0 "register_operand" "=t")
6410 (unspec:BF [(match_operand:SF 1 "register_operand" "t")]
6413 "vcvtb.bf16.f32\\t%0, %1"
6414 [(set_attr "type" "f_cvt")]
;; Convert a BFloat16 vector (operand 1, VBFCVT mode iterator) to V4SF.
;; Implemented as a widening left shift by 16:
;; vshll.u32 %q0, %<V_bf_low>1, #16.
6417 (define_insn "neon_vbfcvt<VBFCVT:mode>"
6418 [(set (match_operand:V4SF 0 "register_operand" "=w")
6419 (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")]
6422 "vshll.u32\\t%q0, %<V_bf_low>1, #16"
6423 [(set_attr "type" "neon_shift_imm_q")]
;; Convert the %f part of a V8BF vector (operand 1) to V4SF, modelled as
;; UNSPEC_BFCVT_HIGH.  Implemented as a widening left shift by 16:
;; vshll.u32 %q0, %f1, #16.
6426 (define_insn "neon_vbfcvt_highv8bf"
6427 [(set (match_operand:V4SF 0 "register_operand" "=w")
6428 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
6429 UNSPEC_BFCVT_HIGH))]
6431 "vshll.u32\\t%q0, %f1, #16"
6432 [(set_attr "type" "neon_shift_imm_q")]
6435 ;; Convert a BF scalar operand to SF via VSHL.
6436 ;; VSHL doesn't accept 32-bit registers where the BF and SF scalar operands
6437 ;; would be allocated, therefore the operands must be converted to intermediate
6438 ;; vectors (i.e. V2SI) in order to apply 64-bit registers.
;; Expansion sequence, using two fresh V2SI pseudos:
;;   1. neon_vbfcvtbf_cvtmodev2si moves the BF operand 1 into op1;
;;   2. neon_vshl_nv2si shifts op1 left by 16 into op0;
;;   3. neon_vbfcvtbf_cvtmodesf moves op0 into the SF operand 0.
6439 (define_expand "neon_vbfcvtbf"
6440 [(match_operand:SF 0 "register_operand")
6441 (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
6444 rtx op0 = gen_reg_rtx (V2SImode);
6445 rtx op1 = gen_reg_rtx (V2SImode);
6446 emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1]));
6447 emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode(16, SImode)));
6448 emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0));
6452 ;; Convert BF mode to V2SI and V2SI to SF.
6453 ;; Implement this by allocating a 32-bit operand in the low half of a 64-bit
6454 ;; register indexed by a 32-bit sub-register number.
6455 ;; This will generate reloads but compiler can optimize out the moves.
6456 ;; Use the 'x' constraint to guarantee that the 32-bit sub-registers are in an
6457 ;; indexable range, so as to avoid extra moves.
;; Mode-changing helper over the VBFCVTM iterator: the destination (constraint
;; "x") is an unspec of operand 1, which has mode <V_bf_cvt_m> and is tied to
;; the destination register by the "0" constraint.
6458 (define_insn "neon_vbfcvtbf_cvtmode<mode>"
6459 [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
6460 (unspec:VBFCVTM [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
;; BFloat16 matrix multiply-accumulate: V4SF operand 1 (tied to the
;; destination by "0") plus an unspec of the V8BF operands 2 and 3.
;; Output is vmmla.bf16 %q0, %q2, %q3.
6466 (define_insn "neon_vmmlav8bf"
6467 [(set (match_operand:V4SF 0 "register_operand" "=w")
6468 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
6469 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6470 (match_operand:V8BF 3 "register_operand" "w")]
6473 "vmmla.bf16\\t%q0, %q2, %q3"
6474 [(set_attr "type" "neon_fp_mla_s_q")]
;; BFloat16 fused multiply-accumulate, one pattern per <bt> variant:
;; V4SF operand 1 (tied to the destination by "0") plus an unspec of the V8BF
;; operands 2 and 3.  Output is vfma<bt>.bf16 %q0, %q2, %q3.
6477 (define_insn "neon_vfma<bt>v8bf"
6478 [(set (match_operand:V4SF 0 "register_operand" "=w")
6479 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6480 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6481 (match_operand:V8BF 3 "register_operand" "w")]
6484 "vfma<bt>.bf16\\t%q0, %q2, %q3"
6485 [(set_attr "type" "neon_fp_mla_s_q")]
;; By-lane form of vfma<bt>.bf16: V4SF operand 1 (tied to the destination by
;; "0") plus an unspec of V8BF operand 2, V4BF operand 3 (constraint "x") and
;; the constant lane index in operand 4.  Output is
;; vfma<bt>.bf16 %q0, %q2, %P3[%c4].
6488 (define_insn "neon_vfma<bt>_lanev8bf"
6489 [(set (match_operand:V4SF 0 "register_operand" "=w")
6490 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6491 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6492 (match_operand:V4BF 3 "register_operand" "x")
6493 (match_operand:SI 4 "const_int_operand" "n")]
6496 "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
6497 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
;; Expander for the by-lane vfma<bt>.bf16 with a V8BF lane source.
;; The lane index (operand 4) is asserted to be in [0, 7].  Two paths are
;; visible, both emitting neon_vfma<bt>_lanev8bf:
;;   - one passes operands 3 and 4 through unchanged;
;;   - the other copies operand 3 through neon_vget_highv8bf into a fresh
;;     V4BF pseudo and uses lane - 4 as the index.
;; NOTE(review): the condition choosing between the paths is not visible
;; here; presumably it is lane < 4 -- confirm.
;; NOTE(review): the pass-through path hands the V8BF operand 3 to a pattern
;; whose operand 3 is declared V4BF -- confirm this is intended.
6500 (define_expand "neon_vfma<bt>_laneqv8bf"
6501 [(set (match_operand:V4SF 0 "register_operand" "=w")
6502 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6503 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6504 (match_operand:V8BF 3 "register_operand" "x")
6505 (match_operand:SI 4 "const_int_operand" "n")]
6509 int lane = INTVAL (operands[4]);
6510 gcc_assert (IN_RANGE(lane, 0, 7));
6513 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], operands[3], operands[4]));
6517 rtx op_highpart = gen_reg_rtx (V4BFmode);
6518 emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3]));
6519 operands[4] = GEN_INT (lane - 4);
6520 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4]));
6524 [(set_attr "type" "neon_fp_mla_s_scalar_q")]