1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute may take the values "vadd", "vmin" or "vmax"; its default
;; value is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
145 /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
148 if (can_create_pseudo_p ())
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
160 /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
163 if (can_create_pseudo_p ())
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
203 neon_disambiguate_copy (operands, dest, src, 2);
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
222 neon_disambiguate_copy (operands, dest, src, 2);
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
244 neon_disambiguate_copy (operands, dest, src, 3);
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
269 neon_disambiguate_copy (operands, dest, src, 4);
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
289 adjust_mem = operands[0];
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store for the VDX modes.  Operand 1 is a register operand with
;; constraint "w"; operand 0 must satisfy neon_permissive_struct_operand and
;; constraint "Um".  The source is wrapped in UNSPEC_MISALIGNED_ACCESS, the
;; same RTL shape the "movmisalign<mode>" expander produces.  The pattern is
;; only enabled when TARGET_NEON and unaligned_access hold and the target is
;; not BYTES_BIG_ENDIAN.  It emits one vst1 whose element-size suffix is
;; supplied by <V_sz_elem>.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store for the VQX modes.  Same RTL shape and enabling condition
;; as the VDX store pattern of the same name; the only visible differences
;; are the mode iterator and the use of %q1 rather than %P1 to print the
;; source register in the vst1 template.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
393 operands[0] = gen_rtx_REG (DImode, regno);
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
398 return "vmov\t%P0, %Q1, %R1";
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
415 (define_insn "vec_extract<mode><V_elem_l>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
422 if (BYTES_BIG_ENDIAN)
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
437 (define_insn "vec_extract<mode><V_elem_l>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
463 (define_insn "vec_extractv2didi"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
472 operands[1] = gen_rtx_REG (DImode, regno);
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
482 (define_expand "vec_init<mode><V_elem_l>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
487 neon_expand_vector_init (operands[0], operands[1]);
491 ;; Doubleword and quadword arithmetic.
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
508 ;; As with SFmode, full support for HFmode vector arithmetic is only available
509 ;; when flag_unsafe_math_optimizations is enabled.
511 (define_insn "add<mode>3"
513 (match_operand:VH 0 "s_register_operand" "=w")
515 (match_operand:VH 1 "s_register_operand" "w")
516 (match_operand:VH 2 "s_register_operand" "w")))]
517 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
518 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
520 (if_then_else (match_test "<Is_float_mode>")
521 (const_string "neon_fp_addsub_s<q>")
522 (const_string "neon_add<q>")))]
525 (define_insn "add<mode>3_fp16"
527 (match_operand:VH 0 "s_register_operand" "=w")
529 (match_operand:VH 1 "s_register_operand" "w")
530 (match_operand:VH 2 "s_register_operand" "w")))]
531 "TARGET_NEON_FP16INST"
532 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
534 (if_then_else (match_test "<Is_float_mode>")
535 (const_string "neon_fp_addsub_s<q>")
536 (const_string "neon_add<q>")))]
539 (define_insn "adddi3_neon"
540 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
541 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
542 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
543 (clobber (reg:CC CC_REGNUM))]
546 switch (which_alternative)
548 case 0: /* fall through */
549 case 3: return "vadd.i64\t%P0, %P1, %P2";
555 default: gcc_unreachable ();
558 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
559 multiple,multiple,multiple")
560 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
561 (set_attr "length" "*,8,8,*,8,8,8")
562 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
565 (define_insn "*sub<mode>3_neon"
566 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
567 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
568 (match_operand:VDQ 2 "s_register_operand" "w")))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_addsub_s<q>")
574 (const_string "neon_sub<q>")))]
577 (define_insn "sub<mode>3"
579 (match_operand:VH 0 "s_register_operand" "=w")
581 (match_operand:VH 1 "s_register_operand" "w")
582 (match_operand:VH 2 "s_register_operand" "w")))]
583 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
584 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585 [(set_attr "type" "neon_sub<q>")]
588 (define_insn "sub<mode>3_fp16"
590 (match_operand:VH 0 "s_register_operand" "=w")
592 (match_operand:VH 1 "s_register_operand" "w")
593 (match_operand:VH 2 "s_register_operand" "w")))]
594 "TARGET_NEON_FP16INST"
595 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
596 [(set_attr "type" "neon_sub<q>")]
599 (define_insn "subdi3_neon"
600 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
601 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
602 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
603 (clobber (reg:CC CC_REGNUM))]
606 switch (which_alternative)
608 case 0: /* fall through */
609 case 4: return "vsub.i64\t%P0, %P1, %P2";
610 case 1: /* fall through */
611 case 2: /* fall through */
612 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
613 default: gcc_unreachable ();
616 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
617 (set_attr "conds" "*,clob,clob,clob,*")
618 (set_attr "length" "*,8,8,8,*")
619 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
622 (define_insn "*mul<mode>3_neon"
623 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
624 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
625 (match_operand:VDQW 2 "s_register_operand" "w")))]
626 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
627 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
629 (if_then_else (match_test "<Is_float_mode>")
630 (const_string "neon_fp_mul_s<q>")
631 (const_string "neon_mul_<V_elem_ch><q>")))]
634 (define_insn "mul<mode>3add<mode>_neon"
635 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
636 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
637 (match_operand:VDQW 3 "s_register_operand" "w"))
638 (match_operand:VDQW 1 "s_register_operand" "0")))]
639 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
640 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
642 (if_then_else (match_test "<Is_float_mode>")
643 (const_string "neon_fp_mla_s<q>")
644 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VH modes:
;;   operand 0 = operand 1 + (operand 2 * operand 3)
;; emitted as vmla.f16.  Operand 1 carries the matching constraint "0", so
;; the addend shares the output register; only operands 0, 2 and 3 appear in
;; the assembler template.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the condition also tests <Is_float_mode>; presumably that is
;; always true for the VH modes, which would make
;; flag_unsafe_math_optimizations mandatory here -- confirm against the
;; iterator definitions.
647 (define_insn "mul<mode>3add<mode>_neon"
648 [(set (match_operand:VH 0 "s_register_operand" "=w")
649 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
650 (match_operand:VH 3 "s_register_operand" "w"))
651 (match_operand:VH 1 "s_register_operand" "0")))]
652 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
653 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
654 [(set_attr "type" "neon_fp_mla_s<q>")]
657 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
658 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
659 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
660 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
661 (match_operand:VDQW 3 "s_register_operand" "w"))))]
662 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
663 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
665 (if_then_else (match_test "<Is_float_mode>")
666 (const_string "neon_fp_mla_s<q>")
667 (const_string "neon_mla_<V_elem_ch><q>")))]
670 ;; Fused multiply-accumulate
671 ;; We define each insn twice here:
672 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
673 ;; to be able to use when converting to FMA.
674 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes:
;;   operand 0 = fma (operand 1, operand 2, operand 3)
;; Operand 3 (the addend) carries the matching constraint "0", so it shares
;; the output register and is not printed in the vfma template.
;; This variant additionally requires flag_unsafe_math_optimizations;
;; "fma<VCVTF:mode>4_intrinsic" has the same RTL and template without that
;; requirement.
675 (define_insn "fma<VCVTF:mode>4"
676 [(set (match_operand:VCVTF 0 "register_operand" "=w")
677 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
678 (match_operand:VCVTF 2 "register_operand" "w")
679 (match_operand:VCVTF 3 "register_operand" "0")))]
680 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
681 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
682 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add for the VCVTF modes, in the form that does not depend
;; on flag_unsafe_math_optimizations: the condition is only
;; TARGET_NEON && TARGET_FMA.  The RTL and the vfma template are the same as
;; in "fma<VCVTF:mode>4"; operand 3 (the addend) shares the output register
;; through the matching constraint "0".
685 (define_insn "fma<VCVTF:mode>4_intrinsic"
686 [(set (match_operand:VCVTF 0 "register_operand" "=w")
687 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
688 (match_operand:VCVTF 2 "register_operand" "w")
689 (match_operand:VCVTF 3 "register_operand" "0")))]
690 "TARGET_NEON && TARGET_FMA"
691 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
692 [(set_attr "type" "neon_fp_mla_s<q>")]
695 (define_insn "fma<VH:mode>4"
696 [(set (match_operand:VH 0 "register_operand" "=w")
698 (match_operand:VH 1 "register_operand" "w")
699 (match_operand:VH 2 "register_operand" "w")
700 (match_operand:VH 3 "register_operand" "0")))]
701 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
702 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703 [(set_attr "type" "neon_fp_mla_s<q>")]
706 (define_insn "fma<VH:mode>4_intrinsic"
707 [(set (match_operand:VH 0 "register_operand" "=w")
709 (match_operand:VH 1 "register_operand" "w")
710 (match_operand:VH 2 "register_operand" "w")
711 (match_operand:VH 3 "register_operand" "0")))]
712 "TARGET_NEON_FP16INST"
713 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
714 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes:
;;   operand 0 = fma (-operand 1, operand 2, operand 3)
;; i.e. the first multiplicand is negated inside the fma.  Emitted as vfms
;; with operand 3 sharing the output register (matching constraint "0").
;; Enabled under the same condition as "fma<VCVTF:mode>4", including
;; flag_unsafe_math_optimizations.  The name begins with "*", so the pattern
;; is reached through RTL matching rather than by name.
717 (define_insn "*fmsub<VCVTF:mode>4"
718 [(set (match_operand:VCVTF 0 "register_operand" "=w")
719 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
720 (match_operand:VCVTF 2 "register_operand" "w")
721 (match_operand:VCVTF 3 "register_operand" "0")))]
722 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
723 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724 [(set_attr "type" "neon_fp_mla_s<q>")]
727 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
728 [(set (match_operand:VCVTF 0 "register_operand" "=w")
730 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
731 (match_operand:VCVTF 2 "register_operand" "w")
732 (match_operand:VCVTF 3 "register_operand" "0")))]
733 "TARGET_NEON && TARGET_FMA"
734 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735 [(set_attr "type" "neon_fp_mla_s<q>")]
738 (define_insn "fmsub<VH:mode>4_intrinsic"
739 [(set (match_operand:VH 0 "register_operand" "=w")
741 (neg:VH (match_operand:VH 1 "register_operand" "w"))
742 (match_operand:VH 2 "register_operand" "w")
743 (match_operand:VH 3 "register_operand" "0")))]
744 "TARGET_NEON_FP16INST"
745 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
746 [(set_attr "type" "neon_fp_mla_s<q>")]
749 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
750 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
751 (unspec:VCVTF [(match_operand:VCVTF 1
752 "s_register_operand" "w")]
754 "TARGET_NEON && TARGET_VFP5"
755 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
756 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
759 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
760 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
761 (FIXUORS:<V_cmp_result> (unspec:VCVTF
762 [(match_operand:VCVTF 1 "register_operand" "w")]
764 "TARGET_NEON && TARGET_VFP5"
765 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
766 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
767 (set_attr "predicable" "no")]
770 (define_insn "ior<mode>3"
771 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
772 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
773 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
776 switch (which_alternative)
778 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
779 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
780 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
781 default: gcc_unreachable ();
784 [(set_attr "type" "neon_logic<q>")]
787 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
788 ;; vorr. We support the pseudo-instruction vand instead, because that
789 ;; corresponds to the canonical form the middle-end expects to use for
790 ;; immediate bitwise-ANDs.
792 (define_insn "and<mode>3"
793 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
794 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
795 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
798 switch (which_alternative)
800 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
801 case 1: return neon_output_logic_immediate ("vand", &operands[2],
802 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
803 default: gcc_unreachable ();
806 [(set_attr "type" "neon_logic<q>")]
809 (define_insn "orn<mode>3_neon"
810 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
811 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
812 (match_operand:VDQ 1 "s_register_operand" "w")))]
814 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
815 [(set_attr "type" "neon_logic<q>")]
818 ;; TODO: investigate whether we should disable
819 ;; this and bicdi3_neon for the A8 in line with the other
821 (define_insn_and_split "orndi3_neon"
822 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
823 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
824 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
832 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
833 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
834 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
839 operands[3] = gen_highpart (SImode, operands[0]);
840 operands[0] = gen_lowpart (SImode, operands[0]);
841 operands[4] = gen_highpart (SImode, operands[2]);
842 operands[2] = gen_lowpart (SImode, operands[2]);
843 operands[5] = gen_highpart (SImode, operands[1]);
844 operands[1] = gen_lowpart (SImode, operands[1]);
848 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
849 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
853 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
854 (set_attr "length" "*,16,8,8")
855 (set_attr "arch" "any,a,t2,t2")]
858 (define_insn "bic<mode>3_neon"
859 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
860 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
861 (match_operand:VDQ 1 "s_register_operand" "w")))]
863 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
864 [(set_attr "type" "neon_logic<q>")]
867 ;; Compare to *anddi_notdi_di.
868 (define_insn "bicdi3_neon"
869 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
870 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
871 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
877 [(set_attr "type" "neon_logic,multiple,multiple")
878 (set_attr "length" "*,8,8")]
881 (define_insn "xor<mode>3"
882 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
883 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
884 (match_operand:VDQ 2 "s_register_operand" "w")))]
886 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887 [(set_attr "type" "neon_logic<q>")]
890 (define_insn "one_cmpl<mode>2"
891 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
892 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
894 "vmvn\t%<V_reg>0, %<V_reg>1"
895 [(set_attr "type" "neon_move<q>")]
898 (define_insn "abs<mode>2"
899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
900 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
902 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
904 (if_then_else (match_test "<Is_float_mode>")
905 (const_string "neon_fp_abs_s<q>")
906 (const_string "neon_abs<q>")))]
909 (define_insn "neg<mode>2"
910 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
911 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
913 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
915 (if_then_else (match_test "<Is_float_mode>")
916 (const_string "neon_fp_neg_s<q>")
917 (const_string "neon_neg<q>")))]
920 (define_insn "negdi2_neon"
921 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
922 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
923 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
924 (clobber (reg:CC CC_REGNUM))]
927 [(set_attr "length" "8")
928 (set_attr "type" "multiple")]
931 ; Split negdi2_neon for vfp registers
933 [(set (match_operand:DI 0 "s_register_operand" "")
934 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
935 (clobber (match_scratch:DI 2 ""))
936 (clobber (reg:CC CC_REGNUM))]
937 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
938 [(set (match_dup 2) (const_int 0))
939 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
940 (clobber (reg:CC CC_REGNUM))])]
942 if (!REG_P (operands[2]))
943 operands[2] = operands[0];
947 ; Split negdi2_neon for core registers
949 [(set (match_operand:DI 0 "s_register_operand" "")
950 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
951 (clobber (match_scratch:DI 2 ""))
952 (clobber (reg:CC CC_REGNUM))]
953 "TARGET_32BIT && reload_completed
954 && arm_general_register_operand (operands[0], DImode)"
955 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
956 (clobber (reg:CC CC_REGNUM))])]
;; Unary operation on the VH modes, instantiated once per code of the ABSNEG
;; iterator.  The mnemonic is built as "v" followed by <absneg_str>, with
;; <V_s_elem> supplying the element-type suffix.  Requires
;; TARGET_NEON_FP16INST.  The same "neon_abs<q>" type attribute is used for
;; every ABSNEG code.
960 (define_insn "<absneg_str><mode>2"
961 [(set (match_operand:VH 0 "s_register_operand" "=w")
962 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
963 "TARGET_NEON_FP16INST"
964 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
965 [(set_attr "type" "neon_abs<q>")]
968 (define_expand "neon_v<absneg_str><mode>"
970 (match_operand:VH 0 "s_register_operand")
971 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
972 "TARGET_NEON_FP16INST"
974 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
978 (define_insn "neon_v<fp16_rnd_str><mode>"
979 [(set (match_operand:VH 0 "s_register_operand" "=w")
981 [(match_operand:VH 1 "s_register_operand" "w")]
983 "TARGET_NEON_FP16INST"
984 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
985 [(set_attr "type" "neon_fp_round_s<q>")]
988 (define_insn "neon_vrsqrte<mode>"
989 [(set (match_operand:VH 0 "s_register_operand" "=w")
991 [(match_operand:VH 1 "s_register_operand" "w")]
993 "TARGET_NEON_FP16INST"
994 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
995 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
998 (define_insn "*umin<mode>3_neon"
999 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1000 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1001 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1003 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1004 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned maximum (umax) of two VDQIW-mode register operands, emitted as
;; vmax with the <V_u_elem> element suffix.
1007 (define_insn "*umax<mode>3_neon"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1009 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1010 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1012 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1013 [(set_attr "type" "neon_minmax<q>")]
;; Signed minimum (smin) of two VDQW-mode register operands, emitted as
;; vmin with the <V_s_elem> element suffix.  The scheduling type is the
;; floating-point min/max class when <Is_float_mode> holds and the integer
;; min/max class otherwise.
1016 (define_insn "*smin<mode>3_neon"
1017 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1018 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1019 (match_operand:VDQW 2 "s_register_operand" "w")))]
1021 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1023 (if_then_else (match_test "<Is_float_mode>")
1024 (const_string "neon_fp_minmax_s<q>")
1025 (const_string "neon_minmax<q>")))]
;; Signed maximum (smax) of two VDQW-mode register operands, emitted as
;; vmax with the <V_s_elem> element suffix.  The scheduling type is the
;; floating-point min/max class when <Is_float_mode> holds and the integer
;; min/max class otherwise.
1028 (define_insn "*smax<mode>3_neon"
1029 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1030 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1031 (match_operand:VDQW 2 "s_register_operand" "w")))]
1033 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1035 (if_then_else (match_test "<Is_float_mode>")
1036 (const_string "neon_fp_minmax_s<q>")
1037 (const_string "neon_minmax<q>")))]
1040 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1041 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift for the VDQIW modes, with two alternatives.
;; Alternative 0 takes the shift count in a register and emits vshl with
;; the <V_s_elem> suffix.  Alternative 1 takes a "Dn" immediate and lets
;; neon_output_shift_immediate build the "vshl" instruction text.
1044 (define_insn "vashl<mode>3"
1045 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1046 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1047 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1050 switch (which_alternative)
1052 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1053 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1055 VALID_NEON_QREG_MODE (<MODE>mode),
1057 default: gcc_unreachable ();
1060 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift of a VDQIW vector by an immediate accepted by
;; imm_for_neon_rshift_operand.  The instruction text is produced by
;; neon_output_shift_immediate with mnemonic "vshr" and sign character 's'.
1063 (define_insn "vashr<mode>3_imm"
1064 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1065 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1066 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1069 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1070 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1073 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift of a VDQIW vector by an immediate accepted by
;; imm_for_neon_rshift_operand.  The instruction text is produced by
;; neon_output_shift_immediate with mnemonic "vshr" and sign character 'u'.
1076 (define_insn "vlshr<mode>3_imm"
1077 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1078 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1079 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1082 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1083 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1086 [(set_attr "type" "neon_shift_imm<q>")]
1089 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1090 ; negative amount, with signed operands. This is essentially the same as
1091 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky with negative
;; Register-count vshl with the signed element suffix <V_s_elem>, wrapped
;; in UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander emits it with a
;; negated shift count.
1094 (define_insn "ashl<mode>3_signed"
1095 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1096 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1097 (match_operand:VDQI 2 "s_register_operand" "w")]
1098 UNSPEC_ASHIFT_SIGNED))]
1100 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1101 [(set_attr "type" "neon_shift_reg<q>")]
1104 ; Used for implementing logical shift-right, which is a left-shift by a negative
1105 ; amount, with unsigned operands.
;; Register-count vshl with the unsigned element suffix <V_u_elem>, wrapped
;; in UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander emits it with a
;; negated shift count.
1107 (define_insn "ashl<mode>3_unsigned"
1108 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1109 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1110 (match_operand:VDQI 2 "s_register_operand" "w")]
1111 UNSPEC_ASHIFT_UNSIGNED))]
1113 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1114 [(set_attr "type" "neon_shift_reg<q>")]
;; Arithmetic right shift expander for the VDQIW modes.  When the count
;; (operand 2) is a register, it is negated into a fresh pseudo and
;; ashl<mode>3_signed is emitted with that negated count.  Otherwise the
;; immediate form vashr<mode>3_imm is emitted.
1117 (define_expand "vashr<mode>3"
1118 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1119 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1120 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1123 if (s_register_operand (operands[2], <MODE>mode))
1125 rtx neg = gen_reg_rtx (<MODE>mode);
1126 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1127 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1130 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Logical right shift expander for the VDQIW modes.  When the count
;; (operand 2) is a register, it is negated into a fresh pseudo and
;; ashl<mode>3_unsigned is emitted with that negated count.  Otherwise the
;; immediate form vlshr<mode>3_imm is emitted.
1134 (define_expand "vlshr<mode>3"
1135 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1136 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1137 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1140 if (s_register_operand (operands[2], <MODE>mode))
1142 rtx neg = gen_reg_rtx (<MODE>mode);
1143 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1144 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1147 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1153 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1154 ;; leaving the upper half uninitialized. This is OK since the shift
1155 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1156 ;; data flow analysis however, we pretend the full register is set
;; Two alternatives for placing the SImode count into lane 0 of the DImode
;; destination: vld1.32 from a "Um" operand (scheduled as a one-register
;; load), or vmov.32 from an "r" operand (scheduled as a move from a
;; general-purpose register).  The result is modelled as
;; UNSPEC_LOAD_COUNT.
1158 (define_insn "neon_load_count"
1159 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1160 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1161 UNSPEC_LOAD_COUNT))]
1164 vld1.32\t{%P0[0]}, %A1
1165 vmov.32\t%P0[0], %1"
1166 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift with no clobbers, emitted as vshl.u64.  Valid only
;; after reload; a constant count must satisfy 0 <= count < 64.
;; Alternative 0 uses an immediate count, alternative 1 a register count.
1169 (define_insn "ashldi3_neon_noclobber"
1170 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1171 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1172 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1173 "TARGET_NEON && reload_completed
1174 && (!CONST_INT_P (operands[2])
1175 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1177 vshl.u64\t%P0, %P1, %2
1178 vshl.u64\t%P0, %P1, %P2"
1179 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift, split after reload according to where operand 0
;; lives.
;;  - Operand 0 in a VFP register (IS_VFP_REGNUM): a constant count below
;;    1 becomes a plain DImode move, and a constant above 63 is replaced
;;    by 63.  A non-constant count is first loaded into the DI scratch
;;    (operand 5) with neon_load_count.  The shift is then emitted through
;;    ashldi3_neon_noclobber.
;;  - Otherwise: arm_emit_coreregs_64bit_shift performs the shift with
;;    scratch operands 3 and 4.  Source and destination must either not
;;    overlap or be the same register (asserted).
1182 (define_insn_and_split "ashldi3_neon"
1183 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
1184 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
1185 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
1186 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
1187 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1188 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
1189 (clobber (reg:CC_C CC_REGNUM))]
1192 "TARGET_NEON && reload_completed"
1196 if (IS_VFP_REGNUM (REGNO (operands[0])))
1198 if (CONST_INT_P (operands[2]))
1200 if (INTVAL (operands[2]) < 1)
1202 emit_insn (gen_movdi (operands[0], operands[1]));
1205 else if (INTVAL (operands[2]) > 63)
1206 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1210 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1211 operands[2] = operands[5];
1214 /* Ditch the unnecessary clobbers. */
1215 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1220 /* The shift expanders support either full overlap or no overlap. */
1221 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1222 || REGNO (operands[0]) == REGNO (operands[1]));
1224 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1225 operands[2], operands[3], operands[4]);
1229 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1230 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1231 (set_attr "type" "multiple")]
1234 ; The shift amount needs to be negated for right-shifts
; DImode vshl.s64 by a register count, modelled as UNSPEC_ASHIFT_SIGNED.
; Valid only after reload.
1235 (define_insn "signed_shift_di3_neon"
1236 [(set (match_operand:DI 0 "s_register_operand" "=w")
1237 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1238 (match_operand:DI 2 "s_register_operand" " w")]
1239 UNSPEC_ASHIFT_SIGNED))]
1240 "TARGET_NEON && reload_completed"
1241 "vshl.s64\t%P0, %P1, %P2"
1242 [(set_attr "type" "neon_shift_reg")]
1245 ; The shift amount needs to be negated for right-shifts
; DImode vshl.u64 by a register count, modelled as UNSPEC_ASHIFT_UNSIGNED.
; Valid only after reload.
1246 (define_insn "unsigned_shift_di3_neon"
1247 [(set (match_operand:DI 0 "s_register_operand" "=w")
1248 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1249 (match_operand:DI 2 "s_register_operand" " w")]
1250 UNSPEC_ASHIFT_UNSIGNED))]
1251 "TARGET_NEON && reload_completed"
1252 "vshl.u64\t%P0, %P1, %P2"
1253 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic right shift by a constant, emitted as vshr.s64 with
;; no clobbers.  Valid only after reload and only for counts in 1..64.
1256 (define_insn "ashrdi3_neon_imm_noclobber"
1257 [(set (match_operand:DI 0 "s_register_operand" "=w")
1258 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1259 (match_operand:DI 2 "const_int_operand" " i")))]
1260 "TARGET_NEON && reload_completed
1261 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1262 "vshr.s64\t%P0, %P1, %2"
1263 [(set_attr "type" "neon_shift_imm")]
;; DImode logical right shift by a constant, emitted as vshr.u64 with no
;; clobbers.  Valid only after reload and only for counts in 1..64.
1266 (define_insn "lshrdi3_neon_imm_noclobber"
1267 [(set (match_operand:DI 0 "s_register_operand" "=w")
1268 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1269 (match_operand:DI 2 "const_int_operand" " i")))]
1270 "TARGET_NEON && reload_completed
1271 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1272 "vshr.u64\t%P0, %P1, %2"
1273 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (one pattern per code in RSHIFTS), split after
;; reload according to where operand 0 lives.
;;  - Operand 0 in a VFP register, constant count: a count below 1 becomes
;;    a plain DImode move, a count above 64 is replaced by 64, and the
;;    shift is emitted through <shift>di3_neon_imm_noclobber.
;;  - Operand 0 in a VFP register, register count: the count is negated
;;    into scratch operand 3 and loaded into the DI scratch (operand 5)
;;    with neon_load_count.  <shifttype>_shift_di3_neon is then emitted,
;;    i.e. a left shift by the negative amount.
;;  - Otherwise: arm_emit_coreregs_64bit_shift performs the shift with
;;    scratch operands 3 and 4.  Source and destination must either not
;;    overlap or be the same register (asserted).
1278 (define_insn_and_split "<shift>di3_neon"
1279 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
1280 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1281 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1282 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1283 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1284 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1285 (clobber (reg:CC CC_REGNUM))]
1288 "TARGET_NEON && reload_completed"
1292 if (IS_VFP_REGNUM (REGNO (operands[0])))
1294 if (CONST_INT_P (operands[2]))
1296 if (INTVAL (operands[2]) < 1)
1298 emit_insn (gen_movdi (operands[0], operands[1]));
1301 else if (INTVAL (operands[2]) > 64)
1302 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1304 /* Ditch the unnecessary clobbers. */
1305 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1311 /* We must use a negative left-shift. */
1312 emit_insn (gen_negsi2 (operands[3], operands[2]));
1313 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1314 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1320 /* The shift expanders support either full overlap or no overlap. */
1321 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1322 || REGNO (operands[0]) == REGNO (operands[1]));
1324 /* This clobbers CC (ASHIFTRT by register only). */
1325 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1326 operands[2], operands[3], operands[4]);
1331 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1332 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1333 (set_attr "type" "multiple")]
1336 ;; Widening operations
;; Widening signed sum for the VQI modes: operand 0 = operand 2 plus the
;; sign-extended operand 1.  The accumulator (operand 2) is copied into
;; operand 0 when they differ.  Two insns are then emitted,
;; vec_sel_widen_ssum_lo and vec_sel_widen_ssum_hi, one per half of
;; operand 1.  The half selectors p1/p2 come from
;; arm_simd_vect_par_cnst_half called with false and true.
1338 (define_expand "widen_ssum<mode>3"
1339 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1340 (plus:<V_double_width>
1341 (sign_extend:<V_double_width>
1342 (match_operand:VQI 1 "s_register_operand" ""))
1343 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1346 machine_mode mode = GET_MODE (operands[1]);
1349 p1 = arm_simd_vect_par_cnst_half (mode, false);
1350 p2 = arm_simd_vect_par_cnst_half (mode, true);
1352 if (operands[0] != operands[2])
1353 emit_move_insn (operands[0], operands[2]);
1355 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1359 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Adds the sign-extended half of operand 1 selected by a
;; vect_par_constant_low parallel to the accumulator operand 3, which is
;; tied to operand 0.  Emits vaddw; the half of operand 1 that is printed
;; is %e1 on little-endian and %f1 on big-endian.
1367 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1368 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1369 (plus:<V_double_width>
1370 (sign_extend:<V_double_width>
1371 (vec_select:<V_HALF>
1372 (match_operand:VQI 1 "s_register_operand" "%w")
1373 (match_operand:VQI 2 "vect_par_constant_low" "")))
1374 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1377 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1378 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1380 [(set_attr "type" "neon_add_widen")])
;; Adds the sign-extended half of operand 1 selected by a
;; vect_par_constant_high parallel to the accumulator operand 3, which is
;; tied to operand 0.  Emits vaddw; the half of operand 1 that is printed
;; is %f1 on little-endian and %e1 on big-endian.
1382 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1383 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1384 (plus:<V_double_width>
1385 (sign_extend:<V_double_width>
1386 (vec_select:<V_HALF>
1387 (match_operand:VQI 1 "s_register_operand" "%w")
1388 (match_operand:VQI 2 "vect_par_constant_high" "")))
1389 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1392 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1393 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1395 [(set_attr "type" "neon_add_widen")])
;; Widening signed sum for the VW modes: the sign-extended operand 1 is
;; added to the <V_widen> accumulator operand 2 with a single vaddw.
1397 (define_insn "widen_ssum<mode>3"
1398 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1400 (sign_extend:<V_widen>
1401 (match_operand:VW 1 "s_register_operand" "%w"))
1402 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1404 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1405 [(set_attr "type" "neon_add_widen")]
;; Widening unsigned sum for the VQI modes: operand 0 = operand 2 plus the
;; zero-extended operand 1.  The accumulator (operand 2) is copied into
;; operand 0 when they differ.  Two insns are then emitted,
;; vec_sel_widen_usum_lo and vec_sel_widen_usum_hi, one per half of
;; operand 1.  The half selectors p1/p2 come from
;; arm_simd_vect_par_cnst_half called with false and true.
1408 (define_expand "widen_usum<mode>3"
1409 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1410 (plus:<V_double_width>
1411 (zero_extend:<V_double_width>
1412 (match_operand:VQI 1 "s_register_operand" ""))
1413 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1416 machine_mode mode = GET_MODE (operands[1]);
1419 p1 = arm_simd_vect_par_cnst_half (mode, false);
1420 p2 = arm_simd_vect_par_cnst_half (mode, true);
1422 if (operands[0] != operands[2])
1423 emit_move_insn (operands[0], operands[2]);
1425 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1429 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Adds the zero-extended half of operand 1 selected by a
;; vect_par_constant_low parallel to the accumulator operand 3, which is
;; tied to operand 0.  Emits vaddw; the half of operand 1 that is printed
;; is %e1 on little-endian and %f1 on big-endian.
1437 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1438 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1439 (plus:<V_double_width>
1440 (zero_extend:<V_double_width>
1441 (vec_select:<V_HALF>
1442 (match_operand:VQI 1 "s_register_operand" "%w")
1443 (match_operand:VQI 2 "vect_par_constant_low" "")))
1444 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1447 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1448 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1450 [(set_attr "type" "neon_add_widen")])
;; Adds the zero-extended half of operand 1 selected by a
;; vect_par_constant_high parallel to the accumulator operand 3, which is
;; tied to operand 0.  Emits vaddw; the half of operand 1 that is printed
;; is %f1 on little-endian and %e1 on big-endian.
1452 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1453 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1454 (plus:<V_double_width>
1455 (zero_extend:<V_double_width>
1456 (vec_select:<V_HALF>
1457 (match_operand:VQI 1 "s_register_operand" "%w")
1458 (match_operand:VQI 2 "vect_par_constant_high" "")))
1459 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1462 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1463 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1465 [(set_attr "type" "neon_add_widen")])
;; Widening unsigned sum for the VW modes: the zero-extended operand 1 is
;; added to the <V_widen> accumulator operand 2 with a single vaddw.
1467 (define_insn "widen_usum<mode>3"
1468 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1469 (plus:<V_widen> (zero_extend:<V_widen>
1470 (match_operand:VW 1 "s_register_operand" "%w"))
1471 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1473 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1474 [(set_attr "type" "neon_add_widen")]
1477 ;; Helpers for quad-word reduction operations
1479 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1480 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1481 ; N/2-element vector.
; V4SI variant: combines lanes 0-1 of operand 1 with lanes 2-3 of the same
; register and writes a V2SI result.  The instruction is <VQH_mnem> with
; element size 32, applied to the two halves %e1 and %f1.
1483 (define_insn "quad_halves_<code>v4si"
1484 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1486 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1487 (parallel [(const_int 0) (const_int 1)]))
1488 (vec_select:V2SI (match_dup 1)
1489 (parallel [(const_int 2) (const_int 3)]))))]
1491 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1492 [(set_attr "vqh_mnem" "<VQH_mnem>")
1493 (set_attr "type" "neon_reduc_<VQH_type>_q")]
; V4SF variant: combines lanes 0-1 of operand 1 with lanes 2-3 of the same
; register and writes a V2SF result using <VQH_mnem>.f32.  Enabled only
; when flag_unsafe_math_optimizations is set.
1496 (define_insn "quad_halves_<code>v4sf"
1497 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1499 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1500 (parallel [(const_int 0) (const_int 1)]))
1501 (vec_select:V2SF (match_dup 1)
1502 (parallel [(const_int 2) (const_int 3)]))))]
1503 "TARGET_NEON && flag_unsafe_math_optimizations"
1504 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1505 [(set_attr "vqh_mnem" "<VQH_mnem>")
1506 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
; V8HI variant: combines lanes 0-3 of operand 1 with lanes 4-7 of the same
; register and writes a V4HI result.  The instruction is <VQH_mnem> with
; element size 16, applied to the two halves %e1 and %f1.
; Operand 0 is only the destination of the set and is never read, so it
; uses the output modifier "=w" like the V4SI and V4SF variants, not the
; read-write modifier "+w".
1509 (define_insn "quad_halves_<code>v8hi"
1510 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1512 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1513 (parallel [(const_int 0) (const_int 1)
1514 (const_int 2) (const_int 3)]))
1515 (vec_select:V4HI (match_dup 1)
1516 (parallel [(const_int 4) (const_int 5)
1517 (const_int 6) (const_int 7)]))))]
1519 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1520 [(set_attr "vqh_mnem" "<VQH_mnem>")
1521 (set_attr "type" "neon_reduc_<VQH_type>_q")]
; V16QI variant: combines lanes 0-7 of operand 1 with lanes 8-15 of the
; same register and writes a V8QI result.  The instruction is <VQH_mnem>
; with element size 8, applied to the two halves %e1 and %f1.
; Operand 0 is only the destination of the set and is never read, so it
; uses the output modifier "=w" like the V4SI and V4SF variants, not the
; read-write modifier "+w".
1524 (define_insn "quad_halves_<code>v16qi"
1525 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1527 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1528 (parallel [(const_int 0) (const_int 1)
1529 (const_int 2) (const_int 3)
1530 (const_int 4) (const_int 5)
1531 (const_int 6) (const_int 7)]))
1532 (vec_select:V8QI (match_dup 1)
1533 (parallel [(const_int 8) (const_int 9)
1534 (const_int 10) (const_int 11)
1535 (const_int 12) (const_int 13)
1536 (const_int 14) (const_int 15)]))))]
1538 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1539 [(set_attr "vqh_mnem" "<VQH_mnem>")
1540 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Writes into the <V_HALF>mode subreg of the ANY128 operand 0 located at
;; byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. the second half of the
;; register.  Presumably the value moved is operand 1 -- confirm.
1543 (define_expand "move_hi_quad_<mode>"
1544 [(match_operand:ANY128 0 "s_register_operand" "")
1545 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1548 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1549 GET_MODE_SIZE (<V_HALF>mode)),
;; Writes into a <V_HALF>mode subreg of the ANY128 operand 0.
;; NOTE(review): by symmetry with move_hi_quad_<mode> this is presumably
;; the subreg at byte offset 0, filled from operand 1 -- confirm.
1554 (define_expand "move_lo_quad_<mode>"
1555 [(match_operand:ANY128 0 "s_register_operand" "")
1556 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1559 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1565 ;; Reduction operations
;; Sum reduction of a VD-mode vector to a scalar.  neon_pairwise_reduce
;; is driven with the neon_vpadd_internal generator, then element 0 of
;; the result vector is extracted into operand 0.  Float modes require
;; flag_unsafe_math_optimizations.
1567 (define_expand "reduc_plus_scal_<mode>"
1568 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1569 (match_operand:VD 1 "s_register_operand" "")]
1570 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1572 rtx vec = gen_reg_rtx (<MODE>mode);
1573 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1574 &gen_neon_vpadd_internal<mode>);
1575 /* The same result is actually computed into every element. */
1576 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Sum reduction of a VQ-mode vector to a scalar.  The two halves are
;; first added into a <V_HALF>mode temporary with quad_halves_plus<mode>,
;; then the half-width reduc_plus_scal expander finishes the reduction.
;; Little-endian only; float modes require flag_unsafe_math_optimizations.
1580 (define_expand "reduc_plus_scal_<mode>"
1581 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1582 (match_operand:VQ 1 "s_register_operand" "")]
1583 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1584 && !BYTES_BIG_ENDIAN"
1586 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1588 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1589 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector to a DImode scalar.  It emits
;; arm_reduc_plus_internal_v2di into a V2DI temporary and then extracts
;; element 0 into operand 0.  Little-endian only.
1594 (define_expand "reduc_plus_scal_v2di"
1595 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1596 (match_operand:V2DI 1 "s_register_operand" "")]
1597 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1599 rtx vec = gen_reg_rtx (V2DImode);
1601 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1602 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
;; Adds the two 64-bit halves of operand 1 (%e1 and %f1) with vadd.i64
;; and writes the sum to the %e0 half of operand 0.  Little-endian only.
1607 (define_insn "arm_reduc_plus_internal_v2di"
1608 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1609 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1611 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1612 "vadd.i64\t%e0, %e1, %f1"
1613 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a VD-mode vector to a scalar.
;; neon_pairwise_reduce is driven with the neon_vpsmin generator, then
;; element 0 of the result vector is extracted into operand 0.  Float
;; modes require flag_unsafe_math_optimizations.
1616 (define_expand "reduc_smin_scal_<mode>"
1617 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1618 (match_operand:VD 1 "s_register_operand" "")]
1619 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1621 rtx vec = gen_reg_rtx (<MODE>mode);
1623 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1624 &gen_neon_vpsmin<mode>);
1625 /* The result is computed into every element of the vector. */
1626 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction of a VQ-mode vector to a scalar.  The two
;; halves are first combined into a <V_HALF>mode temporary with
;; quad_halves_smin<mode>, then the half-width reduc_smin_scal expander
;; finishes the reduction.  Little-endian only; float modes require
;; flag_unsafe_math_optimizations.
1630 (define_expand "reduc_smin_scal_<mode>"
1631 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1632 (match_operand:VQ 1 "s_register_operand" "")]
1633 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1634 && !BYTES_BIG_ENDIAN"
1636 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1638 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1639 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of a VD-mode vector to a scalar.
;; neon_pairwise_reduce is driven with the neon_vpsmax generator, then
;; element 0 of the result vector is extracted into operand 0.  Float
;; modes require flag_unsafe_math_optimizations.
1644 (define_expand "reduc_smax_scal_<mode>"
1645 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1646 (match_operand:VD 1 "s_register_operand" "")]
1647 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1649 rtx vec = gen_reg_rtx (<MODE>mode);
1650 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1651 &gen_neon_vpsmax<mode>);
1652 /* The result is computed into every element of the vector. */
1653 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction of a VQ-mode vector to a scalar.  The two
;; halves are first combined into a <V_HALF>mode temporary with
;; quad_halves_smax<mode>, then the half-width reduc_smax_scal expander
;; finishes the reduction.  Little-endian only; float modes require
;; flag_unsafe_math_optimizations.
1657 (define_expand "reduc_smax_scal_<mode>"
1658 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1659 (match_operand:VQ 1 "s_register_operand" "")]
1660 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1661 && !BYTES_BIG_ENDIAN"
1663 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1665 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1666 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of a VDI-mode vector to a scalar.
;; neon_pairwise_reduce is driven with the neon_vpumin generator, then
;; element 0 of the result vector is extracted into operand 0.
1671 (define_expand "reduc_umin_scal_<mode>"
1672 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1673 (match_operand:VDI 1 "s_register_operand" "")]
1676 rtx vec = gen_reg_rtx (<MODE>mode);
1677 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1678 &gen_neon_vpumin<mode>);
1679 /* The result is computed into every element of the vector. */
1680 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction of a VQI-mode vector to a scalar.  The two
;; halves are first combined into a <V_HALF>mode temporary with
;; quad_halves_umin<mode>, then the half-width reduc_umin_scal expander
;; finishes the reduction.  Little-endian only.
1684 (define_expand "reduc_umin_scal_<mode>"
1685 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1686 (match_operand:VQI 1 "s_register_operand" "")]
1687 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1689 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1691 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1692 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of a VDI-mode vector to a scalar.
;; neon_pairwise_reduce is driven with the neon_vpumax generator, then
;; element 0 of the result vector is extracted into operand 0.
1697 (define_expand "reduc_umax_scal_<mode>"
1698 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1699 (match_operand:VDI 1 "s_register_operand" "")]
1702 rtx vec = gen_reg_rtx (<MODE>mode);
1703 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1704 &gen_neon_vpumax<mode>);
1705 /* The result is computed into every element of the vector. */
1706 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction of a VQI-mode vector to a scalar.  The two
;; halves are first combined into a <V_HALF>mode temporary with
;; quad_halves_umax<mode>, then the half-width reduc_umax_scal expander
;; finishes the reduction.  Little-endian only.
1710 (define_expand "reduc_umax_scal_<mode>"
1711 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1712 (match_operand:VQI 1 "s_register_operand" "")]
1713 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1715 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1717 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1718 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add of two VD-mode operands, emitted as vpadd with the
;; <V_if_elem> suffix.  Used as the step generator by the VD
;; reduc_plus_scal expander.  The scheduling type is the floating-point
;; or the integer reduce-add class depending on <Is_float_mode>.
1723 (define_insn "neon_vpadd_internal<mode>"
1724 [(set (match_operand:VD 0 "s_register_operand" "=w")
1725 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1726 (match_operand:VD 2 "s_register_operand" "w")]
1729 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1730 ;; Assume this schedules like vadd.
1732 (if_then_else (match_test "<Is_float_mode>")
1733 (const_string "neon_fp_reduc_add_s<q>")
1734 (const_string "neon_reduc_add<q>")))]
;; Pairwise add of two V4HF operands, emitted as vpadd.f16.  Enabled only
;; under TARGET_NEON_FP16INST.
1737 (define_insn "neon_vpaddv4hf"
1739 (match_operand:V4HF 0 "s_register_operand" "=w")
1740 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1741 (match_operand:V4HF 2 "s_register_operand" "w")]
1743 "TARGET_NEON_FP16INST"
1744 "vpadd.f16\t%P0, %P1, %P2"
1745 [(set_attr "type" "neon_reduc_add")]
;; Pairwise signed minimum of two VD-mode operands, emitted as vpmin with
;; the <V_s_elem> suffix.  Used as the step generator by the VD
;; reduc_smin_scal expander.  The scheduling type depends on
;; <Is_float_mode>.
1748 (define_insn "neon_vpsmin<mode>"
1749 [(set (match_operand:VD 0 "s_register_operand" "=w")
1750 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1751 (match_operand:VD 2 "s_register_operand" "w")]
1754 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1756 (if_then_else (match_test "<Is_float_mode>")
1757 (const_string "neon_fp_reduc_minmax_s<q>")
1758 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum of two VD-mode operands, emitted as vpmax with
;; the <V_s_elem> suffix.  Used as the step generator by the VD
;; reduc_smax_scal expander.  The scheduling type depends on
;; <Is_float_mode>.
1761 (define_insn "neon_vpsmax<mode>"
1762 [(set (match_operand:VD 0 "s_register_operand" "=w")
1763 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1764 (match_operand:VD 2 "s_register_operand" "w")]
1767 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1769 (if_then_else (match_test "<Is_float_mode>")
1770 (const_string "neon_fp_reduc_minmax_s<q>")
1771 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum of two VDI-mode operands, emitted as vpmin
;; with the <V_u_elem> suffix.  Used as the step generator by the VDI
;; reduc_umin_scal expander.
1774 (define_insn "neon_vpumin<mode>"
1775 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1776 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1777 (match_operand:VDI 2 "s_register_operand" "w")]
1780 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1781 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum of two VDI-mode operands, emitted as vpmax
;; with the <V_u_elem> suffix.  Used as the step generator by the VDI
;; reduc_umax_scal expander.
1784 (define_insn "neon_vpumax<mode>"
1785 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1786 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1787 (match_operand:VDI 2 "s_register_operand" "w")]
1790 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1791 [(set_attr "type" "neon_reduc_minmax<q>")]
1794 ;; Saturating arithmetic
1796 ; NOTE: Neon supports many more saturating variants of instructions than the
1797 ; following, but these are all GCC currently understands.
1798 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1799 ; yet either, although these patterns may be used by intrinsics when they're
; Signed saturating add (ss_plus) of two VD-mode operands, emitted as
; vqadd with the <V_s_elem> suffix.
1802 (define_insn "*ss_add<mode>_neon"
1803 [(set (match_operand:VD 0 "s_register_operand" "=w")
1804 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1805 (match_operand:VD 2 "s_register_operand" "w")))]
1807 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1808 [(set_attr "type" "neon_qadd<q>")]
; Unsigned saturating add (us_plus) of two VD-mode operands, emitted as
; vqadd with the <V_u_elem> suffix.
1811 (define_insn "*us_add<mode>_neon"
1812 [(set (match_operand:VD 0 "s_register_operand" "=w")
1813 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1814 (match_operand:VD 2 "s_register_operand" "w")))]
1816 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1817 [(set_attr "type" "neon_qadd<q>")]
; Signed saturating subtract (ss_minus) of two VD-mode operands, emitted
; as vqsub with the <V_s_elem> suffix.
1820 (define_insn "*ss_sub<mode>_neon"
1821 [(set (match_operand:VD 0 "s_register_operand" "=w")
1822 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1823 (match_operand:VD 2 "s_register_operand" "w")))]
1825 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1826 [(set_attr "type" "neon_qsub<q>")]
; Unsigned saturating subtract (us_minus) of two VD-mode operands, emitted
; as vqsub with the <V_u_elem> suffix.
1829 (define_insn "*us_sub<mode>_neon"
1830 [(set (match_operand:VD 0 "s_register_operand" "=w")
1831 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1832 (match_operand:VD 2 "s_register_operand" "w")))]
1834 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1835 [(set_attr "type" "neon_qsub<q>")]
1838 ;; Conditional instructions. These are comparisons with conditional moves for
1839 ;; vectors. They perform the assignment:
1841 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1843 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select for the VDQW modes.  A lane mask is built in
;; a <V_cmp_result>mode pseudo from the comparison (operator 3 applied to
;; operands 4 and 5).  gen_neon_vbsl then selects between operands 1 and
;; 2, with the two swapped when swap_bsl_operands is set.  A constant-zero
;; operand 5 is detected so that compare-against-zero forms can be used;
;; any other non-register operand 5 is forced into a register.  Float
;; modes require flag_unsafe_math_optimizations.
1846 (define_expand "vcond<mode><mode>"
1847 [(set (match_operand:VDQW 0 "s_register_operand" "")
1849 (match_operator 3 "comparison_operator"
1850 [(match_operand:VDQW 4 "s_register_operand" "")
1851 (match_operand:VDQW 5 "nonmemory_operand" "")])
1852 (match_operand:VDQW 1 "s_register_operand" "")
1853 (match_operand:VDQW 2 "s_register_operand" "")))]
1854 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1857 int use_zero_form = 0;
1858 int swap_bsl_operands = 0;
1859 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1860 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1862 rtx (*base_comparison) (rtx, rtx, rtx);
1863 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1865 switch (GET_CODE (operands[3]))
1872 if (operands[5] == CONST0_RTX (<MODE>mode))
1879 if (!REG_P (operands[5]))
1880 operands[5] = force_reg (<MODE>mode, operands[5]);
1883 switch (GET_CODE (operands[3]))
1893 base_comparison = gen_neon_vcge<mode>;
1894 complimentary_comparison = gen_neon_vcgt<mode>;
1902 base_comparison = gen_neon_vcgt<mode>;
1903 complimentary_comparison = gen_neon_vcge<mode>;
1908 base_comparison = gen_neon_vceq<mode>;
1909 complimentary_comparison = gen_neon_vceq<mode>;
1915 switch (GET_CODE (operands[3]))
1922 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1923 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1929 Note that there also exist direct comparison against 0 forms,
1930 so catch those as a special case. */
1934 switch (GET_CODE (operands[3]))
1937 base_comparison = gen_neon_vclt<mode>;
1940 base_comparison = gen_neon_vcle<mode>;
1943 /* Do nothing, other zero form cases already have the correct
1950 emit_insn (base_comparison (mask, operands[4], operands[5]));
1952 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1959 /* Vector compare returns false for lanes which are unordered, so if we use
1960 the inverse of the comparison we actually want to emit, then
1961 swap the operands to BSL, we will end up with the correct result.
1962 Note that a NE NaN and NaN NE b are true for all a, b.
1964 Our transformations are:
1969 a NE b -> !(a EQ b) */
1972 emit_insn (base_comparison (mask, operands[4], operands[5]));
1974 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1976 swap_bsl_operands = 1;
1979 /* We check (a > b || b > a). Combining these comparisons gives us
1980 true iff (a != b && a ORDERED b); swapping the operands to BSL
1981 will then give us (a == b || a UNORDERED b) as intended. */
1983 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1984 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1985 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1986 swap_bsl_operands = 1;
1989 /* Operands are ORDERED iff (a > b || b >= a).
1990 Swapping the operands to BSL will give the UNORDERED case. */
1991 swap_bsl_operands = 1;
1994 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1995 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1996 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2002 if (swap_bsl_operands)
2003 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2006 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Unsigned vector conditional select for the VDQIW modes.  A lane mask
;; is built in a <V_cmp_result>mode pseudo with vcgeu, vcgtu or vceq, and
;; gen_neon_vbsl then selects between operands 1 and 2.  The code also
;; emits operand-swapped vcgeu/vcgtu calls (operands 5 and 4 reversed)
;; and, alternatively, the vcle/vclt forms.  A constant-zero operand 5 is
;; detected first; any other non-register operand 5 is forced into a
;; register.
2011 (define_expand "vcondu<mode><mode>"
2012 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2014 (match_operator 3 "arm_comparison_operator"
2015 [(match_operand:VDQIW 4 "s_register_operand" "")
2016 (match_operand:VDQIW 5 "s_register_operand" "")])
2017 (match_operand:VDQIW 1 "s_register_operand" "")
2018 (match_operand:VDQIW 2 "s_register_operand" "")))]
2022 int inverse = 0, immediate_zero = 0;
2024 mask = gen_reg_rtx (<V_cmp_result>mode);
2026 if (operands[5] == CONST0_RTX (<MODE>mode))
2028 else if (!REG_P (operands[5]))
2029 operands[5] = force_reg (<MODE>mode, operands[5]);
2031 switch (GET_CODE (operands[3]))
2034 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2038 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2042 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2047 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2049 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2054 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2056 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2060 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2069 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2072 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2078 ;; Patterns for builtins.
2080 ; good for plain vadd, vaddq.
; vadd expander for the VCVTF modes.  When the mode is not a float mode,
; or flag_unsafe_math_optimizations is set, it emits the generic
; add<mode>3 pattern; otherwise it emits neon_vadd<mode>_unspec.
2082 (define_expand "neon_vadd<mode>"
2083 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2084 (match_operand:VCVTF 1 "s_register_operand" "w")
2085 (match_operand:VCVTF 2 "s_register_operand" "w")]
2088 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2089 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2091 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
; vadd expander for the VH modes: forwards all three operands to the
; add<mode>3_fp16 pattern.  Enabled only under TARGET_NEON_FP16INST.
2096 (define_expand "neon_vadd<mode>"
2097 [(match_operand:VH 0 "s_register_operand")
2098 (match_operand:VH 1 "s_register_operand")
2099 (match_operand:VH 2 "s_register_operand")]
2100 "TARGET_NEON_FP16INST"
2102 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; vsub builtin expander for the VH (half-precision vector) modes.  Only
;; available under TARGET_NEON_FP16INST; it forwards all three operands to
;; sub<mode>3_fp16.
2106 (define_expand "neon_vsub<mode>"
2107 [(match_operand:VH 0 "s_register_operand")
2108 (match_operand:VH 1 "s_register_operand")
2109 (match_operand:VH 2 "s_register_operand")]
2110 "TARGET_NEON_FP16INST"
2112 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2116 ; Note that NEON operations don't support the full IEEE 754 standard: in
2117 ; particular, denormal values are flushed to zero. This means that GCC cannot
2118 ; use those instructions for autovectorization, etc. unless
2119 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2120 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2121 ; header) must work in either case: if -funsafe-math-optimizations is given,
2122 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2123 ; expand to unspecs (which may potentially limit the extent to which they might
2124 ; be optimized by generic code).
2126 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd over the VCVTF modes: operands 1 and 2 are summed
;; into operand 0, all in "w" registers, and printed as vadd.<V_if_elem>.
;; The attribute expression below picks neon_fp_addsub_s<q> for float modes
;; and neon_add<q> otherwise.
2128 (define_insn "neon_vadd<mode>_unspec"
2129 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2130 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2131 (match_operand:VCVTF 2 "s_register_operand" "w")]
2134 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2136 (if_then_else (match_test "<Is_float_mode>")
2137 (const_string "neon_fp_addsub_s<q>")
2138 (const_string "neon_add<q>")))]
;; Long add (vaddl): two VDI operands produce a <V_widen> result.  The
;; template prints the destination with %q and both sources with %P.
2141 (define_insn "neon_vaddl<sup><mode>"
2142 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2143 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2144 (match_operand:VDI 2 "s_register_operand" "w")]
2147 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2148 [(set_attr "type" "neon_add_long")]
;; Wide add (vaddw): operand 1 is already in the <V_widen> mode and operand 2
;; is a VDI vector; the result is <V_widen>.  The template prints operands 0
;; and 1 with %q and operand 2 with %P.
2151 (define_insn "neon_vaddw<sup><mode>"
2152 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2153 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2154 (match_operand:VDI 2 "s_register_operand" "w")]
2157 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2158 [(set_attr "type" "neon_add_widen")]
;; Halving add (v<r>hadd) over the VDQIW modes; all three operands live in
;; "w" registers and are printed with %<V_reg>.
;; The scheduling type is built with the <q> mode attribute so that the
;; D-register modes are not classified as Q-register operations; this mirrors
;; the "neon_sub_halve<q>" type used by neon_vhsub<sup><mode>.
2163 (define_insn "neon_v<r>hadd<sup><mode>"
2164 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2165 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2166 (match_operand:VDQIW 2 "s_register_operand" "w")]
2169 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2170 [(set_attr "type" "neon_add_halve<q>")]
;; vqadd over the VDQIX modes; operands 1 and 2 are combined into operand 0,
;; all printed with %<V_reg>.  Scheduling type is neon_qadd<q>.
2173 (define_insn "neon_vqadd<sup><mode>"
2174 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2175 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2176 (match_operand:VDQIX 2 "s_register_operand" "w")]
2179 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2180 [(set_attr "type" "neon_qadd<q>")]
;; v<r>addhn: two VN operands produce a <V_narrow> result.  The template
;; prints the destination with %P and both sources with %q.
2183 (define_insn "neon_v<r>addhn<mode>"
2184 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2185 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2186 (match_operand:VN 2 "s_register_operand" "w")]
2189 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2190 [(set_attr "type" "neon_add_halve_narrow_q")]
2193 ;; Polynomial and Float multiplication.
;; vmul.<pf> over the VPF modes, written as an unspec of operands 1 and 2.
;; The attribute expression below picks neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
2194 (define_insn "neon_vmul<pf><mode>"
2195 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2196 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2197 (match_operand:VPF 2 "s_register_operand" "w")]
2200 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2202 (if_then_else (match_test "<Is_float_mode>")
2203 (const_string "neon_fp_mul_s<q>")
2204 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Standard-named multiply for the VH modes, emitted as vmul.f16.  It is only
;; enabled when both TARGET_NEON_FP16INST and flag_unsafe_math_optimizations
;; hold.
2207 (define_insn "mul<mode>3"
2209 (match_operand:VH 0 "s_register_operand" "=w")
2211 (match_operand:VH 1 "s_register_operand" "w")
2212 (match_operand:VH 2 "s_register_operand" "w")))]
2213 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2214 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2215 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; vmul.f16 for the VH modes under the neon_vmulf name.  Unlike mul<mode>3
;; its condition is TARGET_NEON_FP16INST alone, with no dependence on
;; flag_unsafe_math_optimizations.
2218 (define_insn "neon_vmulf<mode>"
2220 (match_operand:VH 0 "s_register_operand" "=w")
2222 (match_operand:VH 1 "s_register_operand" "w")
2223 (match_operand:VH 2 "s_register_operand" "w")))]
2224 "TARGET_NEON_FP16INST"
2225 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2226 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Expander behind the vmla builtin for the VDQW modes; operand 1 is the
;; accumulator (constraint "0").  When the mode is not a float mode, or
;; flag_unsafe_math_optimizations is set, it emits mul<mode>3add<mode>_neon;
;; the other emit visible here goes through neon_vmla<mode>_unspec.
2229 (define_expand "neon_vmla<mode>"
2230 [(match_operand:VDQW 0 "s_register_operand" "=w")
2231 (match_operand:VDQW 1 "s_register_operand" "0")
2232 (match_operand:VDQW 2 "s_register_operand" "w")
2233 (match_operand:VDQW 3 "s_register_operand" "w")]
2236 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2237 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2238 operands[2], operands[3]));
2240 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2241 operands[2], operands[3]));
;; vfma builtin expander for the VCVTF modes, enabled under
;; "TARGET_NEON && TARGET_FMA".  It forwards to fma<mode>4_intrinsic, passing
;; operands 2 and 3 directly after the destination; the remaining argument is
;; on a line not shown in this excerpt.
2245 (define_expand "neon_vfma<VCVTF:mode>"
2246 [(match_operand:VCVTF 0 "s_register_operand")
2247 (match_operand:VCVTF 1 "s_register_operand")
2248 (match_operand:VCVTF 2 "s_register_operand")
2249 (match_operand:VCVTF 3 "s_register_operand")]
2250 "TARGET_NEON && TARGET_FMA"
2252 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfma builtin expander for the VH modes, enabled under
;; TARGET_NEON_FP16INST.  It forwards to fma<mode>4_intrinsic, passing
;; operands 2 and 3 directly after the destination; the remaining argument is
;; on a line not shown in this excerpt.
2257 (define_expand "neon_vfma<VH:mode>"
2258 [(match_operand:VH 0 "s_register_operand")
2259 (match_operand:VH 1 "s_register_operand")
2260 (match_operand:VH 2 "s_register_operand")
2261 (match_operand:VH 3 "s_register_operand")]
2262 "TARGET_NEON_FP16INST"
2264 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms builtin expander for the VCVTF modes, enabled under
;; "TARGET_NEON && TARGET_FMA".  It forwards to fmsub<mode>4_intrinsic,
;; passing operands 2 and 3 directly after the destination; the remaining
;; argument is on a line not shown in this excerpt.
2269 (define_expand "neon_vfms<VCVTF:mode>"
2270 [(match_operand:VCVTF 0 "s_register_operand")
2271 (match_operand:VCVTF 1 "s_register_operand")
2272 (match_operand:VCVTF 2 "s_register_operand")
2273 (match_operand:VCVTF 3 "s_register_operand")]
2274 "TARGET_NEON && TARGET_FMA"
2276 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms builtin expander for the VH modes, enabled under
;; TARGET_NEON_FP16INST.  It forwards to fmsub<mode>4_intrinsic, passing
;; operands 2 and 3 directly after the destination; the remaining argument is
;; on a line not shown in this excerpt.
2281 (define_expand "neon_vfms<VH:mode>"
2282 [(match_operand:VH 0 "s_register_operand")
2283 (match_operand:VH 1 "s_register_operand")
2284 (match_operand:VH 2 "s_register_operand")
2285 (match_operand:VH 3 "s_register_operand")]
2286 "TARGET_NEON_FP16INST"
2288 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2293 ;; The expand RTL structure here is not important.
2294 ;; We use the gen_* functions anyway.
2295 ;; We just need something to wrap the iterators around.
;; Builtin expander iterated over VFMLHALVES.  The preparation code builds
;; the half-selecting constant with arm_simd_vect_par_cnst_half for
;; <VFML>mode and <vfml_half_selector>, then emits the matching
;; vfm<vfml_op>l_<vfml_half><mode>_intrinsic pattern.
2297 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2298 [(set (match_operand:VCVTF 0 "s_register_operand")
2300 [(match_operand:VCVTF 1 "s_register_operand")
2302 (match_operand:<VFML> 2 "s_register_operand")
2303 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2306 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2307 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
;; vfmal.f16 on the low halves: operands 2 and 3 (<VFML> mode) are each
;; narrowed by a vec_select whose selector must satisfy
;; vect_par_constant_low (operands 4 and 5).  Operand 1 is the accumulator,
;; tied to the destination.  Both sources are printed with %<V_lo>.
2315 (define_insn "vfmal_low<mode>_intrinsic"
2316 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2319 (vec_select:<VFMLSEL>
2320 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2321 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2323 (vec_select:<VFMLSEL>
2324 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2325 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2326 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2328 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2329 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 on the high halves: operands 2 and 3 (<VFML> mode) are each
;; narrowed by a vec_select whose selector must satisfy
;; vect_par_constant_high (operands 4 and 5).  Operand 1 is the accumulator,
;; tied to the destination.  Both sources are printed with %<V_hi>.
2332 (define_insn "vfmsl_high<mode>_intrinsic"
2333 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2337 (vec_select:<VFMLSEL>
2338 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2339 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2341 (vec_select:<VFMLSEL>
2342 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2343 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2344 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2346 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2347 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal.f16 on the high halves: operands 2 and 3 (<VFML> mode) are each
;; narrowed by a vec_select whose selector must satisfy
;; vect_par_constant_high (operands 4 and 5).  Operand 1 is the accumulator,
;; tied to the destination.  Both sources are printed with %<V_hi>.
2350 (define_insn "vfmal_high<mode>_intrinsic"
2351 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2354 (vec_select:<VFMLSEL>
2355 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2356 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2358 (vec_select:<VFMLSEL>
2359 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2360 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2361 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2363 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2364 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 on the low halves: operands 2 and 3 (<VFML> mode) are each
;; narrowed by a vec_select whose selector must satisfy
;; vect_par_constant_low (operands 4 and 5).  Operand 1 is the accumulator,
;; tied to the destination.  Both sources are printed with %<V_lo>.
2367 (define_insn "vfmsl_low<mode>_intrinsic"
2368 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2372 (vec_select:<VFMLSEL>
2373 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2374 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2376 (vec_select:<VFMLSEL>
2377 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2378 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2379 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2381 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2382 [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane builtin expander where both multiplicands have mode <VFML>.
;; Operand 4 is the constant lane index; the preparation code maps it through
;; NEON_ENDIAN_LANE_N for <VFML>mode, builds the half selector with
;; arm_simd_vect_par_cnst_half, and emits the matching
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic pattern.
2385 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2386 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2388 [(match_operand:VCVTF 1 "s_register_operand")
2390 (match_operand:<VFML> 2 "s_register_operand")
2391 (match_operand:<VFML> 3 "s_register_operand"))
2392 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2395 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2396 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2397 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2398 (operands[0], operands[1],
2399 operands[2], operands[3],
;; vfmal.f16 by lane, low half of operand 2.  Operand 3 (<VFML> mode, "x"
;; constraint) supplies the lane selected by constant operand 5 and is
;; duplicated across the vector.
;; The output code maps operand 5 through NEON_ENDIAN_LANE_N.  If the lane is
;; beyond the last index of <VFMLSEL>mode it is rebased by
;; GET_MODE_NUNITS (<VFMLSEL>mode) and operand 3 is printed with %<V_hi>;
;; otherwise the lane is used unchanged and operand 3 is printed with %<V_lo>.
2404 (define_insn "vfmal_lane_low<mode>_intrinsic"
2405 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2408 (vec_select:<VFMLSEL>
2409 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2410 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2412 (vec_duplicate:<VFMLSEL>
2414 (match_operand:<VFML> 3 "s_register_operand" "x")
2415 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2416 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2419 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2420 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2422 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2423 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2427 operands[5] = GEN_INT (lane);
2428 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2431 [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane builtin expander for the mixed-width case: operand 2 has mode
;; <VFML> while operand 3 has mode <VFMLSEL2>.  The lane index (operand 4) is
;; mapped through NEON_ENDIAN_LANE_N relative to <VFMLSEL2>mode, the half
;; selector is built for <VFML>mode, and the matching
;; vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic pattern is emitted.
2434 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2435 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2437 [(match_operand:VCVTF 1 "s_register_operand")
2439 (match_operand:<VFML> 2 "s_register_operand")
2440 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2441 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2445 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2446 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2447 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2448 (operands[0], operands[1], operands[2], operands[3],
2453 ;; Used to implement the intrinsics:
2454 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2455 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2456 ;; Needs a bit of care to get the modes of the different sub-expressions right
2457 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2458 ;; S or D subregister to select the appropriate lane from.
;; Output code summary: the endian-adjusted lane is split into
;; new_lane = lane % elts_per_reg (printed as the lane index) and
;; regdiff = lane / elts_per_reg (added to operand 3's register number),
;; where elts_per_reg is GET_MODE_NUNITS (<VFMLSEL>mode).  Operands 2 and 3
;; are then rebuilt as <VFMLSEL>mode REGs before printing.
2460 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2461 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2464 (vec_select:<VFMLSEL>
2465 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2466 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2468 (vec_duplicate:<VFMLSEL>
2470 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2471 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2472 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2475 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2476 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2477 int new_lane = lane % elts_per_reg;
2478 int regdiff = lane / elts_per_reg;
2479 operands[5] = GEN_INT (new_lane);
2480 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2481 because we want the print_operand code to print the appropriate
2482 S or D register prefix. */
2483 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2484 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2485 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2487 [(set_attr "type" "neon_fp_mla_s<q>")]
2490 ;; Used to implement the intrinsics:
2491 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2492 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2493 ;; Needs a bit of care to get the modes of the different sub-expressions right
2494 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2495 ;; S or D subregister to select the appropriate lane from.
;; Output code summary: the endian-adjusted lane is split into
;; new_lane = lane % elts_per_reg (printed as the lane index) and
;; regdiff = lane / elts_per_reg (added to operand 3's register number),
;; where elts_per_reg is GET_MODE_NUNITS (<VFMLSEL>mode).  Only operand 3 is
;; rebuilt; operand 2 is printed with %<V_hi>.
2497 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2498 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2501 (vec_select:<VFMLSEL>
2502 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2503 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2505 (vec_duplicate:<VFMLSEL>
2507 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2508 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2509 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2512 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2513 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2514 int new_lane = lane % elts_per_reg;
2515 int regdiff = lane / elts_per_reg;
2516 operands[5] = GEN_INT (new_lane);
2517 /* We re-create operands[3] in the halved VFMLSEL mode
2518 because we've calculated the correct half-width subreg to extract
2519 the lane from and we want to print *that* subreg instead. */
2520 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2521 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2523 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal.f16 by lane, high half of operand 2.  Operand 3 (<VFML> mode, "x"
;; constraint) supplies the lane selected by constant operand 5 and is
;; duplicated across the vector.
;; The output code maps operand 5 through NEON_ENDIAN_LANE_N.  If the lane is
;; beyond the last index of <VFMLSEL>mode it is rebased by
;; GET_MODE_NUNITS (<VFMLSEL>mode) and operand 3 is printed with %<V_hi>;
;; otherwise the lane is used unchanged and operand 3 is printed with %<V_lo>.
2526 (define_insn "vfmal_lane_high<mode>_intrinsic"
2527 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2530 (vec_select:<VFMLSEL>
2531 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2532 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2534 (vec_duplicate:<VFMLSEL>
2536 (match_operand:<VFML> 3 "s_register_operand" "x")
2537 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2538 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2541 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2542 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2544 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2545 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2549 operands[5] = GEN_INT (lane);
2550 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2553 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 by lane, low half of operand 2.  Operand 3 (<VFML> mode, "x"
;; constraint) supplies the lane selected by constant operand 5 and is
;; duplicated across the vector.
;; The output code maps operand 5 through NEON_ENDIAN_LANE_N.  If the lane is
;; beyond the last index of <VFMLSEL>mode it is rebased by
;; GET_MODE_NUNITS (<VFMLSEL>mode) and operand 3 is printed with %<V_hi>;
;; otherwise the lane is used unchanged and operand 3 is printed with %<V_lo>.
2556 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2557 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2561 (vec_select:<VFMLSEL>
2562 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2563 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2565 (vec_duplicate:<VFMLSEL>
2567 (match_operand:<VFML> 3 "s_register_operand" "x")
2568 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2569 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2572 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2573 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2575 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2576 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2580 operands[5] = GEN_INT (lane);
2581 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2584 [(set_attr "type" "neon_fp_mla_s<q>")]
2587 ;; Used to implement the intrinsics:
2588 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2589 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2590 ;; Needs a bit of care to get the modes of the different sub-expressions right
2591 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2592 ;; S or D subregister to select the appropriate lane from.
;; Output code summary: the endian-adjusted lane is split into
;; new_lane = lane % elts_per_reg (printed as the lane index) and
;; regdiff = lane / elts_per_reg (added to operand 3's register number),
;; where elts_per_reg is GET_MODE_NUNITS (<VFMLSEL>mode).  Operands 2 and 3
;; are then rebuilt as <VFMLSEL>mode REGs before printing.
2594 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2595 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2599 (vec_select:<VFMLSEL>
2600 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2601 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2603 (vec_duplicate:<VFMLSEL>
2605 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2606 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2607 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2610 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2611 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2612 int new_lane = lane % elts_per_reg;
2613 int regdiff = lane / elts_per_reg;
2614 operands[5] = GEN_INT (new_lane);
2615 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2616 because we want the print_operand code to print the appropriate
2617 S or D register prefix. */
2618 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2619 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2620 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2622 [(set_attr "type" "neon_fp_mla_s<q>")]
2625 ;; Used to implement the intrinsics:
2626 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2627 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2628 ;; Needs a bit of care to get the modes of the different sub-expressions right
2629 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2630 ;; S or D subregister to select the appropriate lane from.
;; Output code summary: the endian-adjusted lane is split into
;; new_lane = lane % elts_per_reg (printed as the lane index) and
;; regdiff = lane / elts_per_reg (added to operand 3's register number),
;; where elts_per_reg is GET_MODE_NUNITS (<VFMLSEL>mode).  Only operand 3 is
;; rebuilt; operand 2 is printed with %<V_hi>.
2632 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2633 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2637 (vec_select:<VFMLSEL>
2638 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2639 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2641 (vec_duplicate:<VFMLSEL>
2643 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2644 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2645 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2648 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2649 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2650 int new_lane = lane % elts_per_reg;
2651 int regdiff = lane / elts_per_reg;
2652 operands[5] = GEN_INT (new_lane);
2653 /* We re-create operands[3] in the halved VFMLSEL mode
2654 because we've calculated the correct half-width subreg to extract
2655 the lane from and we want to print *that* subreg instead. */
2656 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2657 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2659 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 by lane, high half of operand 2.  Operand 3 (<VFML> mode, "x"
;; constraint) supplies the lane selected by constant operand 5 and is
;; duplicated across the vector.
;; The output code maps operand 5 through NEON_ENDIAN_LANE_N.  If the lane is
;; beyond the last index of <VFMLSEL>mode it is rebased by
;; GET_MODE_NUNITS (<VFMLSEL>mode) and operand 3 is printed with %<V_hi>;
;; otherwise the lane is used unchanged and operand 3 is printed with %<V_lo>.
2662 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2663 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2667 (vec_select:<VFMLSEL>
2668 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2669 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2671 (vec_duplicate:<VFMLSEL>
2673 (match_operand:<VFML> 3 "s_register_operand" "x")
2674 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2675 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2678 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2679 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2681 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2682 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2686 operands[5] = GEN_INT (lane);
2687 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2690 [(set_attr "type" "neon_fp_mla_s<q>")]
2693 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla over the VDQW modes.  Operand 1 is the accumulator and
;; is tied to the destination ("0"), so the template prints only operands 0,
;; 2 and 3.  The attribute expression below picks neon_fp_mla_s<q> for float
;; modes and neon_mla_<V_elem_ch><q> otherwise.
2695 (define_insn "neon_vmla<mode>_unspec"
2696 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2697 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2698 (match_operand:VDQW 2 "s_register_operand" "w")
2699 (match_operand:VDQW 3 "s_register_operand" "w")]
2702 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2704 (if_then_else (match_test "<Is_float_mode>")
2705 (const_string "neon_fp_mla_s<q>")
2706 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: VW operands 2 and 3 are combined with the <V_widen> accumulator in
;; operand 1, which is tied to the destination.  The template prints the
;; destination with %q and the two sources with %P.
2709 (define_insn "neon_vmlal<sup><mode>"
2710 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2711 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2712 (match_operand:VW 2 "s_register_operand" "w")
2713 (match_operand:VW 3 "s_register_operand" "w")]
2716 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2717 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Expander behind the vmls builtin for the VDQW modes; operand 1 is the
;; accumulator (constraint "0").  When the mode is not a float mode, or
;; flag_unsafe_math_optimizations is set, it emits
;; mul<mode>3neg<mode>add<mode>_neon; the other emit visible here goes
;; through neon_vmls<mode>_unspec.
2720 (define_expand "neon_vmls<mode>"
2721 [(match_operand:VDQW 0 "s_register_operand" "=w")
2722 (match_operand:VDQW 1 "s_register_operand" "0")
2723 (match_operand:VDQW 2 "s_register_operand" "w")
2724 (match_operand:VDQW 3 "s_register_operand" "w")]
2727 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2728 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2729 operands[1], operands[2], operands[3]));
2731 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2732 operands[2], operands[3]));
2736 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls over the VDQW modes.  Operand 1 is the accumulator and
;; is tied to the destination ("0"), so the template prints only operands 0,
;; 2 and 3.  The attribute expression below picks neon_fp_mla_s<q> for float
;; modes and neon_mla_<V_elem_ch><q> otherwise.
2738 (define_insn "neon_vmls<mode>_unspec"
2739 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2740 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2741 (match_operand:VDQW 2 "s_register_operand" "w")
2742 (match_operand:VDQW 3 "s_register_operand" "w")]
2745 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2747 (if_then_else (match_test "<Is_float_mode>")
2748 (const_string "neon_fp_mla_s<q>")
2749 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: VW operands 2 and 3 are combined with the <V_widen> accumulator in
;; operand 1, which is tied to the destination.  The template prints the
;; destination with %q and the two sources with %P.
2752 (define_insn "neon_vmlsl<sup><mode>"
2753 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2754 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2755 (match_operand:VW 2 "s_register_operand" "w")
2756 (match_operand:VW 3 "s_register_operand" "w")]
2759 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2760 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2763 ;; vqdmulh, vqrdmulh
;; One pattern for both spellings selected by <r>, over the VMDQI modes.
;; Operands 1 and 2 are the sources; everything is printed with %<V_reg>.
2764 (define_insn "neon_vq<r>dmulh<mode>"
2765 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2766 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2767 (match_operand:VMDQI 2 "s_register_operand" "w")]
2770 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2771 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2774 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics, chosen by the VQRDMLH_AS iterator's
;; neon_rdma_as attribute, over the VMDQI modes.  Operand 1 is the
;; accumulator tied to the destination, so only operands 0, 2 and 3 are
;; printed.
2775 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2776 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2777 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2778 (match_operand:VMDQI 2 "s_register_operand" "w")
2779 (match_operand:VMDQI 3 "s_register_operand" "w")]
2782 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2783 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: VMDI operands 2 and 3 are combined with the <V_widen> accumulator
;; in operand 1, which is tied to the destination.  The template prints the
;; destination with %q and the two sources with %P.
2786 (define_insn "neon_vqdmlal<mode>"
2787 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2788 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2789 (match_operand:VMDI 2 "s_register_operand" "w")
2790 (match_operand:VMDI 3 "s_register_operand" "w")]
2793 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2794 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: VMDI operands 2 and 3 are combined with the <V_widen> accumulator
;; in operand 1, which is tied to the destination.  The template prints the
;; destination with %q and the two sources with %P.
2797 (define_insn "neon_vqdmlsl<mode>"
2798 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2799 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2800 (match_operand:VMDI 2 "s_register_operand" "w")
2801 (match_operand:VMDI 3 "s_register_operand" "w")]
2804 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2805 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW operands produce a <V_widen> result.  The template prints
;; the destination with %q and both sources with %P.
2808 (define_insn "neon_vmull<sup><mode>"
2809 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2810 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2811 (match_operand:VW 2 "s_register_operand" "w")]
2814 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2815 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI operands produce a <V_widen> result.  The template
;; prints the destination with %q and both sources with %P.
2818 (define_insn "neon_vqdmull<mode>"
2819 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2820 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2821 (match_operand:VMDI 2 "s_register_operand" "w")]
2824 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2825 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander behind the vsub builtin for the VCVTF modes.  When the mode is
;; not a float mode, or flag_unsafe_math_optimizations is set, it emits the
;; canonical sub<mode>3 pattern; the other emit visible here goes through
;; neon_vsub<mode>_unspec.
2828 (define_expand "neon_vsub<mode>"
2829 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2830 (match_operand:VCVTF 1 "s_register_operand" "w")
2831 (match_operand:VCVTF 2 "s_register_operand" "w")]
2834 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2835 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2837 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2842 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub over the VCVTF modes, printed as vsub.<V_if_elem> with
;; all operands in "w" registers.  The attribute expression below picks
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
2844 (define_insn "neon_vsub<mode>_unspec"
2845 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2846 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2847 (match_operand:VCVTF 2 "s_register_operand" "w")]
2850 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2852 (if_then_else (match_test "<Is_float_mode>")
2853 (const_string "neon_fp_addsub_s<q>")
2854 (const_string "neon_sub<q>")))]
;; Long subtract (vsubl): two VDI operands produce a <V_widen> result.  The
;; template prints the destination with %q and both sources with %P.
2857 (define_insn "neon_vsubl<sup><mode>"
2858 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2859 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2860 (match_operand:VDI 2 "s_register_operand" "w")]
2863 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2864 [(set_attr "type" "neon_sub_long")]
;; Wide subtract (vsubw): operand 1 is already in the <V_widen> mode and
;; operand 2 is a VDI vector; the result is <V_widen>.  The template prints
;; operands 0 and 1 with %q and operand 2 with %P.
2867 (define_insn "neon_vsubw<sup><mode>"
2868 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2869 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2870 (match_operand:VDI 2 "s_register_operand" "w")]
2873 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2874 [(set_attr "type" "neon_sub_widen")]
;; vqsub over the VDQIX modes; operands 1 and 2 are combined into operand 0,
;; all printed with %<V_reg>.  Scheduling type is neon_qsub<q>.
2877 (define_insn "neon_vqsub<sup><mode>"
2878 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2879 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2880 (match_operand:VDQIX 2 "s_register_operand" "w")]
2883 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2884 [(set_attr "type" "neon_qsub<q>")]
;; Halving subtract (vhsub) over the VDQIW modes; all operands are printed
;; with %<V_reg>.  The scheduling type is neon_sub_halve<q>, so it follows
;; the register width of the mode.
2887 (define_insn "neon_vhsub<sup><mode>"
2888 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2889 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2890 (match_operand:VDQIW 2 "s_register_operand" "w")]
2893 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2894 [(set_attr "type" "neon_sub_halve<q>")]
;; v<r>subhn: two VN operands produce a <V_narrow> result.  The template
;; prints the destination with %P and both sources with %q.
2897 (define_insn "neon_v<r>subhn<mode>"
2898 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2899 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2900 (match_operand:VN 2 "s_register_operand" "w")]
2903 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2904 [(set_attr "type" "neon_sub_halve_narrow_q")]
2907 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2908 ;; without unsafe math optimizations.
;; Comparison builtin expander over COMPARISONS and the VDQW modes; operand 2
;; may be a register or zero (reg_or_zero_operand).  For float vector modes
;; without flag_unsafe_math_optimizations it emits one of the explicitly
;; named v2sf/v4sf *_insn_unspec patterns; the final emit visible here uses
;; neon_vc<cmp_op><mode>_insn.
2909 (define_expand "neon_vc<cmp_op><mode>"
2910 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2912 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2913 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2916 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2918 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2919 && !flag_unsafe_math_optimizations)
2921 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2922 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2923 whereas this expander iterates over the integer modes as well,
2924 but we will never expand to UNSPECs for the integer comparisons. */
2928 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2933 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2942 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; Canonical-RTL comparison insn over the VDQW modes.  Its condition excludes
;; float vector modes unless flag_unsafe_math_optimizations is set.
;; The output code formats the assembler template into `pattern' with
;; sprintf: the type letter is "f" for float vector modes and <cmp_type>
;; otherwise, and the last source is %<V_reg>2 for alternative 0 or the
;; literal "#0" for the zero alternative.  The type attribute distinguishes
;; a compare against zero (operand 2 matching zero_operand) from a
;; register compare.
2949 (define_insn "neon_vc<cmp_op><mode>_insn"
2950 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2952 (COMPARISONS:<V_cmp_result>
2953 (match_operand:VDQW 1 "s_register_operand" "w,w")
2954 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2955 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2956 && !flag_unsafe_math_optimizations)"
2959 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2961 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2962 ? "f" : "<cmp_type>",
2963 which_alternative == 0
2964 ? "%<V_reg>2" : "#0");
2965 output_asm_insn (pattern, operands);
2969 (if_then_else (match_operand 2 "zero_operand")
2970 (const_string "neon_compare_zero<q>")
2971 (const_string "neon_compare<q>")))]
;; Unspec comparison insn for the VCVTF modes.  The template is built with
;; sprintf and always uses the ".f" type letter; the last source is
;; %<V_reg>2 for alternative 0 or the literal "#0" for the zero alternative.
2974 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2975 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2976 (unspec:<V_cmp_result>
2977 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2978 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2983 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2985 which_alternative == 0
2986 ? "%<V_reg>2" : "#0");
2987 output_asm_insn (pattern, operands);
2990 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Comparison builtin expander for the VH modes, enabled under
;; TARGET_NEON_FP16INST.  For float vector modes without
;; flag_unsafe_math_optimizations it uses the *_fp16insn_unspec pattern; the
;; other call visible here uses the *_fp16insn pattern.
2993 (define_expand "neon_vc<cmp_op><mode>"
2994 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2997 (match_operand:VH 1 "s_register_operand")
2998 (match_operand:VH 2 "reg_or_zero_operand")))]
2999 "TARGET_NEON_FP16INST"
3001 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
3003 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3004 && !flag_unsafe_math_optimizations)
3006 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
3007 (operands[0], operands[1], operands[2]));
3010 (gen_neon_vc<cmp_op><mode>_fp16insn
3011 (operands[0], operands[1], operands[2]));
;; Canonical-RTL comparison insn for the VH modes.  It requires
;; TARGET_NEON_FP16INST and, for float vector modes, also
;; flag_unsafe_math_optimizations.  The template is built with sprintf in the
;; same way as neon_vc<cmp_op><mode>_insn: "f" or <cmp_type> as the type
;; letter, and %<V_reg>2 or "#0" as the last source depending on
;; which_alternative.
3015 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3016 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3018 (COMPARISONS:<V_cmp_result>
3019 (match_operand:VH 1 "s_register_operand" "w,w")
3020 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3021 "TARGET_NEON_FP16INST
3022 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3023 && !flag_unsafe_math_optimizations)"
3026 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3028 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3029 ? "f" : "<cmp_type>",
3030 which_alternative == 0
3031 ? "%<V_reg>2" : "#0");
3032 output_asm_insn (pattern, operands);
3036 (if_then_else (match_operand 2 "zero_operand")
3037 (const_string "neon_compare_zero<q>")
3038 (const_string "neon_compare<q>")))])
;; Unspec comparison insn for the VH modes, enabled under
;; TARGET_NEON_FP16INST.  The template is built with sprintf and always uses
;; the ".f" type letter; the last source is %<V_reg>2 for alternative 0 or
;; the literal "#0" for the zero alternative.
3040 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3042 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3043 (unspec:<V_cmp_result>
3044 [(match_operand:VH 1 "s_register_operand" "w,w")
3045 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3047 "TARGET_NEON_FP16INST"
3050 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3052 which_alternative == 0
3053 ? "%<V_reg>2" : "#0");
3054 output_asm_insn (pattern, operands);
3057 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Comparison insn over the GTUGEU code iterator and the VDQIW modes, printed
;; with the ".u" type prefix.  Both sources must be registers; there is no
;; zero alternative here.
3059 (define_insn "neon_vc<cmp_op>u<mode>"
3060 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3062 (GTUGEU:<V_cmp_result>
3063 (match_operand:VDQIW 1 "s_register_operand" "w")
3064 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3066 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3067 [(set_attr "type" "neon_compare<q>")]
;; Expander for absolute-value comparisons (GTGE codes) on VCVTF vectors.
;; With -funsafe-math-optimizations it emits the RTL-comparison insn
;; (..._insn); otherwise it emits the unspec insn (..._insn_unspec).
3070 (define_expand "neon_vca<cmp_op><mode>"
3071 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3073 (GTGE:<V_cmp_result>
3074 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3075 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3078 if (flag_unsafe_math_optimizations)
3079 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3082 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; Absolute-value comparison of VCVTF vectors written as a comparison of
;; two (abs ...) operands.  Only matches under
;; TARGET_NEON && flag_unsafe_math_optimizations; emits vac<cmp_op>.
3089 (define_insn "neon_vca<cmp_op><mode>_insn"
3090 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3092 (GTGE:<V_cmp_result>
3093 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3094 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3095 "TARGET_NEON && flag_unsafe_math_optimizations"
3096 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3097 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the VCVTF absolute-value comparison; emits
;; vac<cmp_op_unsp> on two register operands.
3100 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3101 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3102 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3103 (match_operand:VCVTF 2 "s_register_operand" "w")]
3106 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3107 [(set_attr "type" "neon_fp_compare_s<q>")]
;; FP16 (VH) expander for absolute-value comparisons (GLTE codes), gated on
;; TARGET_NEON_FP16INST.  Chooses the RTL-comparison insn (_fp16insn) under
;; -funsafe-math-optimizations and the unspec insn (_fp16insn_unspec)
;; otherwise.
3110 (define_expand "neon_vca<cmp_op><mode>"
3112 (match_operand:<V_cmp_result> 0 "s_register_operand")
3114 (GLTE:<V_cmp_result>
3115 (abs:VH (match_operand:VH 1 "s_register_operand"))
3116 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3117 "TARGET_NEON_FP16INST"
3119 if (flag_unsafe_math_optimizations)
3120 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3121 (operands[0], operands[1], operands[2]));
3123 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3124 (operands[0], operands[1], operands[2]));
;; FP16 (VH) absolute-value comparison written as a comparison of two
;; (abs ...) operands.  Only matches under
;; TARGET_NEON_FP16INST && flag_unsafe_math_optimizations.
3128 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3130 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3132 (GLTE:<V_cmp_result>
3133 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3134 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3135 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3136 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3137 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the FP16 (VH) absolute-value comparison; emits
;; vac<cmp_op_unsp> on two register operands.
3140 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3141 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3142 (unspec:<V_cmp_result>
3143 [(match_operand:VH 1 "s_register_operand" "w")
3144 (match_operand:VH 2 "s_register_operand" "w")]
3147 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3148 [(set_attr "type" "neon_fp_compare_s<q>")]
;; FP16 (VH) compare-with-zero expander: forwards to the two-operand
;; neon_vc<cmp_op><mode> expander, passing CONST0_RTX (<MODE>mode) as the
;; second comparison operand.
3151 (define_expand "neon_vc<cmp_op>z<mode>"
3153 (match_operand:<V_cmp_result> 0 "s_register_operand")
3154 (COMPARISONS:<V_cmp_result>
3155 (match_operand:VH 1 "s_register_operand")
3157 "TARGET_NEON_FP16INST"
3159 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3160 CONST0_RTX (<MODE>mode)));
;; VTST on integer vectors (VDQIW), modelled as an unspec of two register
;; operands.
3164 (define_insn "neon_vtst<mode>"
3165 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3166 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3167 (match_operand:VDQIW 2 "s_register_operand" "w")]
3170 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3171 [(set_attr "type" "neon_tst<q>")]
;; Integer VABD (VDQIW modes); <sup> supplies the signedness letter of the
;; element suffix.  Modelled as an unspec.
3174 (define_insn "neon_vabd<sup><mode>"
3175 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3176 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3177 (match_operand:VDQIW 2 "s_register_operand" "w")]
3180 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3181 [(set_attr "type" "neon_abd<q>")]
;; FP16 (VH) VABD, gated on TARGET_NEON_FP16INST.
;; NOTE(review): the type attribute is "neon_abd<q>" whereas the VCVTF
;; variant (neon_vabdf<mode>) uses "neon_fp_abd_s<q>" -- confirm intended.
3184 (define_insn "neon_vabd<mode>"
3185 [(set (match_operand:VH 0 "s_register_operand" "=w")
3186 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3187 (match_operand:VH 2 "s_register_operand" "w")]
3189 "TARGET_NEON_FP16INST"
3190 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3191 [(set_attr "type" "neon_abd<q>")]
;; VABD on VCVTF vectors, modelled as an unspec of two register operands.
3194 (define_insn "neon_vabdf<mode>"
3195 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3196 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3197 (match_operand:VCVTF 2 "s_register_operand" "w")]
3200 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3201 [(set_attr "type" "neon_fp_abd_s<q>")]
;; VABDL: VW-mode inputs (printed as D registers, %P) produce a <V_widen>
;; result (printed as a Q register, %q).
3204 (define_insn "neon_vabdl<sup><mode>"
3205 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3206 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3207 (match_operand:VW 2 "s_register_operand" "w")]
3210 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3211 [(set_attr "type" "neon_abd_long")]
;; VABA: adds an unspec of operands 2 and 3 to the accumulator operand 1,
;; which is tied to the output by the "0" constraint.
3214 (define_insn "neon_vaba<sup><mode>"
3215 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3216 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3217 (match_operand:VDQIW 3 "s_register_operand" "w")]
3219 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3221 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3222 [(set_attr "type" "neon_arith_acc<q>")]
;; VABAL: widening accumulate.  Operands 2 and 3 are VW mode; the
;; accumulator (operand 1, tied to the output) is <V_widen> mode.
3225 (define_insn "neon_vabal<sup><mode>"
3226 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3227 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3228 (match_operand:VW 3 "s_register_operand" "w")]
3230 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3232 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3233 [(set_attr "type" "neon_arith_acc<q>")]
;; Integer vector max/min (VDQIW) selected by the <maxmin>/<sup> iterator
;; attributes; modelled as an unspec.
3236 (define_insn "neon_v<maxmin><sup><mode>"
3237 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3238 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3239 (match_operand:VDQIW 2 "s_register_operand" "w")]
3242 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3243 [(set_attr "type" "neon_minmax<q>")]
;; Vector max/min on VCVTF modes, modelled as an unspec.
3246 (define_insn "neon_v<maxmin>f<mode>"
3247 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3248 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3249 (match_operand:VCVTF 2 "s_register_operand" "w")]
3252 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3253 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; FP16 (VH) counterpart of neon_v<maxmin>f<mode>, gated on
;; TARGET_NEON_FP16INST.
3256 (define_insn "neon_v<maxmin>f<mode>"
3257 [(set (match_operand:VH 0 "s_register_operand" "=w")
3259 [(match_operand:VH 1 "s_register_operand" "w")
3260 (match_operand:VH 2 "s_register_operand" "w")]
3262 "TARGET_NEON_FP16INST"
3263 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3264 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Pairwise max/min on V4HF only (D registers, %P), gated on
;; TARGET_NEON_FP16INST; emits vp<maxmin>.f16.
3267 (define_insn "neon_vp<maxmin>fv4hf"
3268 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3270 [(match_operand:V4HF 1 "s_register_operand" "w")
3271 (match_operand:V4HF 2 "s_register_operand" "w")]
3273 "TARGET_NEON_FP16INST"
3274 "vp<maxmin>.f16\t%P0, %P1, %P2"
3275 [(set_attr "type" "neon_reduc_minmax")]
;; FP16 (VH) form of the <fmaxmin_op> instructions, gated on
;; TARGET_NEON_FP16INST.
3278 (define_insn "neon_<fmaxmin_op><mode>"
3280 (match_operand:VH 0 "s_register_operand" "=w")
3282 [(match_operand:VH 1 "s_register_operand" "w")
3283 (match_operand:VH 2 "s_register_operand" "w")]
3285 "TARGET_NEON_FP16INST"
3286 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3287 [(set_attr "type" "neon_fp_minmax_s<q>")]
3290 ;; v<maxmin>nm intrinsics.
;; VCVTF form of the <fmaxmin_op> instructions; requires
;; TARGET_NEON && TARGET_VFP5.
3291 (define_insn "neon_<fmaxmin_op><mode>"
3292 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3293 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3294 (match_operand:VCVTF 2 "s_register_operand" "w")]
3296 "TARGET_NEON && TARGET_VFP5"
3297 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3298 [(set_attr "type" "neon_fp_minmax_s<q>")]
3301 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Named pattern <fmaxmin><mode>3; emits the same <fmaxmin_op> template as
;; the neon_<fmaxmin_op><mode> intrinsic pattern and shares its
;; TARGET_NEON && TARGET_VFP5 condition.
3302 (define_insn "<fmaxmin><mode>3"
3303 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3304 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3305 (match_operand:VCVTF 2 "s_register_operand" "w")]
3307 "TARGET_NEON && TARGET_VFP5"
3308 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3309 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Intrinsic-facing expander for VPADD on VD modes; it simply forwards its
;; operands to neon_vpadd_internal<mode>.
3312 (define_expand "neon_vpadd<mode>"
3313 [(match_operand:VD 0 "s_register_operand" "=w")
3314 (match_operand:VD 1 "s_register_operand" "w")
3315 (match_operand:VD 2 "s_register_operand" "w")]
3318 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; VPADDL: single VDQIW input, result in the <V_double_width> mode.
3323 (define_insn "neon_vpaddl<sup><mode>"
3324 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3325 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3328 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3329 [(set_attr "type" "neon_reduc_add_long")]
;; VPADAL: operand 1 is the <V_double_width> accumulator tied to the output
;; ("0" constraint); operand 2 is the VDQIW input.
3332 (define_insn "neon_vpadal<sup><mode>"
3333 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3334 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3335 (match_operand:VDQIW 2 "s_register_operand" "w")]
3338 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3339 [(set_attr "type" "neon_reduc_add_acc")]
;; Pairwise integer max/min on VDI modes, modelled as an unspec.
3342 (define_insn "neon_vp<maxmin><sup><mode>"
3343 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3344 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3345 (match_operand:VDI 2 "s_register_operand" "w")]
3348 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3349 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise max/min on VCVTF modes, modelled as an unspec.
3352 (define_insn "neon_vp<maxmin>f<mode>"
3353 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3354 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3355 (match_operand:VCVTF 2 "s_register_operand" "w")]
3358 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3359 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; VRECPS on VCVTF modes, modelled as an unspec of two register operands.
3362 (define_insn "neon_vrecps<mode>"
3363 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3364 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3365 (match_operand:VCVTF 2 "s_register_operand" "w")]
3368 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3369 [(set_attr "type" "neon_fp_recps_s<q>")]
;; FP16 (VH) counterpart of neon_vrecps<mode>, gated on
;; TARGET_NEON_FP16INST.
3372 (define_insn "neon_vrecps<mode>"
3374 (match_operand:VH 0 "s_register_operand" "=w")
3375 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3376 (match_operand:VH 2 "s_register_operand" "w")]
3378 "TARGET_NEON_FP16INST"
3379 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3380 [(set_attr "type" "neon_fp_recps_s<q>")]
;; VRSQRTS on VCVTF modes, modelled as an unspec of two register operands.
3383 (define_insn "neon_vrsqrts<mode>"
3384 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3385 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3386 (match_operand:VCVTF 2 "s_register_operand" "w")]
3389 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3390 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; FP16 (VH) counterpart of neon_vrsqrts<mode>, gated on
;; TARGET_NEON_FP16INST.
3393 (define_insn "neon_vrsqrts<mode>"
3395 (match_operand:VH 0 "s_register_operand" "=w")
3396 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3397 (match_operand:VH 2 "s_register_operand" "w")]
3399 "TARGET_NEON_FP16INST"
3400 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3401 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Intrinsic-facing expander: forwards to the standard abs<mode>2 pattern.
3404 (define_expand "neon_vabs<mode>"
3405 [(match_operand:VDQW 0 "s_register_operand" "")
3406 (match_operand:VDQW 1 "s_register_operand" "")]
3409 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; VQABS on integer vectors (VDQIW), modelled as a one-operand unspec.
3413 (define_insn "neon_vqabs<mode>"
3414 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3415 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3418 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3419 [(set_attr "type" "neon_qabs<q>")]
;; Per-element byte swap (RTL bswap) on VDQHSD modes, implemented with
;; vrev<element-size>.8, i.e. reversing the bytes within each element.
3422 (define_insn "neon_bswap<mode>"
3423 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3424 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3426 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3427 [(set_attr "type" "neon_rev<q>")]
;; Intrinsic-facing expander: forwards to the standard neg<mode>2 pattern.
3430 (define_expand "neon_vneg<mode>"
3431 [(match_operand:VDQW 0 "s_register_operand" "")
3432 (match_operand:VDQW 1 "s_register_operand" "")]
3435 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3439 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the VCVTI accumulator (tied to the output); operands 2 and 3
;; are the <VSI2QI> inputs combined by the unspec.
3440 (define_insn "neon_<sup>dot<vsi2qi>"
3441 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3442 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3443 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3444 "register_operand" "w")
3445 (match_operand:<VSI2QI> 3
3446 "register_operand" "w")]
3449 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3450 [(set_attr "type" "neon_dot")]
3453 ;; These instructions map to the __builtins for the Dot Product
3454 ;; indexed operations.
;; Operand 3 is always a V8QI register (constraint "t") and operand 4 the
;; lane index, which is remapped with NEON_ENDIAN_LANE_N before printing.
3455 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3456 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3457 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3458 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3459 "register_operand" "w")
3460 (match_operand:V8QI 3 "register_operand" "t")
3461 (match_operand:SI 4 "immediate_operand" "i")]
3466 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3467 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3469 [(set_attr "type" "neon_dot")]
3472 ;; These expands map to the Dot Product optab the vectorizer checks for.
3473 ;; The auto-vectorizer expects a dot product builtin that also does an
3474 ;; accumulation into the provided register.
3475 ;; Given the following pattern
3477 ;; for (i=0; i<len; i++) {
3483 ;; This can be auto-vectorized to
3484 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3486 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3487 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3488 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3491 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Implementation note: the dot-product insn is emitted with operand 3 (the
;; accumulator) as both destination and accumulator input, and operand 3 is
;; then copied into operand 0.
3492 (define_expand "<sup>dot_prod<vsi2qi>"
3493 [(set (match_operand:VCVTI 0 "register_operand")
3494 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3496 (match_operand:<VSI2QI> 2
3497 "register_operand")]
3499 (match_operand:VCVTI 3 "register_operand")))]
3503 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3505 emit_insn (gen_rtx_SET (operands[0], operands[3]));
;; Expander for copysign on VCVTF vectors.  A vector whose every element is
;; the constant 0x80000000 is materialised in the <V_cmp_result> mode, viewed
;; as <MODE>mode through a subreg, and used as the select mask of a VBSL.
;; Operand 2 is first copied into operand 0, and operand 0 is also passed to
;; the VBSL as an input.
;; The element constant is built with gen_int_mode so that the CONST_INT is
;; correctly sign-extended for SImode; a bare GEN_INT (0x80000000) yields a
;; non-canonical constant when HOST_WIDE_INT is wider than 32 bits.
3509 (define_expand "neon_copysignf<mode>"
3510 [(match_operand:VCVTF 0 "register_operand")
3511 (match_operand:VCVTF 1 "register_operand")
3512 (match_operand:VCVTF 2 "register_operand")]
3516 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3517 rtx c = gen_int_mode (0x80000000, SImode);
3519 emit_move_insn (v_bitmask,
3520 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3521 emit_move_insn (operands[0], operands[2]);
3522 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3523 <VCVTF:V_cmp_result>mode, 0);
3524 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; VQNEG on integer vectors (VDQIW), modelled as a one-operand unspec.
3531 (define_insn "neon_vqneg<mode>"
3532 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3533 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3536 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3537 [(set_attr "type" "neon_qneg<q>")]
;; VCLS on integer vectors (VDQIW), modelled as a one-operand unspec.
3540 (define_insn "neon_vcls<mode>"
3541 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3542 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3545 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3546 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz<mode>2 pattern for VDQIW vectors, using the RTL clz
;; code directly; emits vclz.
3549 (define_insn "clz<mode>2"
3550 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3551 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3553 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3554 [(set_attr "type" "neon_cnt<q>")]
;; Intrinsic-facing expander: forwards to the clz<mode>2 pattern.
3557 (define_expand "neon_vclz<mode>"
3558 [(match_operand:VDQIW 0 "s_register_operand" "")
3559 (match_operand:VDQIW 1 "s_register_operand" "")]
3562 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount<mode>2 pattern for VE vectors, using the RTL
;; popcount code directly; emits vcnt.
3566 (define_insn "popcount<mode>2"
3567 [(set (match_operand:VE 0 "s_register_operand" "=w")
3568 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3570 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3571 [(set_attr "type" "neon_cnt<q>")]
;; Intrinsic-facing expander: forwards to the popcount<mode>2 pattern.
3574 (define_expand "neon_vcnt<mode>"
3575 [(match_operand:VE 0 "s_register_operand" "=w")
3576 (match_operand:VE 1 "s_register_operand" "w")]
3579 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; FP16 (VH) VRECPE, gated on TARGET_NEON_FP16INST; the ".f16" suffix is
;; hard-coded in the template.
3583 (define_insn "neon_vrecpe<mode>"
3584 [(set (match_operand:VH 0 "s_register_operand" "=w")
3585 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3587 "TARGET_NEON_FP16INST"
3588 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3589 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; VRECPE on the V32 mode iterator; the element suffix comes from
;; <V_u_elem>.
3592 (define_insn "neon_vrecpe<mode>"
3593 [(set (match_operand:V32 0 "s_register_operand" "=w")
3594 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3597 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3598 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; VRSQRTE on the V32 mode iterator; the element suffix comes from
;; <V_u_elem>.
3601 (define_insn "neon_vrsqrte<mode>"
3602 [(set (match_operand:V32 0 "s_register_operand" "=w")
3603 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3606 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3607 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Intrinsic-facing expander: forwards to the standard one_cmpl<mode>2
;; pattern.
3610 (define_expand "neon_vmvn<mode>"
3611 [(match_operand:VDQIW 0 "s_register_operand" "")
3612 (match_operand:VDQIW 1 "s_register_operand" "")]
3615 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Extract one lane of a VD vector into a core register with the signed
;; move (vmov.s<size>).  On big-endian targets the lane index is mirrored
;; (nunits - 1 - elt) before being printed.
3619 (define_insn "neon_vget_lane<mode>_sext_internal"
3620 [(set (match_operand:SI 0 "s_register_operand" "=r")
3622 (vec_select:<V_elem>
3623 (match_operand:VD 1 "s_register_operand" "w")
3624 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3627 if (BYTES_BIG_ENDIAN)
3629 int elt = INTVAL (operands[2]);
3630 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3631 operands[2] = GEN_INT (elt);
3633 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3635 [(set_attr "type" "neon_to_gp")]
;; Extract one lane of a VD vector into a core register with the unsigned
;; move (vmov.u<size>).  On big-endian targets the lane index is mirrored
;; (nunits - 1 - elt) before being printed.
3638 (define_insn "neon_vget_lane<mode>_zext_internal"
3639 [(set (match_operand:SI 0 "s_register_operand" "=r")
3641 (vec_select:<V_elem>
3642 (match_operand:VD 1 "s_register_operand" "w")
3643 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3646 if (BYTES_BIG_ENDIAN)
3648 int elt = INTVAL (operands[2]);
3649 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3650 operands[2] = GEN_INT (elt);
3652 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3654 [(set_attr "type" "neon_to_gp")]
;; Signed lane extract from a VQ2 vector.  The lane is addressed through a
;; <V_HALF>-mode register: elt / halfelts selects which half (hard register
;; number regno + 2 * half) and elt % halfelts is the lane within that
;; half, mirrored on big-endian targets.  Emits vmov.s<size>.
3657 (define_insn "neon_vget_lane<mode>_sext_internal"
3658 [(set (match_operand:SI 0 "s_register_operand" "=r")
3660 (vec_select:<V_elem>
3661 (match_operand:VQ2 1 "s_register_operand" "w")
3662 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3666 int regno = REGNO (operands[1]);
3667 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3668 unsigned int elt = INTVAL (operands[2]);
3669 unsigned int elt_adj = elt % halfelts;
3671 if (BYTES_BIG_ENDIAN)
3672 elt_adj = halfelts - 1 - elt_adj;
3674 ops[0] = operands[0];
3675 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3676 ops[2] = GEN_INT (elt_adj);
3677 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3681 [(set_attr "type" "neon_to_gp_q")]
;; Unsigned lane extract from a VQ2 vector.  The lane is addressed through
;; a <V_HALF>-mode register: elt / halfelts selects which half (hard
;; register number regno + 2 * half) and elt % halfelts is the lane within
;; that half, mirrored on big-endian targets.  Emits vmov.u<size>.
3684 (define_insn "neon_vget_lane<mode>_zext_internal"
3685 [(set (match_operand:SI 0 "s_register_operand" "=r")
3687 (vec_select:<V_elem>
3688 (match_operand:VQ2 1 "s_register_operand" "w")
3689 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3693 int regno = REGNO (operands[1]);
3694 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3695 unsigned int elt = INTVAL (operands[2]);
3696 unsigned int elt_adj = elt % halfelts;
3698 if (BYTES_BIG_ENDIAN)
3699 elt_adj = halfelts - 1 - elt_adj;
3701 ops[0] = operands[0];
3702 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3703 ops[2] = GEN_INT (elt_adj);
3704 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3708 [(set_attr "type" "neon_to_gp_q")]
;; Intrinsic-facing lane extract for VDQW modes.  On big-endian targets the
;; lane number is converted by XOR-ing it with (lanes per 64 bits - 1).
;; 32-bit elements go through vec_extract<mode><V_elem_l>; other element
;; sizes use the sign-extending internal pattern.
3711 (define_expand "neon_vget_lane<mode>"
3712 [(match_operand:<V_ext> 0 "s_register_operand" "")
3713 (match_operand:VDQW 1 "s_register_operand" "")
3714 (match_operand:SI 2 "immediate_operand" "")]
3717 if (BYTES_BIG_ENDIAN)
3719 /* The intrinsics are defined in terms of a model where the
3720 element ordering in memory is vldm order, whereas the generic
3721 RTL is defined in terms of a model where the element ordering
3722 in memory is array order. Convert the lane number to conform
3724 unsigned int elt = INTVAL (operands[2]);
3725 unsigned int reg_nelts
3726 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3727 elt ^= reg_nelts - 1;
3728 operands[2] = GEN_INT (elt);
3731 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3732 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3735 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Unsigned intrinsic-facing lane extract for VDQIW modes.  Same lane
;; conversion as neon_vget_lane<mode>; element sizes other than 32 bits use
;; the zero-extending internal pattern.
3741 (define_expand "neon_vget_laneu<mode>"
3742 [(match_operand:<V_ext> 0 "s_register_operand" "")
3743 (match_operand:VDQIW 1 "s_register_operand" "")
3744 (match_operand:SI 2 "immediate_operand" "")]
3747 if (BYTES_BIG_ENDIAN)
3749 /* The intrinsics are defined in terms of a model where the
3750 element ordering in memory is vldm order, whereas the generic
3751 RTL is defined in terms of a model where the element ordering
3752 in memory is array order. Convert the lane number to conform
3754 unsigned int elt = INTVAL (operands[2]);
3755 unsigned int reg_nelts
3756 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3757 elt ^= reg_nelts - 1;
3758 operands[2] = GEN_INT (elt);
3761 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3762 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3765 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DImode "lane extract": the visible body is a plain move of operand 1
;; into operand 0; the lane operand (2) is not used by that move.
3771 (define_expand "neon_vget_lanedi"
3772 [(match_operand:DI 0 "s_register_operand" "=r")
3773 (match_operand:DI 1 "s_register_operand" "w")
3774 (match_operand:SI 2 "immediate_operand" "")]
3777 emit_move_insn (operands[0], operands[1]);
;; V2DI lane extract.  After the big-endian lane conversion (elt ^= 1) the
;; lane must be 0 or 1 (asserted); lane 0 moves the DImode low part of
;; operand 1 and lane 1 the high part.
3781 (define_expand "neon_vget_lanev2di"
3782 [(match_operand:DI 0 "s_register_operand" "")
3783 (match_operand:V2DI 1 "s_register_operand" "")
3784 (match_operand:SI 2 "immediate_operand" "")]
3789 if (BYTES_BIG_ENDIAN)
3791 /* The intrinsics are defined in terms of a model where the
3792 element ordering in memory is vldm order, whereas the generic
3793 RTL is defined in terms of a model where the element ordering
3794 in memory is array order. Convert the lane number to conform
3796 unsigned int elt = INTVAL (operands[2]);
3797 unsigned int reg_nelts = 2;
3798 elt ^= reg_nelts - 1;
3799 operands[2] = GEN_INT (elt);
3802 lane = INTVAL (operands[2]);
3803 gcc_assert ((lane ==0) || (lane == 1));
3804 emit_move_insn (operands[0], lane == 0
3805 ? gen_lowpart (DImode, operands[1])
3806 : gen_highpart (DImode, operands[1]));
;; Intrinsic-facing lane insert for VDQ modes.  The lane number is
;; converted on big-endian targets (XOR with lanes-per-64-bits - 1) and
;; then handed to vec_set<mode>_internal as the one-hot mask (1 << elt).
3810 (define_expand "neon_vset_lane<mode>"
3811 [(match_operand:VDQ 0 "s_register_operand" "=w")
3812 (match_operand:<V_elem> 1 "s_register_operand" "r")
3813 (match_operand:VDQ 2 "s_register_operand" "0")
3814 (match_operand:SI 3 "immediate_operand" "i")]
3817 unsigned int elt = INTVAL (operands[3]);
3819 if (BYTES_BIG_ENDIAN)
3821 unsigned int reg_nelts
3822 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3823 elt ^= reg_nelts - 1;
3826 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3827 GEN_INT (1 << elt), operands[2]));
3831 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; The visible body is a plain move of operand 1 into operand 0.
3833 (define_expand "neon_vset_lanedi"
3834 [(match_operand:DI 0 "s_register_operand" "=w")
3835 (match_operand:DI 1 "s_register_operand" "r")
3836 (match_operand:DI 2 "s_register_operand" "0")
3837 (match_operand:SI 3 "immediate_operand" "i")]
3840 emit_move_insn (operands[0], operands[1]);
;; vcreate: reinterpret the DImode operand 1 as <MODE>mode via gen_lowpart
;; and move it into operand 0 (VD_RE modes).
3844 (define_expand "neon_vcreate<mode>"
3845 [(match_operand:VD_RE 0 "s_register_operand" "")
3846 (match_operand:DI 1 "general_operand" "")]
3849 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3850 emit_move_insn (operands[0], src);
;; vec_duplicate of a core-register scalar ("r") into every lane of a VX
;; vector; emits vdup.<size>.
3854 (define_insn "neon_vdup_n<mode>"
3855 [(set (match_operand:VX 0 "s_register_operand" "=w")
3856 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3858 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3859 [(set_attr "type" "neon_from_gp<q>")]
;; vec_duplicate of an HFmode core-register scalar into a V4HF vector.
;; (Condition and output template are not visible in this extract.)
3862 (define_insn "neon_vdup_nv4hf"
3863 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3864 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3867 [(set_attr "type" "neon_from_gp")]
;; vec_duplicate of an HFmode core-register scalar into a V8HF vector.
;; (Condition and output template are not visible in this extract.)
3870 (define_insn "neon_vdup_nv8hf"
3871 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3872 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3875 [(set_attr "type" "neon_from_gp_q")]
;; vec_duplicate for V32 modes with two alternatives: the scalar in a core
;; register ("r", printed %1) or in a "t"-class register (printed %y1).
;; The type attribute distinguishes the two (from_gp vs. dup).
3878 (define_insn "neon_vdup_n<mode>"
3879 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3880 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3883 vdup.<V_sz_elem>\t%<V_reg>0, %1
3884 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3885 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode "duplicate": with a single element this is just a move of
;; operand 1 into operand 0.
3888 (define_expand "neon_vdup_ndi"
3889 [(match_operand:DI 0 "s_register_operand" "=w")
3890 (match_operand:DI 1 "s_register_operand" "r")]
3893 emit_move_insn (operands[0], operands[1]);
;; Duplicate a DImode scalar into both halves of a V2DI vector using two
;; vmov instructions (hence length 8).  Alternative 0 takes the scalar from
;; a core-register pair (%Q1/%R1), alternative 1 from a "w" register (%P1);
;; %e0 and %f0 print the two halves of the destination.
3898 (define_insn "neon_vdup_nv2di"
3899 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3900 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3903 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3904 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3905 [(set_attr "length" "8")
3906 (set_attr "type" "multiple")]
;; Duplicate one lane of a <V_double_vector_mode> source into every lane of
;; a VDQW destination.  On big-endian targets the lane index is mirrored
;; within the source vector.  Two templates exist: destination printed as a
;; D register (%P0) or as a Q register (%q0); the selecting condition is
;; not visible in this extract.
3909 (define_insn "neon_vdup_lane<mode>_internal"
3910 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3912 (vec_select:<V_elem>
3913 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3914 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3917 if (BYTES_BIG_ENDIAN)
3919 int elt = INTVAL (operands[2]);
3920 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3921 operands[2] = GEN_INT (elt);
3924 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3926 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3928 [(set_attr "type" "neon_dup<q>")]
;; FP16 (VH) counterpart of neon_vdup_lane<mode>_internal, enabled under
;; TARGET_NEON && TARGET_FP16.  Lane mirroring on big-endian and the D/Q
;; destination templates match the VDQW pattern.
3931 (define_insn "neon_vdup_lane<mode>_internal"
3932 [(set (match_operand:VH 0 "s_register_operand" "=w")
3934 (vec_select:<V_elem>
3935 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3936 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3937 "TARGET_NEON && TARGET_FP16"
3939 if (BYTES_BIG_ENDIAN)
3941 int elt = INTVAL (operands[2]);
3942 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3943 operands[2] = GEN_INT (elt);
3946 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3948 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3950 [(set_attr "type" "neon_dup<q>")]
;; Intrinsic-facing vdup_lane for VDQW modes.  Converts the lane number on
;; big-endian targets (XOR with lanes-per-64-bits - 1, computed from the
;; source vector mode) and then emits the _internal insn.
3953 (define_expand "neon_vdup_lane<mode>"
3954 [(match_operand:VDQW 0 "s_register_operand" "=w")
3955 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3956 (match_operand:SI 2 "immediate_operand" "i")]
3959 if (BYTES_BIG_ENDIAN)
3961 unsigned int elt = INTVAL (operands[2]);
3962 unsigned int reg_nelts
3963 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3964 elt ^= reg_nelts - 1;
3965 operands[2] = GEN_INT (elt);
3967 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; FP16 (VH) counterpart of the neon_vdup_lane<mode> expander, enabled
;; under TARGET_NEON && TARGET_FP16; same big-endian lane conversion.
3972 (define_expand "neon_vdup_lane<mode>"
3973 [(match_operand:VH 0 "s_register_operand")
3974 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3975 (match_operand:SI 2 "immediate_operand")]
3976 "TARGET_NEON && TARGET_FP16"
3978 if (BYTES_BIG_ENDIAN)
3980 unsigned int elt = INTVAL (operands[2]);
3981 unsigned int reg_nelts
3982 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3983 elt ^= reg_nelts - 1;
3984 operands[2] = GEN_INT (elt);
3986 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3991 ; Scalar index is ignored, since only zero is valid here.
;; The expansion is a plain move of operand 1 into operand 0.
3992 (define_expand "neon_vdup_lanedi"
3993 [(match_operand:DI 0 "s_register_operand" "=w")
3994 (match_operand:DI 1 "s_register_operand" "w")
3995 (match_operand:SI 2 "immediate_operand" "i")]
3998 emit_move_insn (operands[0], operands[1]);
4002 ; Likewise for v2di, as the DImode second operand has only a single element.
;; The expansion forwards to neon_vdup_nv2di with operands 0 and 1.
4003 (define_expand "neon_vdup_lanev2di"
4004 [(match_operand:V2DI 0 "s_register_operand" "=w")
4005 (match_operand:DI 1 "s_register_operand" "w")
4006 (match_operand:SI 2 "immediate_operand" "i")]
4009 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4013 ; Disabled before reload because we don't want combine doing something silly,
4014 ; but used by the post-reload expansion of neon_vcombine.
;; Both operands are read-write ("+w"); the pattern is a pair of sets that
;; exchange operand 0 and operand 1, emitted as a single vswp.
4015 (define_insn "*neon_vswp<mode>"
4016 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4017 (match_operand:VDQX 1 "s_register_operand" "+w"))
4018 (set (match_dup 1) (match_dup 0))]
4019 "TARGET_NEON && reload_completed"
4020 "vswp\t%<V_reg>0, %<V_reg>1"
4021 [(set_attr "type" "neon_permute<q>")]
4024 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4026 ;; FIXME: A different implementation of this builtin could make it much
4027 ;; more likely that we wouldn't actually need to output anything (we could make
4028 ;; it so that the reg allocator puts things in the right places magically
4029 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; Modelled as a vec_concat of two VDX operands into <V_DOUBLE>; the insn is
;; split after reload by calling neon_split_vcombine.
4031 (define_insn_and_split "neon_vcombine<mode>"
4032 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4033 (vec_concat:<V_DOUBLE>
4034 (match_operand:VDX 1 "s_register_operand" "w")
4035 (match_operand:VDX 2 "s_register_operand" "w")))]
4038 "&& reload_completed"
4041 neon_split_vcombine (operands);
4044 [(set_attr "type" "multiple")]
;; Move the <V_HALF>-mode subreg of operand 1 located at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) into operand 0.
4047 (define_expand "neon_vget_high<mode>"
4048 [(match_operand:<V_HALF> 0 "s_register_operand")
4049 (match_operand:VQX 1 "s_register_operand")]
4052 emit_move_insn (operands[0],
4053 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4054 GET_MODE_SIZE (<V_HALF>mode)));
;; Move a <V_HALF>-mode subreg of operand 1 into operand 0.  (The subreg
;; offset argument is not visible in this extract.)
4058 (define_expand "neon_vget_low<mode>"
4059 [(match_operand:<V_HALF> 0 "s_register_operand")
4060 (match_operand:VQX 1 "s_register_operand")]
4063 emit_move_insn (operands[0],
4064 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named signed int -> float vector conversion (RTL float code);
;; only available when -frounding-math is off.  Emits vcvt.f32.s32.
4069 (define_insn "float<mode><V_cvtto>2"
4070 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4071 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4072 "TARGET_NEON && !flag_rounding_math"
4073 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4074 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned int -> float vector conversion (RTL
;; unsigned_float code); only available when -frounding-math is off.
;; Emits vcvt.f32.u32.
4077 (define_insn "floatuns<mode><V_cvtto>2"
4078 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4079 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4080 "TARGET_NEON && !flag_rounding_math"
4081 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4082 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named float -> signed int vector conversion (RTL fix code);
;; emits vcvt.s32.f32.
4085 (define_insn "fix_trunc<mode><V_cvtto>2"
4086 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4087 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4089 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4090 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named float -> unsigned int vector conversion (RTL
;; unsigned_fix code); emits vcvt.u32.f32.
4093 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4094 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4095 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4097 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4098 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic (unspec) float -> 32-bit integer conversion for VCVTF inputs;
;; <sup> selects the signedness letter of the destination type.
4101 (define_insn "neon_vcvt<sup><mode>"
4102 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4103 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4106 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4107 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic (unspec) 32-bit integer -> float conversion for VCVTI inputs;
;; <sup> selects the signedness letter of the source type.
4110 (define_insn "neon_vcvt<sup><mode>"
4111 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4112 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4115 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4116 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion V4HF -> V4SF (D-register source, Q-register
;; destination); requires TARGET_NEON && TARGET_FP16.
4119 (define_insn "neon_vcvtv4sfv4hf"
4120 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4121 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4123 "TARGET_NEON && TARGET_FP16"
4124 "vcvt.f32.f16\t%q0, %P1"
4125 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion V4SF -> V4HF (Q-register source, D-register
;; destination); requires TARGET_NEON && TARGET_FP16.
4128 (define_insn "neon_vcvtv4hfv4sf"
4129 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4130 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4132 "TARGET_NEON && TARGET_FP16"
4133 "vcvt.f16.f32\t%P0, %q1"
4134 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Intrinsic 16-bit integer -> FP16 conversion for VCVTHI inputs, gated on
;; TARGET_NEON_FP16INST; emits vcvt.f16.<s|u>16.
4137 (define_insn "neon_vcvt<sup><mode>"
4139 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4141 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4143 "TARGET_NEON_FP16INST"
4144 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4145 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision variant, floating-point source (VH) to <VH_CVTTO> result:
;; emits vcvt.<sup>16.f16.  Requires TARGET_NEON_FP16INST.
4148 (define_insn "neon_vcvt<sup><mode>"
4150 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4152 [(match_operand:VH 1 "s_register_operand" "w")]
4154 "TARGET_NEON_FP16INST"
4155 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4156 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Float-to-integer conversion taking an extra immediate (operand 2), which is
;; printed as the third assembler operand.  The immediate is passed through
;; arm_const_bounds with arguments 1 and 33 before the template is returned.
4159 (define_insn "neon_vcvt<sup>_n<mode>"
4160 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4161 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4162 (match_operand:SI 2 "immediate_operand" "i")]
4166 arm_const_bounds (operands[2], 1, 33);
4167 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4169 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Half-precision float-to-integer conversion with an immediate third
;; assembler operand (operand 2).  Requires TARGET_NEON_FP16INST.
;; NOTE(review): arm_const_bounds is called with lower argument 0 here,
;; whereas the 32-bit "_n" conversions in this file pass 1 -- confirm that 0
;; is an intended value for this instruction.
4172 (define_insn "neon_vcvt<sup>_n<mode>"
4173 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4175 [(match_operand:VH 1 "s_register_operand" "w")
4176 (match_operand:SI 2 "immediate_operand" "i")]
4178 "TARGET_NEON_FP16INST"
4180 arm_const_bounds (operands[2], 0, 17);
4181 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4183 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Integer-to-float conversion taking an extra immediate (operand 2), which is
;; printed as the third assembler operand.  The immediate is passed through
;; arm_const_bounds with arguments 1 and 33 before the template is returned.
4186 (define_insn "neon_vcvt<sup>_n<mode>"
4187 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4188 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4189 (match_operand:SI 2 "immediate_operand" "i")]
4193 arm_const_bounds (operands[2], 1, 33);
4194 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4196 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Half-precision integer-to-float conversion with an immediate third
;; assembler operand (operand 2).  Requires TARGET_NEON_FP16INST.
;; NOTE(review): arm_const_bounds is called with lower argument 0 here,
;; whereas the 32-bit "_n" conversions in this file pass 1 -- confirm that 0
;; is an intended value for this instruction.
4199 (define_insn "neon_vcvt<sup>_n<mode>"
4200 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4202 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4203 (match_operand:SI 2 "immediate_operand" "i")]
4205 "TARGET_NEON_FP16INST"
4207 arm_const_bounds (operands[2], 0, 17);
4208 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4210 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision float-to-integer conversion whose mnemonic is
;; `vcvt' followed by the <vcvth_op> attribute.  Requires
;; TARGET_NEON_FP16INST.
4213 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4215 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4217 [(match_operand:VH 1 "s_register_operand" "w")]
4219 "TARGET_NEON_FP16INST"
4220 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4221 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Narrowing move: VN-mode source (printed with %q1) to a <V_narrow>-mode
;; result (printed with %P0), emitted as vmovn.
4224 (define_insn "neon_vmovn<mode>"
4225 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4226 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4229 "vmovn.<V_if_elem>\t%P0, %q1"
4230 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Narrowing move emitted as vqmovn; the <sup> attribute supplies the type
;; letter placed in front of the element size in the mnemonic suffix.
4233 (define_insn "neon_vqmovn<sup><mode>"
4234 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4235 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4238 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4239 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Narrowing move emitted as vqmovun, using the <V_s_elem> element suffix.
4242 (define_insn "neon_vqmovun<mode>"
4243 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4244 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4247 "vqmovun.<V_s_elem>\t%P0, %q1"
4248 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move: VW-mode source (printed with %P1) to a <V_widen>-mode
;; result (printed with %q0), emitted as vmovl.
4251 (define_insn "neon_vmovl<sup><mode>"
4252 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4253 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4256 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4257 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by a selected lane, VMD modes.  Operand 2 holds the scalar
;; vector (constraint <scalar_mul_constraint>) and immediate operand 3 is
;; printed as its index, `%P2[%c3]'.  The type attribute is picked by testing
;; <Is_float_mode>.
4260 (define_insn "neon_vmul_lane<mode>"
4261 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4262 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4263 (match_operand:VMD 2 "s_register_operand"
4264 "<scalar_mul_constraint>")
4265 (match_operand:SI 3 "immediate_operand" "i")]
4269 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4272 (if_then_else (match_test "<Is_float_mode>")
4273 (const_string "neon_fp_mul_s_scalar<q>")
4274 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by a selected lane, VMQ modes.  The scalar vector (operand 2) has
;; mode <V_HALF> and is printed with %P2, while operands 0 and 1 are printed
;; with %q.  Immediate operand 3 is printed as the index `[%c3]'.
4277 (define_insn "neon_vmul_lane<mode>"
4278 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4279 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4280 (match_operand:<V_HALF> 2 "s_register_operand"
4281 "<scalar_mul_constraint>")
4282 (match_operand:SI 3 "immediate_operand" "i")]
4286 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4289 (if_then_else (match_test "<Is_float_mode>")
4290 (const_string "neon_fp_mul_s_scalar<q>")
4291 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by a selected lane, VH (half-precision) modes.  The scalar vector
;; (operand 2) is always V4HF; emits vmul.f16.  Requires
;; TARGET_NEON_FP16INST.
4294 (define_insn "neon_vmul_lane<mode>"
4295 [(set (match_operand:VH 0 "s_register_operand" "=w")
4296 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4297 (match_operand:V4HF 2 "s_register_operand"
4298 "<scalar_mul_constraint>")
4299 (match_operand:SI 3 "immediate_operand" "i")]
4301 "TARGET_NEON_FP16INST"
4302 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4303 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; Widening multiply by a selected lane: VMDI inputs, <V_widen> result
;; (printed with %q0).  Immediate operand 3 is printed as the index on
;; operand 2.
4306 (define_insn "neon_vmull<sup>_lane<mode>"
4307 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4308 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4309 (match_operand:VMDI 2 "s_register_operand"
4310 "<scalar_mul_constraint>")
4311 (match_operand:SI 3 "immediate_operand" "i")]
4315 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4317 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by a selected lane (UNSPEC_VQDMULL_LANE): VMDI inputs, <V_widen>
;; result.  Immediate operand 3 is printed as the index on operand 2.
4320 (define_insn "neon_vqdmull_lane<mode>"
4321 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4322 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4323 (match_operand:VMDI 2 "s_register_operand"
4324 "<scalar_mul_constraint>")
4325 (match_operand:SI 3 "immediate_operand" "i")]
4326 UNSPEC_VQDMULL_LANE))]
4329 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4331 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vq<r>dmulh by a selected lane, VMQI modes.  The scalar vector (operand 2)
;; has mode <V_HALF> and is printed with %P2; operands 0 and 1 are printed
;; with %q.
4334 (define_insn "neon_vq<r>dmulh_lane<mode>"
4335 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4336 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4337 (match_operand:<V_HALF> 2 "s_register_operand"
4338 "<scalar_mul_constraint>")
4339 (match_operand:SI 3 "immediate_operand" "i")]
4343 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4345 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vq<r>dmulh by a selected lane, VMDI modes; all register operands are
;; printed with %P.
;; NOTE(review): the type attribute is the same `_scalar_q' string used by
;; the VMQI variant of this pattern -- confirm that is intended here.
4348 (define_insn "neon_vq<r>dmulh_lane<mode>"
4349 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4350 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4351 (match_operand:VMDI 2 "s_register_operand"
4352 "<scalar_mul_constraint>")
4353 (match_operand:SI 3 "immediate_operand" "i")]
4357 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4359 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4362 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant.  Operand 1 is tied to the output ("0" constraint) and is not
;; printed; the template prints operands 0 and 2 with %q and the <V_HALF>
;; scalar vector (operand 3) with %P, indexed by immediate operand 4.
4363 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4364 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4365 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4366 (match_operand:VMQI 2 "s_register_operand" "w")
4367 (match_operand:<V_HALF> 3 "s_register_operand"
4368 "<scalar_mul_constraint>")
4369 (match_operand:SI 4 "immediate_operand" "i")]
4374 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4376 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI variant of vqrdml{a,s}h by lane.  Operand 1 is tied to the output
;; ("0" constraint); all printed register operands use %P, with immediate
;; operand 4 printed as the index on operand 3.
4379 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4380 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4381 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4382 (match_operand:VMDI 2 "s_register_operand" "w")
4383 (match_operand:VMDI 3 "s_register_operand"
4384 "<scalar_mul_constraint>")
4385 (match_operand:SI 4 "immediate_operand" "i")]
4390 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4392 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; vmla by a selected lane, VMD modes.  Operand 1 is tied to the output ("0"
;; constraint); operand 3 is the scalar vector, indexed by immediate operand
;; 4.  The type attribute is picked by testing <Is_float_mode>.
4395 (define_insn "neon_vmla_lane<mode>"
4396 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4397 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4398 (match_operand:VMD 2 "s_register_operand" "w")
4399 (match_operand:VMD 3 "s_register_operand"
4400 "<scalar_mul_constraint>")
4401 (match_operand:SI 4 "immediate_operand" "i")]
4405 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4408 (if_then_else (match_test "<Is_float_mode>")
4409 (const_string "neon_fp_mla_s_scalar<q>")
4410 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmla by a selected lane, VMQ modes.  Operand 1 is tied to the output; the
;; scalar vector (operand 3) has mode <V_HALF> and is printed with %P3,
;; indexed by immediate operand 4.
4413 (define_insn "neon_vmla_lane<mode>"
4414 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4415 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4416 (match_operand:VMQ 2 "s_register_operand" "w")
4417 (match_operand:<V_HALF> 3 "s_register_operand"
4418 "<scalar_mul_constraint>")
4419 (match_operand:SI 4 "immediate_operand" "i")]
4423 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4426 (if_then_else (match_test "<Is_float_mode>")
4427 (const_string "neon_fp_mla_s_scalar<q>")
4428 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlal by a selected lane: <V_widen> accumulator (operand 1, tied to the
;; output) with VMDI multiplicands; immediate operand 4 is printed as the
;; index on operand 3.
4431 (define_insn "neon_vmlal<sup>_lane<mode>"
4432 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4433 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4434 (match_operand:VMDI 2 "s_register_operand" "w")
4435 (match_operand:VMDI 3 "s_register_operand"
4436 "<scalar_mul_constraint>")
4437 (match_operand:SI 4 "immediate_operand" "i")]
4441 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4443 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by a selected lane (UNSPEC_VQDMLAL_LANE): <V_widen> accumulator
;; (operand 1, tied to the output) with VMDI multiplicands; immediate operand
;; 4 is printed as the index on operand 3.
4446 (define_insn "neon_vqdmlal_lane<mode>"
4447 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4448 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4449 (match_operand:VMDI 2 "s_register_operand" "w")
4450 (match_operand:VMDI 3 "s_register_operand"
4451 "<scalar_mul_constraint>")
4452 (match_operand:SI 4 "immediate_operand" "i")]
4453 UNSPEC_VQDMLAL_LANE))]
4456 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4458 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; vmls by a selected lane, VMD modes.  Operand 1 is tied to the output ("0"
;; constraint); operand 3 is the scalar vector, indexed by immediate operand
;; 4.  The type attribute is picked by testing <Is_float_mode>.
4461 (define_insn "neon_vmls_lane<mode>"
4462 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4463 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4464 (match_operand:VMD 2 "s_register_operand" "w")
4465 (match_operand:VMD 3 "s_register_operand"
4466 "<scalar_mul_constraint>")
4467 (match_operand:SI 4 "immediate_operand" "i")]
4471 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4474 (if_then_else (match_test "<Is_float_mode>")
4475 (const_string "neon_fp_mla_s_scalar<q>")
4476 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmls by a selected lane, VMQ modes.  Operand 1 is tied to the output; the
;; scalar vector (operand 3) has mode <V_HALF> and is printed with %P3,
;; indexed by immediate operand 4.
4479 (define_insn "neon_vmls_lane<mode>"
4480 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4481 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4482 (match_operand:VMQ 2 "s_register_operand" "w")
4483 (match_operand:<V_HALF> 3 "s_register_operand"
4484 "<scalar_mul_constraint>")
4485 (match_operand:SI 4 "immediate_operand" "i")]
4489 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4492 (if_then_else (match_test "<Is_float_mode>")
4493 (const_string "neon_fp_mla_s_scalar<q>")
4494 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlsl by a selected lane: <V_widen> accumulator (operand 1, tied to the
;; output) with VMDI multiplicands; immediate operand 4 is printed as the
;; index on operand 3.
4497 (define_insn "neon_vmlsl<sup>_lane<mode>"
4498 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4499 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4500 (match_operand:VMDI 2 "s_register_operand" "w")
4501 (match_operand:VMDI 3 "s_register_operand"
4502 "<scalar_mul_constraint>")
4503 (match_operand:SI 4 "immediate_operand" "i")]
4507 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4509 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by a selected lane (UNSPEC_VQDMLSL_LANE): <V_widen> accumulator
;; (operand 1, tied to the output) with VMDI multiplicands; immediate operand
;; 4 is printed as the index on operand 3.
4512 (define_insn "neon_vqdmlsl_lane<mode>"
4513 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4514 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4515 (match_operand:VMDI 2 "s_register_operand" "w")
4516 (match_operand:VMDI 3 "s_register_operand"
4517 "<scalar_mul_constraint>")
4518 (match_operand:SI 4 "immediate_operand" "i")]
4519 UNSPEC_VQDMLSL_LANE))]
4522 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4524 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4527 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4528 ; core register into a temp register, then use a scalar taken from that. This
4529 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4530 ; or extracted from another vector. In the latter case it's currently better to
4531 ; use the "_lane" variant, and the former case can probably be implemented
4532 ; using vld1_lane, but that hasn't been done yet.
;; Expander for multiply-by-scalar, VMD modes.  Allocates a fresh <MODE>mode
;; pseudo, calls gen_neon_vset_lane<mode> on it with the scalar (operand 2)
;; and const0_rtx, then emits the "_lane" multiply using that temporary.
4534 (define_expand "neon_vmul_n<mode>"
4535 [(match_operand:VMD 0 "s_register_operand" "")
4536 (match_operand:VMD 1 "s_register_operand" "")
4537 (match_operand:<V_elem> 2 "s_register_operand" "")]
4540 rtx tmp = gen_reg_rtx (<MODE>mode);
4541 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4542 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for multiply-by-scalar, VMQ modes.  The temporary has mode
;; <V_HALF>; it is set up with gen_neon_vset_lane<V_half> from the scalar
;; (operand 2) and const0_rtx, then passed to the "_lane" multiply.
4547 (define_expand "neon_vmul_n<mode>"
4548 [(match_operand:VMQ 0 "s_register_operand" "")
4549 (match_operand:VMQ 1 "s_register_operand" "")
4550 (match_operand:<V_elem> 2 "s_register_operand" "")]
4553 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4554 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4555 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for multiply-by-scalar, VH modes (TARGET_NEON_FP16INST).  The
;; temporary is always V4HFmode and is set up with gen_neon_vset_lanev4hf
;; from the scalar (operand 2) and const0_rtx.
4560 (define_expand "neon_vmul_n<mode>"
4561 [(match_operand:VH 0 "s_register_operand")
4562 (match_operand:VH 1 "s_register_operand")
4563 (match_operand:<V_elem> 2 "s_register_operand")]
4564 "TARGET_NEON_FP16INST"
4566 rtx tmp = gen_reg_rtx (V4HFmode);
4567 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4568 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander: widening multiply by scalar, routed to gen_neon_vmulls_lane<mode>
;; through a <MODE>mode temporary built from the scalar (operand 2).
4573 (define_expand "neon_vmulls_n<mode>"
4574 [(match_operand:<V_widen> 0 "s_register_operand" "")
4575 (match_operand:VMDI 1 "s_register_operand" "")
4576 (match_operand:<V_elem> 2 "s_register_operand" "")]
4579 rtx tmp = gen_reg_rtx (<MODE>mode);
4580 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4581 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Expander: widening multiply by scalar, routed to gen_neon_vmullu_lane<mode>
;; through a <MODE>mode temporary built from the scalar (operand 2).
4586 (define_expand "neon_vmullu_n<mode>"
4587 [(match_operand:<V_widen> 0 "s_register_operand" "")
4588 (match_operand:VMDI 1 "s_register_operand" "")
4589 (match_operand:<V_elem> 2 "s_register_operand" "")]
4592 rtx tmp = gen_reg_rtx (<MODE>mode);
4593 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4594 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Expander: vqdmull by scalar, routed to gen_neon_vqdmull_lane<mode> through
;; a <MODE>mode temporary built from the scalar (operand 2).
4599 (define_expand "neon_vqdmull_n<mode>"
4600 [(match_operand:<V_widen> 0 "s_register_operand" "")
4601 (match_operand:VMDI 1 "s_register_operand" "")
4602 (match_operand:<V_elem> 2 "s_register_operand" "")]
4605 rtx tmp = gen_reg_rtx (<MODE>mode);
4606 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4607 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Expander: vqdmulh by scalar, VMDI modes, routed to
;; gen_neon_vqdmulh_lane<mode> through a <MODE>mode temporary built from the
;; scalar (operand 2).
4612 (define_expand "neon_vqdmulh_n<mode>"
4613 [(match_operand:VMDI 0 "s_register_operand" "")
4614 (match_operand:VMDI 1 "s_register_operand" "")
4615 (match_operand:<V_elem> 2 "s_register_operand" "")]
4618 rtx tmp = gen_reg_rtx (<MODE>mode);
4619 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4620 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander: vqrdmulh by scalar, VMDI modes, routed to
;; gen_neon_vqrdmulh_lane<mode> through a <MODE>mode temporary built from the
;; scalar (operand 2).
4625 (define_expand "neon_vqrdmulh_n<mode>"
4626 [(match_operand:VMDI 0 "s_register_operand" "")
4627 (match_operand:VMDI 1 "s_register_operand" "")
4628 (match_operand:<V_elem> 2 "s_register_operand" "")]
4631 rtx tmp = gen_reg_rtx (<MODE>mode);
4632 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4633 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander: vqdmulh by scalar, VMQI modes.  The temporary has mode <V_HALF>
;; and is set up with gen_neon_vset_lane<V_half> from the scalar (operand 2).
4638 (define_expand "neon_vqdmulh_n<mode>"
4639 [(match_operand:VMQI 0 "s_register_operand" "")
4640 (match_operand:VMQI 1 "s_register_operand" "")
4641 (match_operand:<V_elem> 2 "s_register_operand" "")]
4644 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4645 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4646 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander: vqrdmulh by scalar, VMQI modes.  The temporary has mode <V_HALF>
;; and is set up with gen_neon_vset_lane<V_half> from the scalar (operand 2).
4651 (define_expand "neon_vqrdmulh_n<mode>"
4652 [(match_operand:VMQI 0 "s_register_operand" "")
4653 (match_operand:VMQI 1 "s_register_operand" "")
4654 (match_operand:<V_elem> 2 "s_register_operand" "")]
4657 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4658 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4659 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander: vmla by scalar, VMD modes.  The scalar is operand 3; it is placed
;; in a <MODE>mode temporary via gen_neon_vset_lane<mode> and the work is
;; handed to gen_neon_vmla_lane<mode>.
4664 (define_expand "neon_vmla_n<mode>"
4665 [(match_operand:VMD 0 "s_register_operand" "")
4666 (match_operand:VMD 1 "s_register_operand" "")
4667 (match_operand:VMD 2 "s_register_operand" "")
4668 (match_operand:<V_elem> 3 "s_register_operand" "")]
4671 rtx tmp = gen_reg_rtx (<MODE>mode);
4672 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4673 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vmla by scalar, VMQ modes.  The scalar is operand 3; the
;; temporary has mode <V_HALF> and is set up with gen_neon_vset_lane<V_half>.
4678 (define_expand "neon_vmla_n<mode>"
4679 [(match_operand:VMQ 0 "s_register_operand" "")
4680 (match_operand:VMQ 1 "s_register_operand" "")
4681 (match_operand:VMQ 2 "s_register_operand" "")
4682 (match_operand:<V_elem> 3 "s_register_operand" "")]
4685 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4686 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4687 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vmlal by scalar, routed to gen_neon_vmlals_lane<mode>.  Operands
;; 0 and 1 are <V_widen>, operand 2 is VMDI and the scalar is operand 3.
4692 (define_expand "neon_vmlals_n<mode>"
4693 [(match_operand:<V_widen> 0 "s_register_operand" "")
4694 (match_operand:<V_widen> 1 "s_register_operand" "")
4695 (match_operand:VMDI 2 "s_register_operand" "")
4696 (match_operand:<V_elem> 3 "s_register_operand" "")]
4699 rtx tmp = gen_reg_rtx (<MODE>mode);
4700 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4701 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vmlal by scalar, routed to gen_neon_vmlalu_lane<mode>.  Operands
;; 0 and 1 are <V_widen>, operand 2 is VMDI and the scalar is operand 3.
4706 (define_expand "neon_vmlalu_n<mode>"
4707 [(match_operand:<V_widen> 0 "s_register_operand" "")
4708 (match_operand:<V_widen> 1 "s_register_operand" "")
4709 (match_operand:VMDI 2 "s_register_operand" "")
4710 (match_operand:<V_elem> 3 "s_register_operand" "")]
4713 rtx tmp = gen_reg_rtx (<MODE>mode);
4714 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4715 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vqdmlal by scalar, routed to gen_neon_vqdmlal_lane<mode>.
;; Operands 0 and 1 are <V_widen>, operand 2 is VMDI and the scalar is
;; operand 3.
4720 (define_expand "neon_vqdmlal_n<mode>"
4721 [(match_operand:<V_widen> 0 "s_register_operand" "")
4722 (match_operand:<V_widen> 1 "s_register_operand" "")
4723 (match_operand:VMDI 2 "s_register_operand" "")
4724 (match_operand:<V_elem> 3 "s_register_operand" "")]
4727 rtx tmp = gen_reg_rtx (<MODE>mode);
4728 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4729 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vmls by scalar, VMD modes.  The scalar is operand 3; it is placed
;; in a <MODE>mode temporary via gen_neon_vset_lane<mode> and the work is
;; handed to gen_neon_vmls_lane<mode>.
4734 (define_expand "neon_vmls_n<mode>"
4735 [(match_operand:VMD 0 "s_register_operand" "")
4736 (match_operand:VMD 1 "s_register_operand" "")
4737 (match_operand:VMD 2 "s_register_operand" "")
4738 (match_operand:<V_elem> 3 "s_register_operand" "")]
4741 rtx tmp = gen_reg_rtx (<MODE>mode);
4742 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4743 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vmls by scalar, VMQ modes.  The scalar is operand 3; the
;; temporary has mode <V_HALF> and is set up with gen_neon_vset_lane<V_half>.
4748 (define_expand "neon_vmls_n<mode>"
4749 [(match_operand:VMQ 0 "s_register_operand" "")
4750 (match_operand:VMQ 1 "s_register_operand" "")
4751 (match_operand:VMQ 2 "s_register_operand" "")
4752 (match_operand:<V_elem> 3 "s_register_operand" "")]
4755 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4756 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4757 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vmlsl by scalar, routed to gen_neon_vmlsls_lane<mode>.  Operands
;; 0 and 1 are <V_widen>, operand 2 is VMDI and the scalar is operand 3.
4762 (define_expand "neon_vmlsls_n<mode>"
4763 [(match_operand:<V_widen> 0 "s_register_operand" "")
4764 (match_operand:<V_widen> 1 "s_register_operand" "")
4765 (match_operand:VMDI 2 "s_register_operand" "")
4766 (match_operand:<V_elem> 3 "s_register_operand" "")]
4769 rtx tmp = gen_reg_rtx (<MODE>mode);
4770 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4771 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vmlsl by scalar, routed to gen_neon_vmlslu_lane<mode>.  Operands
;; 0 and 1 are <V_widen>, operand 2 is VMDI and the scalar is operand 3.
4776 (define_expand "neon_vmlslu_n<mode>"
4777 [(match_operand:<V_widen> 0 "s_register_operand" "")
4778 (match_operand:<V_widen> 1 "s_register_operand" "")
4779 (match_operand:VMDI 2 "s_register_operand" "")
4780 (match_operand:<V_elem> 3 "s_register_operand" "")]
4783 rtx tmp = gen_reg_rtx (<MODE>mode);
4784 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4785 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander: vqdmlsl by scalar, routed to gen_neon_vqdmlsl_lane<mode>.
;; Operands 0 and 1 are <V_widen>, operand 2 is VMDI and the scalar is
;; operand 3.
4790 (define_expand "neon_vqdmlsl_n<mode>"
4791 [(match_operand:<V_widen> 0 "s_register_operand" "")
4792 (match_operand:<V_widen> 1 "s_register_operand" "")
4793 (match_operand:VMDI 2 "s_register_operand" "")
4794 (match_operand:<V_elem> 3 "s_register_operand" "")]
4797 rtx tmp = gen_reg_rtx (<MODE>mode);
4798 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4799 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on two VDQX registers with an immediate fourth assembler operand
;; (operand 3).  The immediate is passed through arm_const_bounds with
;; arguments 0 and GET_MODE_NUNITS (<MODE>mode) before the template is
;; returned.
4804 (define_insn "neon_vext<mode>"
4805 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4806 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4807 (match_operand:VDQX 2 "s_register_operand" "w")
4808 (match_operand:SI 3 "immediate_operand" "i")]
4812 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4813 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4815 [(set_attr "type" "neon_ext<q>")]
;; vrev64 on a VDQ-mode register, with the element size taken from
;; <V_sz_elem>.
4818 (define_insn "neon_vrev64<mode>"
4819 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4820 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4823 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4824 [(set_attr "type" "neon_rev<q>")]
;; vrev32 on a VX-mode register, with the element size taken from
;; <V_sz_elem>.
4827 (define_insn "neon_vrev32<mode>"
4828 [(set (match_operand:VX 0 "s_register_operand" "=w")
4829 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4832 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4833 [(set_attr "type" "neon_rev<q>")]
;; vrev16 on a VE-mode register, with the element size taken from
;; <V_sz_elem>.
4836 (define_insn "neon_vrev16<mode>"
4837 [(set (match_operand:VE 0 "s_register_operand" "=w")
4838 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4841 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4842 [(set_attr "type" "neon_rev<q>")]
4845 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4846 ; allocation. For an intrinsic of form:
4847 ; rD = vbsl_* (rS, rN, rM)
4848 ; We can use any of:
4849 ; vbsl rS, rN, rM (if D = S)
4850 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4851 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Three-alternative bitwise-select insn.  Each alternative ties a different
;; input to the output through a "0" constraint: operand 1 (vbsl), operand 3
;; (vbit) or operand 2 (vbif); the template prints the two untied inputs.
4853 (define_insn "neon_vbsl<mode>_internal"
4854 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4855 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4856 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4857 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4861 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4862 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4863 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4864 [(set_attr "type" "neon_bsl<q>")]
;; Expander for the vbsl intrinsics.  Operand 1 arrives in mode
;; <V_cmp_result> and is rewritten with gen_lowpart to <MODE>mode so that all
;; three inputs share one mode.
4867 (define_expand "neon_vbsl<mode>"
4868 [(set (match_operand:VDQX 0 "s_register_operand" "")
4869 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4870 (match_operand:VDQX 2 "s_register_operand" "")
4871 (match_operand:VDQX 3 "s_register_operand" "")]
4875 /* We can't alias operands together if they have different modes. */
4876 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift of a VDQIX register by a VDQIX register operand; the mnemonic is
;; `v' followed by <shift_op>.  Type attribute: neon_shift_imm<q>.
4880 (define_insn "neon_v<shift_op><sup><mode>"
4881 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4882 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4883 (match_operand:VDQIX 2 "s_register_operand" "w")]
4886 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4887 [(set_attr "type" "neon_shift_imm<q>")]
;; Same operand shape and output template as the preceding shift pattern, but
;; with type attribute neon_sat_shift_imm<q>.
4891 (define_insn "neon_v<shift_op><sup><mode>"
4892 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4893 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4894 (match_operand:VDQIX 2 "s_register_operand" "w")]
4897 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4898 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift of a VDQIX register by an immediate (operand 2).  The immediate is
;; passed through arm_const_bounds with arguments 1 and
;; neon_element_bits (<MODE>mode) + 1 before the template is returned.
4902 (define_insn "neon_v<shift_op><sup>_n<mode>"
4903 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4904 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4905 (match_operand:SI 2 "immediate_operand" "i")]
4909 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4910 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4912 [(set_attr "type" "neon_shift_imm<q>")]
4915 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: VN source (printed with %q1), <V_narrow>
;; result (printed with %P0).  The immediate is passed through
;; arm_const_bounds with arguments 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4916 (define_insn "neon_v<shift_op>_n<mode>"
4917 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4918 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4919 (match_operand:SI 2 "immediate_operand" "i")]
4923 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4924 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4926 [(set_attr "type" "neon_shift_imm_narrow_q")]
4929 ;; vqshrn_n, vqrshrn_n
;; Same operand shape and immediate check as the vshrn_n pattern; the mnemonic
;; suffix is built from <sup> and <V_sz_elem> and the type attribute is the
;; saturating narrow class.
4930 (define_insn "neon_v<shift_op><sup>_n<mode>"
4931 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4932 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4933 (match_operand:SI 2 "immediate_operand" "i")]
4937 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4938 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4940 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4943 ;; vqshrun_n, vqrshrun_n
;; Same operand shape and immediate check as the vshrn_n pattern; the mnemonic
;; suffix is <V_s_elem>.
4944 (define_insn "neon_v<shift_op>_n<mode>"
4945 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4946 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4947 (match_operand:SI 2 "immediate_operand" "i")]
4951 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4952 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4954 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate (operand 2).  The immediate is passed through
;; arm_const_bounds with arguments 0 and neon_element_bits (<MODE>mode).
4957 (define_insn "neon_vshl_n<mode>"
4958 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4959 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4960 (match_operand:SI 2 "immediate_operand" "i")]
4964 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4965 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4967 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate (operand 2).  The immediate is passed through
;; arm_const_bounds with arguments 0 and neon_element_bits (<MODE>mode).
4970 (define_insn "neon_vqshl_<sup>_n<mode>"
4971 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4972 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4973 (match_operand:SI 2 "immediate_operand" "i")]
4977 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4978 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4980 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate (operand 2).  The immediate is passed through
;; arm_const_bounds with arguments 0 and neon_element_bits (<MODE>mode).
4983 (define_insn "neon_vqshlu_n<mode>"
4984 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4985 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4986 (match_operand:SI 2 "immediate_operand" "i")]
4990 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4991 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4993 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate: VW source (printed with %P1), <V_widen>
;; result (printed with %q0).
;; NOTE(review): the in-line comment states `0 < imm', but the value passed
;; to arm_const_bounds as the lower argument is 0 -- confirm whether an
;; immediate of 0 is meant to be accepted.
4996 (define_insn "neon_vshll<sup>_n<mode>"
4997 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4998 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4999 (match_operand:SI 2 "immediate_operand" "i")]
5003 /* The boundaries are: 0 < imm <= size. */
5004 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
5005 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
5007 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate with the first input (operand 1) tied to the output
;; ("0" constraint); the template prints operands 0 and 2 and immediate
;; operand 3.  Type attribute: neon_shift_acc<q>.
5011 (define_insn "neon_v<shift_op><sup>_n<mode>"
5012 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5013 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5014 (match_operand:VDQIX 2 "s_register_operand" "w")
5015 (match_operand:SI 3 "immediate_operand" "i")]
5019 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5020 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5022 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate.  Operand 1 is tied to the output ("0" constraint); the
;; immediate (operand 3) is passed through arm_const_bounds with arguments 1
;; and neon_element_bits (<MODE>mode) + 1.
5025 (define_insn "neon_vsri_n<mode>"
5026 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5027 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5028 (match_operand:VDQIX 2 "s_register_operand" "w")
5029 (match_operand:SI 3 "immediate_operand" "i")]
5033 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5034 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5036 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate.  Operand 1 is tied to the output ("0" constraint); the
;; immediate (operand 3) is passed through arm_const_bounds with arguments 0
;; and neon_element_bits (<MODE>mode).
5039 (define_insn "neon_vsli_n<mode>"
5040 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5041 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5042 (match_operand:VDQIX 2 "s_register_operand" "w")
5043 (match_operand:SI 3 "immediate_operand" "i")]
5047 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5048 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5050 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl.8 with a one-register list: operand 1 is printed inside the braces
;; and operand 2 follows as the final assembler operand.
5053 (define_insn "neon_vtbl1v8qi"
5054 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5055 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5056 (match_operand:V8QI 2 "s_register_operand" "w")]
5059 "vtbl.8\t%P0, {%P1}, %P2"
5060 [(set_attr "type" "neon_tbl1")]
;; vtbl.8 with a two-register list.  Operand 1 is a TImode register; the
;; output code takes its REGNO and builds V8QImode REG rtxes at tabbase and
;; tabbase + 2 for the list, then prints everything via output_asm_insn.
5063 (define_insn "neon_vtbl2v8qi"
5064 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5065 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5066 (match_operand:V8QI 2 "s_register_operand" "w")]
5071 int tabbase = REGNO (operands[1]);
5073 ops[0] = operands[0];
5074 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5075 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5076 ops[3] = operands[2];
5077 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5081 [(set_attr "type" "neon_tbl2")]
;; vtbl.8 with a three-register list.  Operand 1 is an EImode register; the
;; list entries are V8QImode REG rtxes at tabbase, tabbase + 2 and
;; tabbase + 4, printed via output_asm_insn.
5084 (define_insn "neon_vtbl3v8qi"
5085 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5086 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5087 (match_operand:V8QI 2 "s_register_operand" "w")]
5092 int tabbase = REGNO (operands[1]);
5094 ops[0] = operands[0];
5095 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5096 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5097 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5098 ops[4] = operands[2];
5099 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5103 [(set_attr "type" "neon_tbl3")]
;; vtbl.8 with a four-register list.  Operand 1 is an OImode register; the
;; list entries are V8QImode REG rtxes at tabbase, tabbase + 2, tabbase + 4
;; and tabbase + 6, printed via output_asm_insn.
5106 (define_insn "neon_vtbl4v8qi"
5107 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5108 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5109 (match_operand:V8QI 2 "s_register_operand" "w")]
5114 int tabbase = REGNO (operands[1]);
5116 ops[0] = operands[0];
5117 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5118 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5119 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5120 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5121 ops[5] = operands[2];
5122 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5126 [(set_attr "type" "neon_tbl4")]
5129 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup, split once reload_completed holds.  The output is
;; earlyclobber ("=&w").  The split views operand 1 as TImode via gen_lowpart
;; and emits gen_neon_vtbl2v8qi twice: once on the low-part V8QI subregs of
;; op0/op2 and once on their high-part subregs.
5130 (define_insn_and_split "neon_vtbl1v16qi"
5131 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5132 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5133 (match_operand:V16QI 2 "s_register_operand" "w")]
5137 "&& reload_completed"
5140 rtx op0, op1, op2, part0, part2;
5144 op1 = gen_lowpart (TImode, operands[1]);
5147 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5148 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5149 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5150 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5152 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5153 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5154 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5155 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5158 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table (operand 1), split once
;; reload_completed holds.  The output is earlyclobber ("=&w").  The split
;; emits two insns, one on the low-part and one on the high-part V8QI subregs
;; of op0/op2.
;; NOTE(review): both halves call gen_neon_vtbl2v8qi, whose table operand is
;; TImode in the neon_vtbl2v8qi pattern in this file, while operand 1 here is
;; OImode -- check how op1 is derived and whether this generator is the
;; intended one.
5161 (define_insn_and_split "neon_vtbl2v16qi"
5162 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5163 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5164 (match_operand:V16QI 2 "s_register_operand" "w")]
5168 "&& reload_completed"
5171 rtx op0, op1, op2, part0, part2;
5178 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5179 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5180 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5181 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5183 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5184 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5185 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5186 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5189 [(set_attr "type" "multiple")]
5192 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5193 ;; handle quad-word input modes, producing octa-word output modes. But
5194 ;; that requires us to add support for octa-word vector modes in moves.
5195 ;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI registers (operands 1 and 2) into one OImode register
;; (operand 0).  Once reload has completed the insn is split by calling
;; neon_split_vcombine on the operands.
5196 (define_insn_and_split "neon_vcombinev16qi"
5197 [(set (match_operand:OI 0 "s_register_operand" "=w")
5198 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5199 (match_operand:V16QI 2 "s_register_operand" "w")]
5203 "&& reload_completed"
5206 neon_split_vcombine (operands);
5209 [(set_attr "type" "multiple")]
;; VTBX.8 with a single-register table.  Operand 1 is tied to the result
;; (constraint "0"), operand 2 is the V8QI table and operand 3 the V8QI
;; index vector.
5212 (define_insn "neon_vtbx1v8qi"
5213 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5214 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5215 (match_operand:V8QI 2 "s_register_operand" "w")
5216 (match_operand:V8QI 3 "s_register_operand" "w")]
5219 "vtbx.8\t%P0, {%P2}, %P3"
5220 [(set_attr "type" "neon_tbl1")]
;; VTBX.8 with a two-register table.  Operand 1 is tied to the result,
;; operand 2 (TImode) is the table, named in the output as V8QI registers at
;; REGNO (operands[2]) + 0 and + 2; operand 3 is the V8QI index vector.
5223 (define_insn "neon_vtbx2v8qi"
5224 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5225 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5226 (match_operand:TI 2 "s_register_operand" "w")
5227 (match_operand:V8QI 3 "s_register_operand" "w")]
5232 int tabbase = REGNO (operands[2]);
5234 ops[0] = operands[0];
5235 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5236 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5237 ops[3] = operands[3];
5238 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5242 [(set_attr "type" "neon_tbl2")]
;; VTBX.8 with a three-register table.  Operand 1 is tied to the result,
;; operand 2 (EImode) is the table, named in the output as V8QI registers at
;; REGNO (operands[2]) + 0, 2 and 4; operand 3 is the V8QI index vector.
5245 (define_insn "neon_vtbx3v8qi"
5246 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5247 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5248 (match_operand:EI 2 "s_register_operand" "w")
5249 (match_operand:V8QI 3 "s_register_operand" "w")]
5254 int tabbase = REGNO (operands[2]);
5256 ops[0] = operands[0];
5257 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5258 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5259 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5260 ops[4] = operands[3];
5261 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5265 [(set_attr "type" "neon_tbl3")]
;; VTBX.8 with a four-register table.  Operand 1 is tied to the result,
;; operand 2 (OImode) is the table, named in the output as V8QI registers at
;; REGNO (operands[2]) + 0, 2, 4 and 6; operand 3 is the V8QI index vector.
5268 (define_insn "neon_vtbx4v8qi"
5269 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5270 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5271 (match_operand:OI 2 "s_register_operand" "w")
5272 (match_operand:V8QI 3 "s_register_operand" "w")]
5277 int tabbase = REGNO (operands[2]);
5279 ops[0] = operands[0];
5280 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5281 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5282 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5283 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5284 ops[5] = operands[3];
5285 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5289 [(set_attr "type" "neon_tbl4")]
;; Expander for VTRN over the VDQWH modes.  It produces two results from the
;; same pair of inputs (operands 1 and 2): operand 0, and operand 3 via
;; UNSPEC_VTRN2.
5292 (define_expand "neon_vtrn<mode>_internal"
5294 [(set (match_operand:VDQWH 0 "s_register_operand")
5295 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5296 (match_operand:VDQWH 2 "s_register_operand")]
5298 (set (match_operand:VDQWH 3 "s_register_operand")
5299 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5304 ;; Note: Different operand numbering to handle tied registers correctly.
;; VTRN instruction pattern.  The two outputs are operands 0 and 2, both
;; earlyclobber; input operand 1 is tied to output 0 and input operand 3 is
;; tied to output 2, so the instruction is printed with just %0 and %2.
5305 (define_insn "*neon_vtrn<mode>_insn"
5306 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5307 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5308 (match_operand:VDQWH 3 "s_register_operand" "2")]
5310 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5311 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5314 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5315 [(set_attr "type" "neon_permute<q>")]
;; Expander for VZIP over the VDQWH modes.  It produces two results from the
;; same pair of inputs (operands 1 and 2): operand 0, and operand 3 via
;; UNSPEC_VZIP2.
5318 (define_expand "neon_vzip<mode>_internal"
5320 [(set (match_operand:VDQWH 0 "s_register_operand")
5321 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5322 (match_operand:VDQWH 2 "s_register_operand")]
5324 (set (match_operand:VDQWH 3 "s_register_operand")
5325 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5330 ;; Note: Different operand numbering to handle tied registers correctly.
;; VZIP instruction pattern.  The two outputs are operands 0 and 2, both
;; earlyclobber; input operand 1 is tied to output 0 and input operand 3 is
;; tied to output 2, so the instruction is printed with just %0 and %2.
5331 (define_insn "*neon_vzip<mode>_insn"
5332 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5333 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5334 (match_operand:VDQWH 3 "s_register_operand" "2")]
5336 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5337 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5340 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5341 [(set_attr "type" "neon_zip<q>")]
;; Expander for VUZP over the VDQWH modes.  It produces two results from the
;; same pair of inputs (operands 1 and 2): operand 0, and operand 3 via
;; UNSPEC_VUZP2.
5344 (define_expand "neon_vuzp<mode>_internal"
5346 [(set (match_operand:VDQWH 0 "s_register_operand")
5347 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5348 (match_operand:VDQWH 2 "s_register_operand")]
5350 (set (match_operand:VDQWH 3 "s_register_operand" "")
5351 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5356 ;; Note: Different operand numbering to handle tied registers correctly.
;; VUZP instruction pattern.  The two outputs are operands 0 and 2, both
;; earlyclobber; input operand 1 is tied to output 0 and input operand 3 is
;; tied to output 2, so the instruction is printed with just %0 and %2.
5357 (define_insn "*neon_vuzp<mode>_insn"
5358 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5359 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5360 (match_operand:VDQWH 3 "s_register_operand" "2")]
5362 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5363 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5366 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5367 [(set_attr "type" "neon_zip<q>")]
;; vec_load_lanes expander for a single vector: sets register operand 0
;; (mode VDQX) from an unspec of memory operand 1 (a neon_struct_operand of
;; the same mode).
5370 (define_expand "vec_load_lanes<mode><mode>"
5371 [(set (match_operand:VDQX 0 "s_register_operand")
5372 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; VLD1 of a whole vector: loads register operand 0 (printed with %h) from
;; memory operand 1 (constraint "Um", printed with %A).
5376 (define_insn "neon_vld1<mode>"
5377 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5378 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5381 "vld1.<V_sz_elem>\t%h0, %A1"
5382 [(set_attr "type" "neon_load1_1reg<q>")]
5385 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5386 ;; in arm_expand_neon_args. The lane numbers are restored to architectural lane order here.
;; VLD1 to one lane of a D-register vector (VDX modes).  Operand 1 is the
;; element in memory, operand 2 the previous vector contents (tied to the
;; result) and operand 3 the lane number.  The lane is passed through
;; NEON_ENDIAN_LANE_N and written back to operands[3] before printing.
;; Two templates exist: a whole-register load and a single-lane load.
;; NOTE(review): the test choosing between them presumably uses `max', the
;; element count of the mode -- confirm.
5388 (define_insn "neon_vld1_lane<mode>"
5389 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5390 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5391 (match_operand:VDX 2 "s_register_operand" "0")
5392 (match_operand:SI 3 "immediate_operand" "i")]
5396 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5397 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5398 operands[3] = GEN_INT (lane);
5400 return "vld1.<V_sz_elem>\t%P0, %A1";
5402 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5404 [(set_attr "type" "neon_load1_one_lane<q>")]
5407 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5408 ;; here on big endian targets.
;; VLD1 to one lane of a Q-register vector (VQX modes).  Operand 1 is the
;; element in memory, operand 2 the previous vector contents (tied to the
;; result) and operand 3 the lane number, converted with
;; NEON_ENDIAN_LANE_N.  Operand 0 is rewritten as a <V_HALF>mode register at
;; `regno' so the instruction names a single D register.  Lanes with
;; lane >= max / 2 take the `if' path, which re-sets operands[3].
;; NOTE(review): that path presumably also moves `regno' to the other
;; D-register half and rebases the lane -- confirm.
5409 (define_insn "neon_vld1_lane<mode>"
5410 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5411 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5412 (match_operand:VQX 2 "s_register_operand" "0")
5413 (match_operand:SI 3 "immediate_operand" "i")]
5417 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5418 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5419 operands[3] = GEN_INT (lane);
5420 int regno = REGNO (operands[0]);
5421 if (lane >= max / 2)
5425 operands[3] = GEN_INT (lane);
5427 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5429 return "vld1.<V_sz_elem>\t%P0, %A1";
5431 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5433 [(set_attr "type" "neon_load1_one_lane<q>")]
;; VLD1 to all lanes of a D register (VD_LANE modes): a vec_duplicate of the
;; element in memory operand 1, printed with the "[]" all-lanes syntax.
5436 (define_insn "neon_vld1_dup<mode>"
5437 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5438 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5440 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5441 [(set_attr "type" "neon_load1_all_lanes<q>")]
5444 ;; Special case for DImode. Treat it exactly like a simple load.
;; DImode form of neon_vld1_dup, written as an expander: sets DI register
;; operand 0 from an unspec of DI memory operand 1.
5445 (define_expand "neon_vld1_dupdi"
5446 [(set (match_operand:DI 0 "s_register_operand" "")
5447 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; VLD1 to all lanes of a Q register (VQ2 modes): a vec_duplicate of the
;; element in memory operand 1.  Both halves of operand 0 are named in the
;; register list (%e0 and %f0), each with the "[]" all-lanes syntax.
5453 (define_insn "neon_vld1_dup<mode>"
5454 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5455 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5458 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5460 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Duplicate a DI value from memory into both halves of a V2DI register.
;; Once reload has completed this is split in two steps: load the low DI
;; part of operand 0 with neon_vld1_dupdi, then copy that low part into the
;; high DI part.  The "length" attribute is 8.
5463 (define_insn_and_split "neon_vld1_dupv2di"
5464 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5465 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5468 "&& reload_completed"
5471 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5472 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5473 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5476 [(set_attr "length" "8")
5477 (set_attr "type" "neon_load1_all_lanes_q")]
;; vec_store_lanes expander for a single vector: sets memory operand 0 (a
;; neon_struct_operand of mode VDQX) from an unspec of register operand 1.
5480 (define_expand "vec_store_lanes<mode><mode>"
5481 [(set (match_operand:VDQX 0 "neon_struct_operand")
5482 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; VST1 of a whole vector: stores register operand 1 (printed with %h) to
;; memory operand 0 (constraint "Um", printed with %A).
5486 (define_insn "neon_vst1<mode>"
5487 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5488 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5491 "vst1.<V_sz_elem>\t%h1, %A0"
5492 [(set_attr "type" "neon_store1_1reg<q>")])
5494 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5495 ;; here on big endian targets.
;; VST1 from one lane of a D-register vector (VDX modes).  Operand 0 is the
;; element in memory, operand 1 the source vector and operand 2 the lane
;; number, which is passed through NEON_ENDIAN_LANE_N and written back to
;; operands[2] before printing.  Two templates exist: a whole-register
;; store and a single-lane store.
;; NOTE(review): the test choosing between them presumably uses `max', the
;; element count of the mode -- confirm.
5496 (define_insn "neon_vst1_lane<mode>"
5497 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5499 [(match_operand:VDX 1 "s_register_operand" "w")
5500 (match_operand:SI 2 "immediate_operand" "i")]
5504 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5505 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5506 operands[2] = GEN_INT (lane);
5508 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5510 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5512 [(set_attr "type" "neon_store1_one_lane<q>")]
5515 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5516 ;; here on big endian targets.
;; VST1 from one lane of a Q-register vector (VQX modes).  Operand 0 is the
;; element in memory, operand 1 the source vector and operand 2 the lane
;; number, converted with NEON_ENDIAN_LANE_N.  Operand 1 is rewritten as a
;; <V_HALF>mode register at `regno' so the instruction names a single D
;; register, and operands[2] is set from `lane' after the `if'.
;; NOTE(review): the `if (lane >= max / 2)' path presumably adjusts `lane'
;; and `regno' to address the other D-register half -- confirm.
5517 (define_insn "neon_vst1_lane<mode>"
5518 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5520 [(match_operand:VQX 1 "s_register_operand" "w")
5521 (match_operand:SI 2 "immediate_operand" "i")]
5525 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5526 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5527 int regno = REGNO (operands[1]);
5528 if (lane >= max / 2)
5533 operands[2] = GEN_INT (lane);
5534 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5536 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5538 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5540 [(set_attr "type" "neon_store1_one_lane<q>")]
;; vec_load_lanes expander for a TImode structure of VDX vectors: sets TI
;; register operand 0 from TI memory operand 1.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode.
5543 (define_expand "vec_load_lanesti<mode>"
5544 [(set (match_operand:TI 0 "s_register_operand")
5545 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5546 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load a two-register structure of VDX vectors into TImode operand 0 from
;; memory operand 1.  64-bit elements are loaded with VLD1.64; every other
;; element size uses VLD2.  The "type" attribute makes the same distinction
;; (neon_load1_2reg<q> versus neon_load2_2reg<q>).
5550 (define_insn "neon_vld2<mode>"
5551 [(set (match_operand:TI 0 "s_register_operand" "=w")
5552 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5553 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5557 if (<V_sz_elem> == 64)
5558 return "vld1.64\t%h0, %A1";
5560 return "vld2.<V_sz_elem>\t%h0, %A1";
5563 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5564 (const_string "neon_load1_2reg<q>")
5565 (const_string "neon_load2_2reg<q>")))]
;; vec_load_lanes expander for an OImode structure of VQ2 vectors: sets OI
;; register operand 0 from OI memory operand 1.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode.
5568 (define_expand "vec_load_lanesoi<mode>"
5569 [(set (match_operand:OI 0 "s_register_operand")
5570 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5571 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VLD2 of a two-register structure of Q-register (VQ2) vectors: loads
;; OImode operand 0 (printed with %h) from memory operand 1.
5575 (define_insn "neon_vld2<mode>"
5576 [(set (match_operand:OI 0 "s_register_operand" "=w")
5577 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5578 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5581 "vld2.<V_sz_elem>\t%h0, %A1"
5582 [(set_attr "type" "neon_load2_2reg_q")])
5584 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5585 ;; here on big endian targets.
;; VLD2 to one lane of a pair of D registers (VD_LANE modes).  Operand 2 is
;; the previous TImode contents (tied to the result) and operand 3 the lane
;; number, converted with NEON_ENDIAN_LANE_N.  The two D registers are named
;; as DImode registers at REGNO (operands[0]) + 0 and + 2.
5586 (define_insn "neon_vld2_lane<mode>"
5587 [(set (match_operand:TI 0 "s_register_operand" "=w")
5588 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5589 (match_operand:TI 2 "s_register_operand" "0")
5590 (match_operand:SI 3 "immediate_operand" "i")
5591 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5595 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5596 int regno = REGNO (operands[0]);
5598 ops[0] = gen_rtx_REG (DImode, regno);
5599 ops[1] = gen_rtx_REG (DImode, regno + 2);
5600 ops[2] = operands[1];
5601 ops[3] = GEN_INT (lane);
5602 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5605 [(set_attr "type" "neon_load2_one_lane<q>")]
5608 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5609 ;; here on big endian targets.
;; VLD2 to one lane of a pair of Q registers (VQ_HS modes).  Operand 2 is
;; the previous OImode contents (tied to the result) and operand 3 the lane
;; number, converted with NEON_ENDIAN_LANE_N.  The instruction names two
;; DImode registers at regno + 0 and regno + 4.
;; NOTE(review): for lane >= max / 2 the `if' presumably rebases `lane' and
;; `regno' onto the other D-register halves -- confirm.
5610 (define_insn "neon_vld2_lane<mode>"
5611 [(set (match_operand:OI 0 "s_register_operand" "=w")
5612 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5613 (match_operand:OI 2 "s_register_operand" "0")
5614 (match_operand:SI 3 "immediate_operand" "i")
5615 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5619 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5620 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5621 int regno = REGNO (operands[0]);
5623 if (lane >= max / 2)
5628 ops[0] = gen_rtx_REG (DImode, regno);
5629 ops[1] = gen_rtx_REG (DImode, regno + 4);
5630 ops[2] = operands[1];
5631 ops[3] = GEN_INT (lane);
5632 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5635 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load a two-element structure to all lanes of a pair of D registers (VDX
;; modes).  Modes with more than one element use VLD2 with the "[]"
;; all-lanes syntax on both halves of operand 0 (%e0, %f0); otherwise a
;; plain VLD1 of operand 0 is emitted.  The "type" attribute makes the same
;; distinction on <V_mode_nunits>.
5638 (define_insn "neon_vld2_dup<mode>"
5639 [(set (match_operand:TI 0 "s_register_operand" "=w")
5640 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5641 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5645 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5646 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5648 return "vld1.<V_sz_elem>\t%h0, %A1";
5651 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5652 (const_string "neon_load2_all_lanes<q>")
5653 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes expander for a TImode structure of VDX vectors: sets TI
;; memory operand 0 from TI register operand 1.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode.
5656 (define_expand "vec_store_lanesti<mode>"
5657 [(set (match_operand:TI 0 "neon_struct_operand")
5658 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5659 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store a two-register structure of VDX vectors held in TImode operand 1 to
;; memory operand 0.  64-bit elements are stored with VST1.64; every other
;; element size uses VST2.  The VST2 form stores whole registers, so its
;; scheduling type is neon_store2_2reg<q> rather than a single-lane store
;; type, in line with how neon_vld2 (neon_load2_2reg<q>) and neon_vst4
;; (neon_store4_4reg<q>) classify their whole-structure forms.
5663 (define_insn "neon_vst2<mode>"
5664 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5665 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5666 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5670 if (<V_sz_elem> == 64)
5671 return "vst1.64\t%h1, %A0";
5673 return "vst2.<V_sz_elem>\t%h1, %A0";
5676 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5677 (const_string "neon_store1_2reg<q>")
5678 (const_string "neon_store2_2reg<q>")))]
;; vec_store_lanes expander for an OImode structure of VQ2 vectors: sets OI
;; memory operand 0 from OI register operand 1.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode.
5681 (define_expand "vec_store_lanesoi<mode>"
5682 [(set (match_operand:OI 0 "neon_struct_operand")
5683 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5684 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VST2 of a two-register structure of Q-register (VQ2) vectors: stores
;; OImode operand 1 (printed with %h) to memory operand 0.
5688 (define_insn "neon_vst2<mode>"
5689 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5690 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5691 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5694 "vst2.<V_sz_elem>\t%h1, %A0"
5695 [(set_attr "type" "neon_store2_4reg<q>")]
5698 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5699 ;; here on big endian targets.
;; VST2 from one lane of a pair of D registers (VD_LANE modes).  Operand 1
;; is the TImode source and operand 2 the lane number, converted with
;; NEON_ENDIAN_LANE_N.  The two D registers are named as DImode registers at
;; REGNO (operands[1]) + 0 and + 2.
5700 (define_insn "neon_vst2_lane<mode>"
5701 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5702 (unspec:<V_two_elem>
5703 [(match_operand:TI 1 "s_register_operand" "w")
5704 (match_operand:SI 2 "immediate_operand" "i")
5705 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5709 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5710 int regno = REGNO (operands[1]);
5712 ops[0] = operands[0];
5713 ops[1] = gen_rtx_REG (DImode, regno);
5714 ops[2] = gen_rtx_REG (DImode, regno + 2);
5715 ops[3] = GEN_INT (lane);
5716 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5719 [(set_attr "type" "neon_store2_one_lane<q>")]
5722 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5723 ;; here on big endian targets.
;; VST2 from one lane of a pair of Q registers (VQ_HS modes).  Operand 1 is
;; the OImode source and operand 2 the lane number, converted with
;; NEON_ENDIAN_LANE_N.  The instruction names two DImode registers at
;; regno + 0 and regno + 4.
;; NOTE(review): for lane >= max / 2 the `if' presumably rebases `lane' and
;; `regno' onto the other D-register halves -- confirm.
5724 (define_insn "neon_vst2_lane<mode>"
5725 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5726 (unspec:<V_two_elem>
5727 [(match_operand:OI 1 "s_register_operand" "w")
5728 (match_operand:SI 2 "immediate_operand" "i")
5729 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5733 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5734 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5735 int regno = REGNO (operands[1]);
5737 if (lane >= max / 2)
5742 ops[0] = operands[0];
5743 ops[1] = gen_rtx_REG (DImode, regno);
5744 ops[2] = gen_rtx_REG (DImode, regno + 4);
5745 ops[3] = GEN_INT (lane);
5746 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5749 [(set_attr "type" "neon_store2_one_lane<q>")]
;; vec_load_lanes expander for an EImode structure of VDX vectors: sets EI
;; register operand 0 from EI memory operand 1.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode.
5752 (define_expand "vec_load_lanesei<mode>"
5753 [(set (match_operand:EI 0 "s_register_operand")
5754 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5755 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load a three-register structure of VDX vectors into EImode operand 0
;; from memory operand 1.  64-bit elements are loaded with VLD1.64; every
;; other element size uses VLD3.  The "type" attribute makes the same
;; distinction (neon_load1_3reg<q> versus neon_load3_3reg<q>).
5759 (define_insn "neon_vld3<mode>"
5760 [(set (match_operand:EI 0 "s_register_operand" "=w")
5761 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5762 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5766 if (<V_sz_elem> == 64)
5767 return "vld1.64\t%h0, %A1";
5769 return "vld3.<V_sz_elem>\t%h0, %A1";
5772 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5773 (const_string "neon_load1_3reg<q>")
5774 (const_string "neon_load3_3reg<q>")))]
;; vec_load_lanes expander for a CImode structure of VQ2 vectors.  It
;; simply emits neon_vld3<mode> on the same two operands.
5777 (define_expand "vec_load_lanesci<mode>"
5778 [(match_operand:CI 0 "s_register_operand")
5779 (match_operand:CI 1 "neon_struct_operand")
5780 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5783 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; VLD3 of Q-register (VQ2) vectors into CImode operand 0, done as two
;; EImode-sized loads.  neon_vld3qa loads from the start of operand 1;
;; neon_vld3qb loads from GET_MODE_SIZE (EImode) bytes further on and takes
;; operands[0] again as its tied input.
5787 (define_expand "neon_vld3<mode>"
5788 [(match_operand:CI 0 "s_register_operand")
5789 (match_operand:CI 1 "neon_struct_operand")
5790 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5795 mem = adjust_address (operands[1], EImode, 0);
5796 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5797 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5798 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of a Q-register VLD3: loads from EImode memory operand 1 into
;; the DImode registers at REGNO (operands[0]) + 0, 4 and 8 of CImode
;; operand 0.
5802 (define_insn "neon_vld3qa<mode>"
5803 [(set (match_operand:CI 0 "s_register_operand" "=w")
5804 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5805 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5809 int regno = REGNO (operands[0]);
5811 ops[0] = gen_rtx_REG (DImode, regno);
5812 ops[1] = gen_rtx_REG (DImode, regno + 4);
5813 ops[2] = gen_rtx_REG (DImode, regno + 8);
5814 ops[3] = operands[1];
5815 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5818 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of a Q-register VLD3: loads from EImode memory operand 1
;; into the DImode registers at REGNO (operands[0]) + 2, 6 and 10.  Operand
;; 2 is the previous CImode contents, tied to the result.
5821 (define_insn "neon_vld3qb<mode>"
5822 [(set (match_operand:CI 0 "s_register_operand" "=w")
5823 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5824 (match_operand:CI 2 "s_register_operand" "0")
5825 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5829 int regno = REGNO (operands[0]);
5831 ops[0] = gen_rtx_REG (DImode, regno + 2);
5832 ops[1] = gen_rtx_REG (DImode, regno + 6);
5833 ops[2] = gen_rtx_REG (DImode, regno + 10);
5834 ops[3] = operands[1];
5835 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5838 [(set_attr "type" "neon_load3_3reg<q>")]
5841 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5842 ;; here on big endian targets.
;; VLD3 to one lane of three D registers (VD_LANE modes).  Operand 2 is the
;; previous EImode contents (tied to the result) and operand 3 the lane
;; number, converted with NEON_ENDIAN_LANE_N.  The registers are named as
;; DImode registers at REGNO (operands[0]) + 0, 2 and 4.  The memory operand
;; is printed with plain %3, without the %A modifier used by the vld2 and
;; vld4 lane patterns.
5843 (define_insn "neon_vld3_lane<mode>"
5844 [(set (match_operand:EI 0 "s_register_operand" "=w")
5845 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5846 (match_operand:EI 2 "s_register_operand" "0")
5847 (match_operand:SI 3 "immediate_operand" "i")
5848 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5852 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5853 int regno = REGNO (operands[0]);
5855 ops[0] = gen_rtx_REG (DImode, regno);
5856 ops[1] = gen_rtx_REG (DImode, regno + 2);
5857 ops[2] = gen_rtx_REG (DImode, regno + 4);
5858 ops[3] = operands[1];
5859 ops[4] = GEN_INT (lane);
5860 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5864 [(set_attr "type" "neon_load3_one_lane<q>")]
5867 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5868 ;; here on big endian targets.
;; VLD3 to one lane of three Q registers (VQ_HS modes).  Operand 2 is the
;; previous CImode contents (tied to the result) and operand 3 the lane
;; number, converted with NEON_ENDIAN_LANE_N.  The instruction names three
;; DImode registers at regno + 0, 4 and 8; the memory operand is printed
;; with plain %3 (no %A modifier).
;; NOTE(review): for lane >= max / 2 the `if' presumably rebases `lane' and
;; `regno' onto the other D-register halves -- confirm.
5869 (define_insn "neon_vld3_lane<mode>"
5870 [(set (match_operand:CI 0 "s_register_operand" "=w")
5871 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5872 (match_operand:CI 2 "s_register_operand" "0")
5873 (match_operand:SI 3 "immediate_operand" "i")
5874 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5878 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5879 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5880 int regno = REGNO (operands[0]);
5882 if (lane >= max / 2)
5887 ops[0] = gen_rtx_REG (DImode, regno);
5888 ops[1] = gen_rtx_REG (DImode, regno + 4);
5889 ops[2] = gen_rtx_REG (DImode, regno + 8);
5890 ops[3] = operands[1];
5891 ops[4] = GEN_INT (lane);
5892 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5896 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a three-element structure to all lanes of three D registers (VDX
;; modes).  Modes with more than one element use VLD3 with the "[]"
;; all-lanes syntax on the DImode registers at REGNO (operands[0]) + 0, 2
;; and 4, printing the memory operand with plain %3; otherwise a plain VLD1
;; of operand 0 is emitted.  The "type" attribute makes the same distinction
;; on <V_mode_nunits>.
5899 (define_insn "neon_vld3_dup<mode>"
5900 [(set (match_operand:EI 0 "s_register_operand" "=w")
5901 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5902 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5906 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5908 int regno = REGNO (operands[0]);
5910 ops[0] = gen_rtx_REG (DImode, regno);
5911 ops[1] = gen_rtx_REG (DImode, regno + 2);
5912 ops[2] = gen_rtx_REG (DImode, regno + 4);
5913 ops[3] = operands[1];
5914 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5918 return "vld1.<V_sz_elem>\t%h0, %A1";
5921 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5922 (const_string "neon_load3_all_lanes<q>")
5923 (const_string "neon_load1_1reg<q>")))])
;; vec_store_lanes expander for an EImode structure of VDX vectors: sets EI
;; memory operand 0 from EI register operand 1.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode.
5925 (define_expand "vec_store_lanesei<mode>"
5926 [(set (match_operand:EI 0 "neon_struct_operand")
5927 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5928 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store a three-register structure of VDX vectors held in EImode operand 1
;; to memory operand 0.  64-bit elements are stored with VST1.64; every
;; other element size uses VST3.  The VST3 form stores whole registers, so
;; its scheduling type is neon_store3_3reg<q> -- the type neon_vst3qa and
;; neon_vst3qb use -- rather than a single-lane store type.
5932 (define_insn "neon_vst3<mode>"
5933 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5934 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5935 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5939 if (<V_sz_elem> == 64)
5940 return "vst1.64\t%h1, %A0";
5942 return "vst3.<V_sz_elem>\t%h1, %A0";
5945 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5946 (const_string "neon_store1_3reg<q>")
5947 (const_string "neon_store3_3reg<q>")))])
;; vec_store_lanes expander for a CImode structure of VQ2 vectors.  It
;; simply emits neon_vst3<mode> on the same two operands.
5949 (define_expand "vec_store_lanesci<mode>"
5950 [(match_operand:CI 0 "neon_struct_operand")
5951 (match_operand:CI 1 "s_register_operand")
5952 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5955 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; VST3 of Q-register (VQ2) vectors from CImode operand 1, done as two
;; EImode-sized stores.  neon_vst3qa stores to the start of operand 0;
;; neon_vst3qb stores GET_MODE_SIZE (EImode) bytes further on.
5959 (define_expand "neon_vst3<mode>"
5960 [(match_operand:CI 0 "neon_struct_operand")
5961 (match_operand:CI 1 "s_register_operand")
5962 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5967 mem = adjust_address (operands[0], EImode, 0);
5968 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5969 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5970 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of a Q-register VST3: stores the DImode registers at
;; REGNO (operands[1]) + 0, 4 and 8 of CImode operand 1 to EImode memory
;; operand 0.
5974 (define_insn "neon_vst3qa<mode>"
5975 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5976 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5977 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5981 int regno = REGNO (operands[1]);
5983 ops[0] = operands[0];
5984 ops[1] = gen_rtx_REG (DImode, regno);
5985 ops[2] = gen_rtx_REG (DImode, regno + 4);
5986 ops[3] = gen_rtx_REG (DImode, regno + 8);
5987 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5990 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of a Q-register VST3: stores the DImode registers at
;; REGNO (operands[1]) + 2, 6 and 10 of CImode operand 1 to EImode memory
;; operand 0.
5993 (define_insn "neon_vst3qb<mode>"
5994 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5995 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5996 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6000 int regno = REGNO (operands[1]);
6002 ops[0] = operands[0];
6003 ops[1] = gen_rtx_REG (DImode, regno + 2);
6004 ops[2] = gen_rtx_REG (DImode, regno + 6);
6005 ops[3] = gen_rtx_REG (DImode, regno + 10);
6006 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6009 [(set_attr "type" "neon_store3_3reg<q>")]
6012 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6013 ;; here on big endian targets.
;; VST3 from one lane of three D registers (VD_LANE modes).  Operand 1 is
;; the EImode source and operand 2 the lane number, converted with
;; NEON_ENDIAN_LANE_N.  The registers are named as DImode registers at
;; REGNO (operands[1]) + 0, 2 and 4.  The memory operand is printed with
;; plain %0, without the %A modifier used by the vst2 lane patterns.
6014 (define_insn "neon_vst3_lane<mode>"
6015 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6016 (unspec:<V_three_elem>
6017 [(match_operand:EI 1 "s_register_operand" "w")
6018 (match_operand:SI 2 "immediate_operand" "i")
6019 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6023 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6024 int regno = REGNO (operands[1]);
6026 ops[0] = operands[0];
6027 ops[1] = gen_rtx_REG (DImode, regno);
6028 ops[2] = gen_rtx_REG (DImode, regno + 2);
6029 ops[3] = gen_rtx_REG (DImode, regno + 4);
6030 ops[4] = GEN_INT (lane);
6031 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6035 [(set_attr "type" "neon_store3_one_lane<q>")]
6038 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6039 ;; here on big endian targets.
;; VST3 from one lane of three Q registers (VQ_HS modes).  Operand 1 is the
;; CImode source and operand 2 the lane number, converted with
;; NEON_ENDIAN_LANE_N.  The instruction names three DImode registers at
;; regno + 0, 4 and 8; the memory operand is printed with plain %0 (no %A
;; modifier).
;; NOTE(review): for lane >= max / 2 the `if' presumably rebases `lane' and
;; `regno' onto the other D-register halves -- confirm.
6040 (define_insn "neon_vst3_lane<mode>"
6041 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6042 (unspec:<V_three_elem>
6043 [(match_operand:CI 1 "s_register_operand" "w")
6044 (match_operand:SI 2 "immediate_operand" "i")
6045 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6049 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6050 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6051 int regno = REGNO (operands[1]);
6053 if (lane >= max / 2)
6058 ops[0] = operands[0];
6059 ops[1] = gen_rtx_REG (DImode, regno);
6060 ops[2] = gen_rtx_REG (DImode, regno + 4);
6061 ops[3] = gen_rtx_REG (DImode, regno + 8);
6062 ops[4] = GEN_INT (lane);
6063 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6067 [(set_attr "type" "neon_store3_one_lane<q>")]
;; vec_load_lanes expander for an OImode structure of VDX vectors: sets OI
;; register operand 0 from OI memory operand 1.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode.
6070 (define_expand "vec_load_lanesoi<mode>"
6071 [(set (match_operand:OI 0 "s_register_operand")
6072 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6073 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load a four-register structure of VDX vectors into OImode operand 0 from
;; memory operand 1.  64-bit elements are loaded with VLD1.64; every other
;; element size uses VLD4.  The "type" attribute makes the same distinction
;; (neon_load1_4reg<q> versus neon_load4_4reg<q>).
6077 (define_insn "neon_vld4<mode>"
6078 [(set (match_operand:OI 0 "s_register_operand" "=w")
6079 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6080 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6084 if (<V_sz_elem> == 64)
6085 return "vld1.64\t%h0, %A1";
6087 return "vld4.<V_sz_elem>\t%h0, %A1";
6090 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6091 (const_string "neon_load1_4reg<q>")
6092 (const_string "neon_load4_4reg<q>")))]
;; vec_load_lanes expander for an XImode structure of VQ2 vectors.  It
;; simply emits neon_vld4<mode> on the same two operands.
6095 (define_expand "vec_load_lanesxi<mode>"
6096 [(match_operand:XI 0 "s_register_operand")
6097 (match_operand:XI 1 "neon_struct_operand")
6098 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6101 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; VLD4 of Q-register (VQ2) vectors into XImode operand 0, done as two
;; OImode-sized loads.  neon_vld4qa loads from the start of operand 1;
;; neon_vld4qb loads from GET_MODE_SIZE (OImode) bytes further on and takes
;; operands[0] again as its tied input.
6105 (define_expand "neon_vld4<mode>"
6106 [(match_operand:XI 0 "s_register_operand")
6107 (match_operand:XI 1 "neon_struct_operand")
6108 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6113 mem = adjust_address (operands[1], OImode, 0);
6114 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6115 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6116 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of a Q-register VLD4: loads from OImode memory operand 1 into
;; the DImode registers at REGNO (operands[0]) + 0, 4, 8 and 12 of XImode
;; operand 0.
6120 (define_insn "neon_vld4qa<mode>"
6121 [(set (match_operand:XI 0 "s_register_operand" "=w")
6122 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6123 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6127 int regno = REGNO (operands[0]);
6129 ops[0] = gen_rtx_REG (DImode, regno);
6130 ops[1] = gen_rtx_REG (DImode, regno + 4);
6131 ops[2] = gen_rtx_REG (DImode, regno + 8);
6132 ops[3] = gen_rtx_REG (DImode, regno + 12);
6133 ops[4] = operands[1];
6134 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6137 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of a Q-register VLD4: loads from OImode memory operand 1
;; into the DImode registers at REGNO (operands[0]) + 2, 6, 10 and 14.
;; Operand 2 is the previous XImode contents, tied to the result.
6140 (define_insn "neon_vld4qb<mode>"
6141 [(set (match_operand:XI 0 "s_register_operand" "=w")
6142 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6143 (match_operand:XI 2 "s_register_operand" "0")
6144 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6148 int regno = REGNO (operands[0]);
6150 ops[0] = gen_rtx_REG (DImode, regno + 2);
6151 ops[1] = gen_rtx_REG (DImode, regno + 6);
6152 ops[2] = gen_rtx_REG (DImode, regno + 10);
6153 ops[3] = gen_rtx_REG (DImode, regno + 14);
6154 ops[4] = operands[1];
6155 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6158 [(set_attr "type" "neon_load4_4reg<q>")]
6161 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6162 ;; here on big endian targets.
;; VLD4 to one lane of four D registers (VD_LANE modes).  Operand 2 is the
;; previous OImode contents (tied to the result) and operand 3 the lane
;; number, converted with NEON_ENDIAN_LANE_N.  The registers are named as
;; DImode registers at REGNO (operands[0]) + 0, 2, 4 and 6.
6163 (define_insn "neon_vld4_lane<mode>"
6164 [(set (match_operand:OI 0 "s_register_operand" "=w")
6165 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6166 (match_operand:OI 2 "s_register_operand" "0")
6167 (match_operand:SI 3 "immediate_operand" "i")
6168 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6172 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6173 int regno = REGNO (operands[0]);
6175 ops[0] = gen_rtx_REG (DImode, regno);
6176 ops[1] = gen_rtx_REG (DImode, regno + 2);
6177 ops[2] = gen_rtx_REG (DImode, regno + 4);
6178 ops[3] = gen_rtx_REG (DImode, regno + 6);
6179 ops[4] = operands[1];
6180 ops[5] = GEN_INT (lane);
6181 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6185 [(set_attr "type" "neon_load4_one_lane<q>")]
6188 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6189 ;; here on big endian targets.
;; VLD4 to one lane of four Q registers (VQ_HS modes).  Operand 2 is the
;; previous XImode contents (tied to the result) and operand 3 the lane
;; number, converted with NEON_ENDIAN_LANE_N.  The instruction names four
;; DImode registers at regno + 0, 4, 8 and 12.
;; NOTE(review): for lane >= max / 2 the `if' presumably rebases `lane' and
;; `regno' onto the other D-register halves -- confirm.
6190 (define_insn "neon_vld4_lane<mode>"
6191 [(set (match_operand:XI 0 "s_register_operand" "=w")
6192 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6193 (match_operand:XI 2 "s_register_operand" "0")
6194 (match_operand:SI 3 "immediate_operand" "i")
6195 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6199 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6200 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6201 int regno = REGNO (operands[0]);
6203 if (lane >= max / 2)
6208 ops[0] = gen_rtx_REG (DImode, regno);
6209 ops[1] = gen_rtx_REG (DImode, regno + 4);
6210 ops[2] = gen_rtx_REG (DImode, regno + 8);
6211 ops[3] = gen_rtx_REG (DImode, regno + 12);
6212 ops[4] = operands[1];
6213 ops[5] = GEN_INT (lane);
6214 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6218 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load a four-element structure to all lanes of four D registers (VDX
;; modes).  Modes with more than one element use VLD4 with the "[]"
;; all-lanes syntax on the DImode registers at REGNO (operands[0]) + 0, 2, 4
;; and 6; otherwise a plain VLD1 of operand 0 is emitted.  The "type"
;; attribute makes the same distinction on <V_mode_nunits>.
6221 (define_insn "neon_vld4_dup<mode>"
6222 [(set (match_operand:OI 0 "s_register_operand" "=w")
6223 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6224 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6228 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6230 int regno = REGNO (operands[0]);
6232 ops[0] = gen_rtx_REG (DImode, regno);
6233 ops[1] = gen_rtx_REG (DImode, regno + 2);
6234 ops[2] = gen_rtx_REG (DImode, regno + 4);
6235 ops[3] = gen_rtx_REG (DImode, regno + 6);
6236 ops[4] = operands[1];
6237 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6242 return "vld1.<V_sz_elem>\t%h0, %A1";
6245 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6246 (const_string "neon_load4_all_lanes<q>")
6247 (const_string "neon_load1_1reg<q>")))]
;; Expander vec_store_lanesoi<mode> for the VDX modes: sets the
;; neon_struct_operand memory operand 0 from an unspec of the OI-mode register
;; operand 1.
6250 (define_expand "vec_store_lanesoi<mode>"
6251 [(set (match_operand:OI 0 "neon_struct_operand")
6252 (unspec:OI [(match_operand:OI 1 "s_register_operand")
6253 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store of an OI-mode register group (operand 1) to memory operand 0 for the
;; VDX modes.  With 64-bit elements (<V_sz_elem> == 64) the output is
;; vst1.64; otherwise it is vst4.<V_sz_elem>.  The "type" attribute follows
;; the same 64-bit test: neon_store1_4reg<q> versus neon_store4_4reg<q>.
6257 (define_insn "neon_vst4<mode>"
6258 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6259 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6260 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6264 if (<V_sz_elem> == 64)
6265 return "vst1.64\t%h1, %A0";
6267 return "vst4.<V_sz_elem>\t%h1, %A0";
6270 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6271 (const_string "neon_store1_4reg<q>")
6272 (const_string "neon_store4_4reg<q>")))]
;; Expander vec_store_lanesxi<mode> for the VQ2 modes.  It passes memory
;; operand 0 and XI-mode register operand 1 unchanged to gen_neon_vst4<mode>.
6275 (define_expand "vec_store_lanesxi<mode>"
6276 [(match_operand:XI 0 "neon_struct_operand")
6277 (match_operand:XI 1 "s_register_operand")
6278 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6281 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; XI-mode (VQ2) form of neon_vst4, emitted as two instructions.  The XI
;; memory operand 0 is re-addressed as two OI-mode pieces: the first at
;; offset 0 and the second GET_MODE_SIZE (OImode) bytes after it.
;; neon_vst4qa<mode> stores to the first piece and neon_vst4qb<mode> to the
;; second; both receive the whole XI register operand 1.
6285 (define_expand "neon_vst4<mode>"
6286 [(match_operand:XI 0 "neon_struct_operand")
6287 (match_operand:XI 1 "s_register_operand")
6288 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6293 mem = adjust_address (operands[0], OImode, 0);
6294 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6295 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6296 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First of the two vst4 instructions used to store an XI-mode register group
;; (VQ2 modes).  It stores the D registers at register numbers regno,
;; regno + 4, regno + 8 and regno + 12 of operand 1 to the OI-mode memory
;; operand 0.
6300 (define_insn "neon_vst4qa<mode>"
6301 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6302 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6303 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6307 int regno = REGNO (operands[1]);
6309 ops[0] = operands[0];
6310 ops[1] = gen_rtx_REG (DImode, regno);
6311 ops[2] = gen_rtx_REG (DImode, regno + 4);
6312 ops[3] = gen_rtx_REG (DImode, regno + 8);
6313 ops[4] = gen_rtx_REG (DImode, regno + 12);
6314 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6317 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second of the two vst4 instructions used to store an XI-mode register
;; group (VQ2 modes).  It stores the D registers at register numbers
;; regno + 2, regno + 6, regno + 10 and regno + 14 of operand 1, i.e. the
;; ones interleaved with those handled by neon_vst4qa<mode>.
6320 (define_insn "neon_vst4qb<mode>"
6321 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6322 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6323 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6327 int regno = REGNO (operands[1]);
6329 ops[0] = operands[0];
6330 ops[1] = gen_rtx_REG (DImode, regno + 2);
6331 ops[2] = gen_rtx_REG (DImode, regno + 6);
6332 ops[3] = gen_rtx_REG (DImode, regno + 10);
6333 ops[4] = gen_rtx_REG (DImode, regno + 14);
6334 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6337 [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane VST4 for the VD_LANE modes.  It stores one lane from each of
;; the four D registers at register numbers regno, regno + 2, regno + 4 and
;; regno + 6 of the OI-mode operand 1 to memory operand 0.  The lane index is
;; operand 2 mapped through NEON_ENDIAN_LANE_N.
6340 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6341 ;; here on big endian targets.
6342 (define_insn "neon_vst4_lane<mode>"
6343 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6344 (unspec:<V_four_elem>
6345 [(match_operand:OI 1 "s_register_operand" "w")
6346 (match_operand:SI 2 "immediate_operand" "i")
6347 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6351 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6352 int regno = REGNO (operands[1]);
6354 ops[0] = operands[0];
6355 ops[1] = gen_rtx_REG (DImode, regno);
6356 ops[2] = gen_rtx_REG (DImode, regno + 2);
6357 ops[3] = gen_rtx_REG (DImode, regno + 4);
6358 ops[4] = gen_rtx_REG (DImode, regno + 6);
6359 ops[5] = GEN_INT (lane);
6360 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6364 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Single-lane VST4 for the VQ_HS modes.  It stores one lane from each of
;; four D registers of the XI-mode operand 1 (register numbers regno,
;; regno + 4, regno + 8 and regno + 12) to memory operand 0.  The lane index
;; is operand 2 mapped through NEON_ENDIAN_LANE_N.
;; NOTE(review): lanes with lane >= max / 2 get special handling; presumably
;; they are redirected to the other D register of each pair -- confirm.
;; The "type" attribute is the one-lane store class, matching the VD_LANE
;; variant of this pattern and the one-lane class used by the VQ_HS
;; neon_vld4_lane pattern, since the emitted instruction is a lane store.
6367 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6368 ;; here on big endian targets.
6369 (define_insn "neon_vst4_lane<mode>"
6370 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6371 (unspec:<V_four_elem>
6372 [(match_operand:XI 1 "s_register_operand" "w")
6373 (match_operand:SI 2 "immediate_operand" "i")
6374 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6378 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6379 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6380 int regno = REGNO (operands[1]);
6382 if (lane >= max / 2)
6387 ops[0] = operands[0];
6388 ops[1] = gen_rtx_REG (DImode, regno);
6389 ops[2] = gen_rtx_REG (DImode, regno + 4);
6390 ops[3] = gen_rtx_REG (DImode, regno + 8);
6391 ops[4] = gen_rtx_REG (DImode, regno + 12);
6392 ops[5] = GEN_INT (lane);
6393 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6397 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Widening move of half of a VU-mode vector: operand 0 is the SE extension
;; of the <V_HALF> elements selected from operand 1 by the
;; vect_par_constant_low parallel in operand 2.  Emitted as vmovl reading
;; %e1.  Only enabled when !BYTES_BIG_ENDIAN.
6400 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6401 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6402 (SE:<V_unpack> (vec_select:<V_HALF>
6403 (match_operand:VU 1 "register_operand" "w")
6404 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6405 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6406 "vmovl.<US><V_sz_elem> %q0, %e1"
6407 [(set_attr "type" "neon_shift_imm_long")]
;; Widening move of half of a VU-mode vector: operand 0 is the SE extension
;; of the <V_HALF> elements selected from operand 1 by the
;; vect_par_constant_high parallel in operand 2.  Emitted as vmovl reading
;; %f1.  Only enabled when !BYTES_BIG_ENDIAN.
6410 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6411 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6412 (SE:<V_unpack> (vec_select:<V_HALF>
6413 (match_operand:VU 1 "register_operand" "w")
6414 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6415 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6416 "vmovl.<US><V_sz_elem> %q0, %f1"
6417 [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_unpack<US>_hi_<mode> for the VU modes (little-endian only).
;; It builds a PARALLEL holding the element indices <V_mode_nunits>/2 up to
;; <V_mode_nunits> - 1, i.e. the upper half of the vector, and emits
;; neon_vec_unpack<US>_hi_<mode> with operand 0 as the destination.
6420 (define_expand "vec_unpack<US>_hi_<mode>"
6421 [(match_operand:<V_unpack> 0 "register_operand" "")
6422 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6423 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6425 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6428 for (i = 0; i < (<V_mode_nunits>/2); i++)
6429 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6431 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6432 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander vec_unpack<US>_lo_<mode> for the VU modes (little-endian only).
;; It builds a PARALLEL holding the element indices 0 up to
;; <V_mode_nunits>/2 - 1, i.e. the lower half of the vector, and emits
;; neon_vec_unpack<US>_lo_<mode> with operand 0 as the destination.
6439 (define_expand "vec_unpack<US>_lo_<mode>"
6440 [(match_operand:<V_unpack> 0 "register_operand" "")
6441 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6442 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6444 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6447 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6448 RTVEC_ELT (v, i) = GEN_INT (i);
6449 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6450 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of vector halves for the VU modes: the product of the
;; SE extensions of <V_HALF> selections taken from operands 1 and 3.  The
;; selection from operand 1 uses the vect_par_constant_low parallel in
;; operand 2.  Emitted as vmull reading %e1 and %e3.  Little-endian only.
6457 (define_insn "neon_vec_<US>mult_lo_<mode>"
6458 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6459 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6460 (match_operand:VU 1 "register_operand" "w")
6461 (match_operand:VU 2 "vect_par_constant_low" "")))
6462 (SE:<V_unpack> (vec_select:<V_HALF>
6463 (match_operand:VU 3 "register_operand" "w")
6465 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6466 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6467 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_lo_<mode> for the VU modes (little-endian
;; only).  It builds a PARALLEL holding the lower-half element indices
;; 0 .. <V_mode_nunits>/2 - 1 and emits neon_vec_<US>mult_lo_<mode> with
;; operand 0 as the destination.
6470 (define_expand "vec_widen_<US>mult_lo_<mode>"
6471 [(match_operand:<V_unpack> 0 "register_operand" "")
6472 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6473 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6474 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6476 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6479 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6480 RTVEC_ELT (v, i) = GEN_INT (i);
6481 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6483 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of vector halves for the VU modes: the product of the
;; SE extensions of <V_HALF> selections taken from operands 1 and 3.  The
;; selection from operand 1 uses the vect_par_constant_high parallel in
;; operand 2.  Emitted as vmull reading %f1 and %f3.  Little-endian only.
6491 (define_insn "neon_vec_<US>mult_hi_<mode>"
6492 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6493 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6494 (match_operand:VU 1 "register_operand" "w")
6495 (match_operand:VU 2 "vect_par_constant_high" "")))
6496 (SE:<V_unpack> (vec_select:<V_HALF>
6497 (match_operand:VU 3 "register_operand" "w")
6499 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6500 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6501 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_hi_<mode> for the VU modes (little-endian
;; only).  It builds a PARALLEL holding the upper-half element indices
;; <V_mode_nunits>/2 .. <V_mode_nunits> - 1 and emits
;; neon_vec_<US>mult_hi_<mode> with operand 0 as the destination.
6504 (define_expand "vec_widen_<US>mult_hi_<mode>"
6505 [(match_operand:<V_unpack> 0 "register_operand" "")
6506 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6507 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6508 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6510 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6513 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6514 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6515 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6517 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left for the VW modes, emitted as vshll with D-register
;; source %P1, Q-register destination %q0 and the constant shift amount in
;; operand 2.
;; NOTE(review): the RTL is the SE extension of an ashift performed in the
;; narrow VW mode, whereas VSHLL shifts the widened elements.  The two differ
;; when bits are shifted out of the narrow element -- confirm the RTL is
;; intended.
6526 (define_insn "neon_vec_<US>shiftl_<mode>"
6527 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6528 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6529 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6532 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6534 [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_widen_<US>shiftl_lo_<mode> for the VU modes (little-endian
;; only).  It emits neon_vec_<US>shiftl_<V_half> on the <V_HALF>-mode subreg
;; of operand 1 at byte offset 0, with operand 0 as the destination.
6537 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6538 [(match_operand:<V_unpack> 0 "register_operand" "")
6539 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6540 (match_operand:SI 2 "immediate_operand" "i")]
6541 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6543 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6544 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander vec_widen_<US>shiftl_hi_<mode> for the VU modes (little-endian
;; only).  It emits neon_vec_<US>shiftl_<V_half> on the <V_HALF>-mode subreg
;; of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. the second
;; half of the register, with operand 0 as the destination.
6550 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6551 [(match_operand:<V_unpack> 0 "register_operand" "")
6552 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6553 (match_operand:SI 2 "immediate_operand" "i")]
6554 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6556 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6557 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6558 GET_MODE_SIZE (<V_HALF>mode)),
;; Widening move of a whole VDI-mode (D-register) vector: operand 0 is the SE
;; extension of operand 1 in <V_widen> mode, emitted as vmovl from %P1 to %q0.
;; NOTE(review): the "type" here is neon_move, while the vmovl patterns
;; neon_vec_unpack<US>_lo/hi_<mode> use neon_shift_imm_long -- confirm which
;; classification is intended.
6564 ;; Vectorize for non-neon-quad case
6565 (define_insn "neon_unpack<US>_<mode>"
6566 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6567 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6569 "vmovl.<US><V_sz_elem> %q0, %P1"
6570 [(set_attr "type" "neon_move")]
;; Expander vec_unpack<US>_lo_<mode> for the VDI modes.  It extends the whole
;; input vector into a fresh <V_widen>-mode temporary with
;; neon_unpack<US>_<mode>, then copies the low half of that temporary into
;; operand 0 with neon_vget_low<V_widen_l>.
6573 (define_expand "vec_unpack<US>_lo_<mode>"
6574 [(match_operand:<V_double_width> 0 "register_operand" "")
6575 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6578 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6579 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6580 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander vec_unpack<US>_hi_<mode> for the VDI modes.  It extends the whole
;; input vector into a fresh <V_widen>-mode temporary with
;; neon_unpack<US>_<mode>, then copies the high half of that temporary into
;; operand 0 with neon_vget_high<V_widen_l>.
6586 (define_expand "vec_unpack<US>_hi_<mode>"
6587 [(match_operand:<V_double_width> 0 "register_operand" "")
6588 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6591 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6592 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6593 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI-mode (D-register) vectors: the
;; <V_widen>-mode product of the extended operands 1 and 2, emitted as vmull
;; with destination %q0 and sources %P1 and %P2.
6599 (define_insn "neon_vec_<US>mult_<mode>"
6600 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6601 (mult:<V_widen> (SE:<V_widen>
6602 (match_operand:VDI 1 "register_operand" "w"))
6604 (match_operand:VDI 2 "register_operand" "w"))))]
6606 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6607 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_hi_<mode> for the VDI modes.  It computes the
;; full widening product of operands 1 and 2 into a fresh <V_widen>-mode
;; temporary with neon_vec_<US>mult_<mode>, then copies the high half of that
;; temporary into operand 0 with neon_vget_high<V_widen_l>.
6610 (define_expand "vec_widen_<US>mult_hi_<mode>"
6611 [(match_operand:<V_double_width> 0 "register_operand" "")
6612 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6613 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6616 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6617 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6618 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>mult_lo_<mode> for the VDI modes.  It computes the
;; full widening product of operands 1 and 2 into a fresh <V_widen>-mode
;; temporary with neon_vec_<US>mult_<mode>, then copies the low half of that
;; temporary into operand 0 with neon_vget_low<V_widen_l>.
6625 (define_expand "vec_widen_<US>mult_lo_<mode>"
6626 [(match_operand:<V_double_width> 0 "register_operand" "")
6627 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6628 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6631 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6632 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6633 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>shiftl_hi_<mode> for the VDI modes.  It performs
;; the widening shift of operand 1 by operand 2 into a fresh <V_widen>-mode
;; temporary with neon_vec_<US>shiftl_<mode>, then copies the high half of
;; that temporary into operand 0 with neon_vget_high<V_widen_l>.
6640 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6641 [(match_operand:<V_double_width> 0 "register_operand" "")
6642 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6643 (match_operand:SI 2 "immediate_operand" "i")]
6646 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6647 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6648 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>shiftl_lo_<mode> for the VDI modes.  It performs
;; the widening shift of operand 1 by operand 2 into a fresh <V_widen>-mode
;; temporary with neon_vec_<US>shiftl_<mode>, then copies the low half of
;; that temporary into operand 0 with neon_vget_low<V_widen_l>.
6654 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6655 [(match_operand:<V_double_width> 0 "register_operand" "")
6656 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6657 (match_operand:SI 2 "immediate_operand" "i")]
6660 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6661 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6662 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Narrowing pack of two VN-mode vectors: operand 0 is the concatenation of
;; the truncations of operands 1 and 2.  It is emitted as two vmovn
;; instructions, the first writing %e0 from %q1 and the second writing %f0
;; from %q2, hence "length" 8 and type "multiple".  Operand 0 is earlyclobber
;; ("=&w"), presumably because the first vmovn writes part of it before
;; operand 2 has been read.
6668 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6669 ; because the ordering of vector elements in Q registers is different from what
6670 ; the semantics of the instructions require.
6672 (define_insn "vec_pack_trunc_<mode>"
6673 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6674 (vec_concat:<V_narrow_pack>
6675 (truncate:<V_narrow>
6676 (match_operand:VN 1 "register_operand" "w"))
6677 (truncate:<V_narrow>
6678 (match_operand:VN 2 "register_operand" "w"))))]
6679 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6680 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6681 [(set_attr "type" "multiple")
6682 (set_attr "length" "8")]
;; Single narrowing move: operand 0 is the <V_narrow>-mode truncation of the
;; VN-mode operand 1, emitted as one vmovn from %q1 to %P0.  Only enabled
;; when !BYTES_BIG_ENDIAN.
6685 ;; For the non-quad case.
6686 (define_insn "neon_vec_pack_trunc_<mode>"
6687 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6688 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6689 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6690 "vmovn.i<V_sz_elem>\t%P0, %q1"
6691 [(set_attr "type" "neon_move_narrow_q")]
;; Expander vec_pack_trunc_<mode> for the VSHFT modes (little-endian only).
;; It assembles a <V_DOUBLE>-mode temporary with operand 1 as its low half
;; (move_lo_quad) and operand 2 as its high half (move_hi_quad), then narrows
;; that temporary into operand 0 with neon_vec_pack_trunc_<V_double>.
6694 (define_expand "vec_pack_trunc_<mode>"
6695 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6696 (match_operand:VSHFT 1 "register_operand" "")
6697 (match_operand:VSHFT 2 "register_operand")]
6698 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6700 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6702 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6703 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6704 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference for the VF modes, matched as abs (minus (op1, op2))
;; and emitted as a single vabd.  Only enabled under
;; flag_unsafe_math_optimizations.
;; NOTE(review): the mnemonic suffix here is <V_s_elem>, while the
;; neon_vabd<mode>_3 pattern for the same modes uses <V_if_elem> -- confirm
;; that <V_s_elem> gives the intended element type for these modes.
6708 (define_insn "neon_vabd<mode>_2"
6709 [(set (match_operand:VF 0 "s_register_operand" "=w")
6710 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6711 (match_operand:VF 2 "s_register_operand" "w"))))]
6712 "TARGET_NEON && flag_unsafe_math_optimizations"
6713 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6714 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Absolute difference for the VF modes, matched as the abs of an unspec of
;; operands 1 and 2, and emitted as a single vabd.<V_if_elem>.  Only enabled
;; under flag_unsafe_math_optimizations.
6717 (define_insn "neon_vabd<mode>_3"
6718 [(set (match_operand:VF 0 "s_register_operand" "=w")
6719 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6720 (match_operand:VF 2 "s_register_operand" "w")]
6722 "TARGET_NEON && flag_unsafe_math_optimizations"
6723 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6724 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Sign extension of an SImode register into a DImode destination, applied
;; only after reload and only when the destination is a VFP register.  The
;; replacement first duplicates operand 1 across the V2SI view of the
;; destination register (operand 2), then arithmetic-shifts the DImode
;; destination right by 32.
6727 ;; Copy from core-to-neon regs, then extend, not vice-versa
6730 [(set (match_operand:DI 0 "s_register_operand" "")
6731 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6732 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6733 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6734 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6736 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Sign extension of an HImode register into a DImode destination, applied
;; only after reload and only when the destination is a VFP register.  The
;; replacement first duplicates operand 1 across the V4HI view of the
;; destination register (operand 2), then arithmetic-shifts the DImode
;; destination right by 48.
6740 [(set (match_operand:DI 0 "s_register_operand" "")
6741 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6742 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6743 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6744 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6746 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; Sign extension of a QImode register into a DImode destination, applied
;; only after reload and only when the destination is a VFP register.  The
;; replacement first duplicates operand 1 across the V8QI view of the
;; destination register (operand 2), then arithmetic-shifts the DImode
;; destination right by 56.
6750 [(set (match_operand:DI 0 "s_register_operand" "")
6751 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6752 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6753 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6754 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6756 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; Zero extension of an SImode register into a DImode destination, applied
;; only after reload and only when the destination is a VFP register.  The
;; replacement first duplicates operand 1 across the V2SI view of the
;; destination register (operand 2), then logical-shifts the DImode
;; destination right by 32.
6760 [(set (match_operand:DI 0 "s_register_operand" "")
6761 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6762 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6763 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6764 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6766 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Zero extension of an HImode register into a DImode destination, applied
;; only after reload and only when the destination is a VFP register.  The
;; replacement first duplicates operand 1 across the V4HI view of the
;; destination register (operand 2), then logical-shifts the DImode
;; destination right by 48.
6770 [(set (match_operand:DI 0 "s_register_operand" "")
6771 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6772 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6773 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6774 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6776 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6780 [(set (match_operand:DI 0 "s_register_operand" "")
6781 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6782 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6783 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6784 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6786 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));