1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Its possible values are vadd, vmin and vmax; the default is vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX vector modes.  It only matches when at least one
;; of the two operands is a register.  By alternative:
;;   0     plain vmov between two "w" operands;
;;   1, 3  handled by output_move_neon;
;;   2     a "Dn" immediate, checked with neon_immediate_valid_for_move and
;;         printed either as vmov.f32 or as a vmov.i<width> template that is
;;         built with sprintf in a static buffer;
;;   4, 5  the three-operand vmov forms between "r" and "w" operands;
;;   rest  handled by output_move_double.
;; Alternatives 3 and 7 are the only ones with literal-pool ranges.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern for the VQXMOV vector modes.  It only matches when at least
;; one of the two operands is a register.  By alternative:
;;   0     plain vmov between two "w" operands (printed with %q);
;;   1, 3  handled by output_move_neon;
;;   2     a "Dn" immediate, checked with neon_immediate_valid_for_move and
;;         printed either as vmov.f32 or as a vmov.i<width> template that is
;;         built with sprintf in a static buffer;
;;   4, 5  a pair of three-operand vmov instructions, one for each of the
;;         %e and %f parts of the vector register;
;;   rest  handled by output_move_quad.
76 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Expander for TImode moves.  While new pseudos may still be created, a
;; source that is headed for a non-register destination is first forced
;; into a TImode register.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves in the VSTRUCT modes.  While new pseudos may still be
;; created, a source that is headed for a non-register destination is first
;; forced into a register of the same mode.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Expander for V4HF moves, available when TARGET_NEON && TARGET_FP16.
;; While new pseudos may still be created, the source is forced into a
;; V4HF register whenever the destination is not a register.
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
148 if (can_create_pseudo_p ())
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
;; Expander for V8HF moves, available when TARGET_NEON && TARGET_FP16.
;; Both operands use an empty predicate.  While new pseudos may still be
;; created, the source is forced into a V8HF register whenever the
;; destination is not a register.
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
163 if (can_create_pseudo_p ())
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
;; Move pattern for the VSTRUCT modes with three alternatives: "w" from "w",
;; "Ut" from "w" and "w" from "Ut".  It only matches when at least one operand
;; is a register.  Alternatives 1 and 2 are printed by output_move_neon.
;; The length attribute is not a constant: it is computed per insn by
;; arm_attr_length_move_neon.
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; Post-reload register-to-register copy of an EI value, rewritten as two
;; moves: a TImode piece at the base register number and a DImode piece at
;; base + 4.  neon_disambiguate_copy receives the operands array together
;; with both pieces (count 2).
;; NOTE(review): presumably this is the body of a define_split -- confirm
;; against the pattern header.
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
203 neon_disambiguate_copy (operands, dest, src, 2);
;; Post-reload register-to-register copy of an OI value, rewritten as two
;; TImode moves at register offsets 0 and 4.  neon_disambiguate_copy
;; receives the operands array together with both pieces (count 2).
;; NOTE(review): presumably this is the body of a define_split -- confirm
;; against the pattern header.
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
222 neon_disambiguate_copy (operands, dest, src, 2);
;; Post-reload register-to-register copy of a CI value, rewritten as three
;; TImode moves at register offsets 0, 4 and 8.  neon_disambiguate_copy
;; receives the operands array together with the three pieces (count 3).
;; NOTE(review): presumably this is the body of a define_split -- confirm
;; against the pattern header.
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
244 neon_disambiguate_copy (operands, dest, src, 3);
;; Post-reload register-to-register copy of an XI value, rewritten as four
;; TImode moves at register offsets 0, 4, 8 and 12.  neon_disambiguate_copy
;; receives the operands array together with the four pieces (count 4).
;; NOTE(review): presumably this is the body of a define_split -- confirm
;; against the pattern header.
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
269 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned moves in the VDQX modes, available when
;; TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access.  The move is wrapped
;; in UNSPEC_MISALIGNED_ACCESS.  If neither operand is a register, the source
;; is forced into one.  The non-register operand (operand 1 when operand 0 is
;; a register, otherwise operand 0) then has its address forced into a Pmode
;; register unless neon_vector_mem_operand already accepts it.
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
289 adjust_mem = operands[0];
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX register ("w") to a "Um" memory operand,
;; emitted as vst1.<V_sz_elem> with the register printed through %P.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VDX value from memory into a "w" register,
;; emitted as vld1.<V_sz_elem> with the register printed through %P.
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX register ("w") to a "Um" memory operand,
;; emitted as vst1.<V_sz_elem> with the register printed through %q.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX value from memory into a "w" register,
;; emitted as vld1.<V_sz_elem> with the register printed through %q.
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of a VD_LANE vector; operand 3 is
;; the vector being updated and is tied to the output.  Operand 2 is decoded
;; with ffs, so the lane number is the index of its lowest set bit; on
;; big-endian the lane number is mirrored within the vector.  Operand 2 is
;; then replaced by the final lane number.  Alternative 0 takes the element
;; from memory ("Um") with vld1, alternative 1 from an "r" register with vmov.
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert scalar operand 1 into one lane of a VQ2 vector; operand 3 is the
;; vector being updated and is tied to the output.  Operand 2 is decoded with
;; ffs, so the element index is the index of its lowest set bit.  The index
;; is split into a lane within one half of the vector (elem % half_elts,
;; mirrored on big-endian) and a register offset hi = (elem / half_elts) * 2.
;; Operand 0 is rewritten as the <V_HALF>mode register at regno + hi and
;; operand 2 as the lane within that half before the template is printed.
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert DImode operand 1 into one element of a V2DI vector; operand 3 is
;; the vector being updated and is tied to the output.  Operand 2 is decoded
;; with ffs, so the element index is the index of its lowest set bit.
;; Operand 0 is rewritten as the DImode register at regno + 2 * elem.
;; Alternative 0 loads the element from memory with vld1.64, alternative 1
;; moves it from an "r" operand with vmov.
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
393 operands[0] = gen_rtx_REG (DImode, regno);
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
398 return "vmov\t%P0, %Q1, %R1";
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set on the VDQ modes.  Operand 2 is a lane number; it is
;; converted to a one-bit mask (HOST_WIDE_INT_1 << lane) and passed to
;; vec_set<mode>_internal, with operand 0 used both as the destination and
;; as the vector being updated.
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
;; Extract one lane of a VD_LANE vector (operand 1) into operand 0.
;; Operand 2 is the lane number; on big-endian it is mirrored within the
;; vector before printing.  Alternative 0 stores the lane to memory ("Um")
;; with vst1, alternative 1 moves it to an "r" register with vmov.
415 (define_insn "vec_extract<mode><V_elem_l>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
422 if (BYTES_BIG_ENDIAN)
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract one lane of a VQ2 vector (operand 1) into operand 0.  The lane
;; number in operand 2 is split into a lane within one half of the vector
;; (index % half_elts, mirrored on big-endian) and a register offset
;; hi = (index / half_elts) * 2.  Operand 1 is rewritten as the <V_HALF>mode
;; register at regno + hi and operand 2 as the lane within that half.
;; Alternative 0 stores the lane with vst1, alternative 1 uses vmov.
437 (define_insn "vec_extract<mode><V_elem_l>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract one DImode element of a V2DI vector (operand 1) into operand 0.
;; Operand 1 is rewritten as the DImode register at regno + 2 * operand 2.
;; Alternative 0 stores it to memory with vst1.64, alternative 1 moves it to
;; an "r" operand with vmov.
463 (define_insn "vec_extractv2didi"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
472 operands[1] = gen_rtx_REG (DImode, regno);
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vector initialisation in the VDQ modes; all of the work is
;; delegated to neon_expand_vector_init (destination, initialiser).
482 (define_expand "vec_init<mode><V_elem_l>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
487 neon_expand_vector_init (operands[0], operands[1]);
491 ;; Doubleword and quadword arithmetic.
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition for the VDQ modes, emitted as vadd.<V_if_elem>.  For
;; float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The type is neon_fp_addsub_s<q> for
;; float modes and neon_add<q> otherwise.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
508 ;; As with SFmode, full support for HFmode vector arithmetic is only available
509 ;; when flag_unsafe_math_optimizations is enabled.
;; Named add pattern for the VH modes, emitted as vadd.<V_if_elem>.  It
;; requires both TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
511 (define_insn "add<mode>3"
513 (match_operand:VH 0 "s_register_operand" "=w")
515 (match_operand:VH 1 "s_register_operand" "w")
516 (match_operand:VH 2 "s_register_operand" "w")))]
517 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
518 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
520 (if_then_else (match_test "<Is_float_mode>")
521 (const_string "neon_fp_addsub_s<q>")
522 (const_string "neon_add<q>")))]
;; VH-mode add pattern emitted as vadd.<V_if_elem>.  Unlike add<mode>3 it
;; needs only TARGET_NEON_FP16INST, with no dependence on
;; flag_unsafe_math_optimizations.
525 (define_insn "add<mode>3_fp16"
527 (match_operand:VH 0 "s_register_operand" "=w")
529 (match_operand:VH 1 "s_register_operand" "w")
530 (match_operand:VH 2 "s_register_operand" "w")))]
531 "TARGET_NEON_FP16INST"
532 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
534 (if_then_else (match_test "<Is_float_mode>")
535 (const_string "neon_fp_addsub_s<q>")
536 (const_string "neon_add<q>")))]
;; DImode addition with seven alternatives; the pattern clobbers CC_REGNUM.
;; Alternatives 0 and 3 use "w" registers and emit vadd.i64; they are typed
;; neon_add, leave the conds attribute at "*", and are selected through the
;; arch attribute (neon_for_64bits and avoid_neon_for_64bits respectively).
;; The remaining alternatives use "r" registers, are typed "multiple", mark
;; conds as clob and have length 8.
539 (define_insn "adddi3_neon"
540 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
541 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
542 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
543 (clobber (reg:CC CC_REGNUM))]
546 switch (which_alternative)
548 case 0: /* fall through */
549 case 3: return "vadd.i64\t%P0, %P1, %P2";
555 default: gcc_unreachable ();
558 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
559 multiple,multiple,multiple")
560 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
561 (set_attr "length" "*,8,8,*,8,8,8")
562 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtraction (operand 1 minus operand 2) for the VDQ modes, emitted
;; as vsub.<V_if_elem>.  For float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The type is neon_fp_addsub_s<q> for
;; float modes and neon_sub<q> otherwise.
565 (define_insn "*sub<mode>3_neon"
566 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
567 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
568 (match_operand:VDQ 2 "s_register_operand" "w")))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_addsub_s<q>")
574 (const_string "neon_sub<q>")))]
;; Named sub pattern for the VH modes, emitted as vsub.<V_if_elem>.  It
;; requires both TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
577 (define_insn "sub<mode>3"
579 (match_operand:VH 0 "s_register_operand" "=w")
581 (match_operand:VH 1 "s_register_operand" "w")
582 (match_operand:VH 2 "s_register_operand" "w")))]
583 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
584 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585 [(set_attr "type" "neon_sub<q>")]
;; VH-mode sub pattern emitted as vsub.<V_if_elem>.  Unlike sub<mode>3 it
;; needs only TARGET_NEON_FP16INST, with no dependence on
;; flag_unsafe_math_optimizations.
588 (define_insn "sub<mode>3_fp16"
590 (match_operand:VH 0 "s_register_operand" "=w")
592 (match_operand:VH 1 "s_register_operand" "w")
593 (match_operand:VH 2 "s_register_operand" "w")))]
594 "TARGET_NEON_FP16INST"
595 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
596 [(set_attr "type" "neon_sub<q>")]
;; DImode subtraction with five alternatives; the pattern clobbers CC_REGNUM.
;; Alternatives 0 and 4 use "w" registers and emit vsub.i64; they are
;; selected through the arch attribute (neon_for_64bits and
;; avoid_neon_for_64bits respectively).  Alternatives 1-3 use "r" registers
;; and emit a subs/sbc pair; they mark conds as clob and have length 8.
599 (define_insn "subdi3_neon"
600 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
601 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
602 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
603 (clobber (reg:CC CC_REGNUM))]
606 switch (which_alternative)
608 case 0: /* fall through */
609 case 4: return "vsub.i64\t%P0, %P1, %P2";
610 case 1: /* fall through */
611 case 2: /* fall through */
612 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
613 default: gcc_unreachable ();
616 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
617 (set_attr "conds" "*,clob,clob,clob,*")
618 (set_attr "length" "*,8,8,8,*")
619 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiplication for the VDQW modes, emitted as vmul.<V_if_elem>.
;; For float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The type is neon_fp_mul_s<q> for float
;; modes and neon_mul_<V_elem_ch><q> otherwise.
622 (define_insn "*mul<mode>3_neon"
623 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
624 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
625 (match_operand:VDQW 2 "s_register_operand" "w")))]
626 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
627 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
629 (if_then_else (match_test "<Is_float_mode>")
630 (const_string "neon_fp_mul_s<q>")
631 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VDQW modes: operand 2 * operand 3 added to
;; operand 1, which is tied to the output register ("0").  Emitted as
;; vmla.<V_if_elem>.  For float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.
634 (define_insn "mul<mode>3add<mode>_neon"
635 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
636 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
637 (match_operand:VDQW 3 "s_register_operand" "w"))
638 (match_operand:VDQW 1 "s_register_operand" "0")))]
639 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
640 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
642 (if_then_else (match_test "<Is_float_mode>")
643 (const_string "neon_fp_mla_s<q>")
644 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VH modes: operand 2 * operand 3 added to
;; operand 1, which is tied to the output register ("0").  Emitted as
;; vmla.f16 and gated on TARGET_NEON_FP16INST.
647 (define_insn "mul<mode>3add<mode>_neon"
648 [(set (match_operand:VH 0 "s_register_operand" "=w")
649 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
650 (match_operand:VH 3 "s_register_operand" "w"))
651 (match_operand:VH 1 "s_register_operand" "0")))]
652 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
653 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
654 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract for the VDQW modes: operand 1 (tied to the output
;; register) minus operand 2 * operand 3.  Emitted as vmls.<V_if_elem>.  For
;; float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.
657 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
658 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
659 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
660 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
661 (match_operand:VDQW 3 "s_register_operand" "w"))))]
662 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
663 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
665 (if_then_else (match_test "<Is_float_mode>")
666 (const_string "neon_fp_mla_s<q>")
667 (const_string "neon_mla_<V_elem_ch><q>")))]
670 ;; Fused multiply-accumulate
671 ;; We define each insn twice here:
672 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
673 ;; to be able to use when converting to FMA.
674 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes: operand 1 * operand 2 + operand 3,
;; with operand 3 tied to the output register.  Emitted as vfma.  This
;; variant additionally requires flag_unsafe_math_optimizations.
675 (define_insn "fma<VCVTF:mode>4"
676 [(set (match_operand:VCVTF 0 "register_operand" "=w")
677 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
678 (match_operand:VCVTF 2 "register_operand" "w")
679 (match_operand:VCVTF 3 "register_operand" "0")))]
680 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
681 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
682 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and vfma template as fma<VCVTF:mode>4, but enabled whenever
;; TARGET_NEON && TARGET_FMA, without requiring
;; flag_unsafe_math_optimizations.
685 (define_insn "fma<VCVTF:mode>4_intrinsic"
686 [(set (match_operand:VCVTF 0 "register_operand" "=w")
687 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
688 (match_operand:VCVTF 2 "register_operand" "w")
689 (match_operand:VCVTF 3 "register_operand" "0")))]
690 "TARGET_NEON && TARGET_FMA"
691 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
692 [(set_attr "type" "neon_fp_mla_s<q>")]
;; VH-mode fma pattern emitted as vfma, with operand 3 tied to the output
;; register.  It requires TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.
695 (define_insn "fma<VH:mode>4"
696 [(set (match_operand:VH 0 "register_operand" "=w")
698 (match_operand:VH 1 "register_operand" "w")
699 (match_operand:VH 2 "register_operand" "w")
700 (match_operand:VH 3 "register_operand" "0")))]
701 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
702 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703 [(set_attr "type" "neon_fp_mla_s<q>")]
;; VH-mode fma pattern emitted as vfma, with operand 3 tied to the output
;; register.  It requires only TARGET_NEON_FP16INST.
706 (define_insn "fma<VH:mode>4_intrinsic"
707 [(set (match_operand:VH 0 "register_operand" "=w")
709 (match_operand:VH 1 "register_operand" "w")
710 (match_operand:VH 2 "register_operand" "w")
711 (match_operand:VH 3 "register_operand" "0")))]
712 "TARGET_NEON_FP16INST"
713 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
714 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: (-operand 1) * operand 2 +
;; operand 3, with operand 3 tied to the output register.  Emitted as vfms.
;; This variant additionally requires flag_unsafe_math_optimizations.
717 (define_insn "*fmsub<VCVTF:mode>4"
718 [(set (match_operand:VCVTF 0 "register_operand" "=w")
719 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
720 (match_operand:VCVTF 2 "register_operand" "w")
721 (match_operand:VCVTF 3 "register_operand" "0")))]
722 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
723 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724 [(set_attr "type" "neon_fp_mla_s<q>")]
;; VCVTF-mode pattern emitted as vfms, with a negated operand 1 and operand 3
;; tied to the output register.  It is enabled whenever
;; TARGET_NEON && TARGET_FMA, without requiring
;; flag_unsafe_math_optimizations.
727 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
728 [(set (match_operand:VCVTF 0 "register_operand" "=w")
730 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
731 (match_operand:VCVTF 2 "register_operand" "w")
732 (match_operand:VCVTF 3 "register_operand" "0")))]
733 "TARGET_NEON && TARGET_FMA"
734 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735 [(set_attr "type" "neon_fp_mla_s<q>")]
;; VH-mode pattern emitted as vfms, with a negated operand 1 and operand 3
;; tied to the output register.  It requires only TARGET_NEON_FP16INST.
738 (define_insn "fmsub<VH:mode>4_intrinsic"
739 [(set (match_operand:VH 0 "register_operand" "=w")
741 (neg:VH (match_operand:VH 1 "register_operand" "w"))
742 (match_operand:VH 2 "register_operand" "w")
743 (match_operand:VH 3 "register_operand" "0")))]
744 "TARGET_NEON_FP16INST"
745 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
746 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Unspec-based pattern for the VCVTF modes, emitted as
;; vrint<nvrint_variant>.f32 on a single "w" source register.  It is enabled
;; when TARGET_NEON && TARGET_VFP5.
749 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
750 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
751 (unspec:VCVTF [(match_operand:VCVTF 1
752 "s_register_operand" "w")]
754 "TARGET_NEON && TARGET_VFP5"
755 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
756 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Conversion of a VCVTF vector to its <V_cmp_result> mode: a FIXUORS
;; operation applied to an unspec of operand 1, emitted as
;; vcvt<nvrint_variant>.<su>32.f32.  It is enabled when
;; TARGET_NEON && TARGET_VFP5 and is marked as not predicable.
759 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
760 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
761 (FIXUORS:<V_cmp_result> (unspec:VCVTF
762 [(match_operand:VCVTF 1 "register_operand" "w")]
764 "TARGET_NEON && TARGET_VFP5"
765 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
766 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
767 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 ORs two registers with vorr.
;; Alternative 1 takes a "Dl" immediate, with operand 1 tied to the output,
;; and is printed by neon_output_logic_immediate using the "vorr" mnemonic.
770 (define_insn "ior<mode>3"
771 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
772 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
773 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
776 switch (which_alternative)
778 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
779 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
780 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
781 default: gcc_unreachable ();
784 [(set_attr "type" "neon_logic<q>")]
787 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
788 ;; vorr. We support the pseudo-instruction vand instead, because that
789 ;; corresponds to the canonical form the middle-end expects to use for
790 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 ANDs two registers with
;; vand.  Alternative 1 takes a "DL" immediate, with operand 1 tied to the
;; output, and is printed by neon_output_logic_immediate using the "vand"
;; mnemonic and 1 as its fourth argument (ior<mode>3 passes 0 there).
792 (define_insn "and<mode>3"
793 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
794 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
795 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
798 switch (which_alternative)
800 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
801 case 1: return neon_output_logic_immediate ("vand", &operands[2],
802 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
803 default: gcc_unreachable ();
806 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 1 ORed with the complement of
;; operand 2, emitted as vorn.
809 (define_insn "orn<mode>3_neon"
810 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
811 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
812 (match_operand:VDQ 1 "s_register_operand" "w")))]
814 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
815 [(set_attr "type" "neon_logic<q>")]
818 ;; TODO: investigate whether we should disable
819 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT (operand 1 | ~operand 2) with one "w" alternative and three
;; "r" alternatives.  The split condition requires that the destination is
;; not a VFP register.  The replacement RTL is two SImode OR-NOT sets, and
;; the preparation code contains two strategies: splitting every operand
;; into its low and high SImode parts (gen_lowpart / gen_highpart), or
;; emitting one_cmpldi2 into operand 0 followed by iordi3.
821 (define_insn_and_split "orndi3_neon"
822 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
823 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
824 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
832 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
833 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
834 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
839 operands[3] = gen_highpart (SImode, operands[0]);
840 operands[0] = gen_lowpart (SImode, operands[0]);
841 operands[4] = gen_highpart (SImode, operands[2]);
842 operands[2] = gen_lowpart (SImode, operands[2]);
843 operands[5] = gen_highpart (SImode, operands[1]);
844 operands[1] = gen_lowpart (SImode, operands[1]);
848 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
849 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
853 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
854 (set_attr "length" "*,16,8,8")
855 (set_attr "arch" "any,a,t2,t2")]
;; Bit-clear for the VDQ modes: operand 1 ANDed with the complement of
;; operand 2, emitted as vbic.
858 (define_insn "bic<mode>3_neon"
859 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
860 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
861 (match_operand:VDQ 1 "s_register_operand" "w")))]
863 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
864 [(set_attr "type" "neon_logic<q>")]
867 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear (operand 1 & ~operand 2) with one "w" alternative typed
;; neon_logic and two "r" alternatives typed "multiple" with length 8.
868 (define_insn "bicdi3_neon"
869 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
870 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
871 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
877 [(set_attr "type" "neon_logic,multiple,multiple")
878 (set_attr "length" "*,8,8")]
;; Bitwise exclusive-OR of two registers for the VDQ modes, emitted as veor.
881 (define_insn "xor<mode>3"
882 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
883 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
884 (match_operand:VDQ 2 "s_register_operand" "w")))]
886 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes, emitted as vmvn; the insn is typed
;; neon_move<q>.
890 (define_insn "one_cmpl<mode>2"
891 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
892 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
894 "vmvn\t%<V_reg>0, %<V_reg>1"
895 [(set_attr "type" "neon_move<q>")]
;; Absolute value for the VDQW modes, emitted as vabs.<V_s_elem>.  The type
;; is neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
898 (define_insn "abs<mode>2"
899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
900 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
902 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
904 (if_then_else (match_test "<Is_float_mode>")
905 (const_string "neon_fp_abs_s<q>")
906 (const_string "neon_abs<q>")))]
;; Negation for the VDQW modes, emitted as vneg.<V_s_elem>.  The type is
;; neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
909 (define_insn "neg<mode>2"
910 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
911 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
913 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
915 (if_then_else (match_test "<Is_float_mode>")
916 (const_string "neon_fp_neg_s<q>")
917 (const_string "neon_neg<q>")))]
;; DImode negation with four alternatives: two on "w" registers and two on
;; "r" registers.  The pattern clobbers a DImode scratch (operand 2) and
;; CC_REGNUM; the scratch is a real "w" register only in the second
;; alternative and "X" in the others.  Length is 8 and type is "multiple".
920 (define_insn "negdi2_neon"
921 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
922 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
923 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
924 (clobber (reg:CC CC_REGNUM))]
927 [(set_attr "length" "8")
928 (set_attr "type" "multiple")]
931 ; Split negdi2_neon for vfp registers
;; After reload, when the destination is a VFP register, DImode negation is
;; rewritten as: set operand 2 to zero, then destination = operand 2 -
;; operand 1 (with a CC clobber).  If no register was allocated for the
;; scratch, the destination itself is used as the zeroed temporary.
933 [(set (match_operand:DI 0 "s_register_operand" "")
934 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
935 (clobber (match_scratch:DI 2 ""))
936 (clobber (reg:CC CC_REGNUM))]
937 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
938 [(set (match_dup 2) (const_int 0))
939 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
940 (clobber (reg:CC CC_REGNUM))])]
942 if (!REG_P (operands[2]))
943 operands[2] = operands[0];
947 ; Split negdi2_neon for core registers
;; After reload, when the destination satisfies
;; arm_general_register_operand in DImode, the scratch clobber is dropped:
;; the insn is rewritten as a parallel of the neg:DI set and the CC clobber.
949 [(set (match_operand:DI 0 "s_register_operand" "")
950 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
951 (clobber (match_scratch:DI 2 ""))
952 (clobber (reg:CC CC_REGNUM))]
953 "TARGET_32BIT && reload_completed
954 && arm_general_register_operand (operands[0], DImode)"
955 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
956 (clobber (reg:CC CC_REGNUM))])]
;; Unary pattern over the ABSNEG code iterator for the VH modes, emitted as
;; v<absneg_str>.<V_s_elem>.  It requires TARGET_NEON_FP16INST.
960 (define_insn "<absneg_str><mode>2"
961 [(set (match_operand:VH 0 "s_register_operand" "=w")
962 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
963 "TARGET_NEON_FP16INST"
964 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
965 [(set_attr "type" "neon_abs<q>")]
;; Expander for the VH modes that simply emits the matching
;; <absneg_str><mode>2 insn on the same two operands.
968 (define_expand "neon_v<absneg_str><mode>"
970 (match_operand:VH 0 "s_register_operand")
971 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
972 "TARGET_NEON_FP16INST"
974 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
;; Single-operand VH pattern emitted as <fp16_rnd_insn>.<V_s_elem> and typed
;; neon_fp_round_s<q>.  It requires TARGET_NEON_FP16INST.
978 (define_insn "neon_v<fp16_rnd_str><mode>"
979 [(set (match_operand:VH 0 "s_register_operand" "=w")
981 [(match_operand:VH 1 "s_register_operand" "w")]
983 "TARGET_NEON_FP16INST"
984 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
985 [(set_attr "type" "neon_fp_round_s<q>")]
;; Single-operand VH pattern emitted as vrsqrte.f16 and typed
;; neon_fp_rsqrte_s<q>.  It requires TARGET_NEON_FP16INST.
988 (define_insn "neon_vrsqrte<mode>"
989 [(set (match_operand:VH 0 "s_register_operand" "=w")
991 [(match_operand:VH 1 "s_register_operand" "w")]
993 "TARGET_NEON_FP16INST"
994 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
995 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned minimum for the VDQIW modes, emitted as vmin.<V_u_elem>.
998 (define_insn "*umin<mode>3_neon"
999 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1000 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1001 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1003 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1004 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned maximum for the VDQIW modes, emitted as vmax.<V_u_elem>.
1007 (define_insn "*umax<mode>3_neon"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1009 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1010 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1012 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1013 [(set_attr "type" "neon_minmax<q>")]
;; smin on VDQW vectors; emits vmin with the <V_s_elem> suffix.  The
;; scheduling type is neon_fp_minmax_s<q> when <Is_float_mode> holds and
;; neon_minmax<q> otherwise.
1016 (define_insn "*smin<mode>3_neon"
1017 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1018 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1019 (match_operand:VDQW 2 "s_register_operand" "w")))]
1021 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1023 (if_then_else (match_test "<Is_float_mode>")
1024 (const_string "neon_fp_minmax_s<q>")
1025 (const_string "neon_minmax<q>")))]
;; smax on VDQW vectors; emits vmax with the <V_s_elem> suffix.  The
;; scheduling type is neon_fp_minmax_s<q> when <Is_float_mode> holds and
;; neon_minmax<q> otherwise.
1028 (define_insn "*smax<mode>3_neon"
1029 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1030 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1031 (match_operand:VDQW 2 "s_register_operand" "w")))]
1033 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1035 (if_then_else (match_test "<Is_float_mode>")
1036 (const_string "neon_fp_minmax_s<q>")
1037 (const_string "neon_minmax<q>")))]
1040 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1041 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift on VDQIW modes.  Alternative 0 takes the shift amount
;; in a register and emits vshl directly; alternative 1 takes an immediate
;; ("Dn") and builds the output through neon_output_shift_immediate.
1044 (define_insn "vashl<mode>3"
1045 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1046 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1047 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1050 switch (which_alternative)
1052 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1053 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1055 VALID_NEON_QREG_MODE (<MODE>mode),
1057 default: gcc_unreachable ();
1060 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift of a VDQIW vector by an immediate accepted by
;; imm_for_neon_rshift_operand.  The output is built by
;; neon_output_shift_immediate with mnemonic "vshr" and sign character 's'.
1063 (define_insn "vashr<mode>3_imm"
1064 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1065 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1066 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1069 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1070 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1073 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift of a VDQIW vector by an immediate accepted by
;; imm_for_neon_rshift_operand.  The output is built by
;; neon_output_shift_immediate with mnemonic "vshr" and sign character 'u'.
1076 (define_insn "vlshr<mode>3_imm"
1077 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1078 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1079 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1082 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1083 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1086 [(set_attr "type" "neon_shift_imm<q>")]
1089 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
1090 ; amount, with signed operands. This is essentially the same as vashl<mode>3
1091 ; above, but using an unspec in case GCC tries anything tricky with negative
;; Register-amount vshl with the signed element suffix <V_s_elem>, wrapped
;; in UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander uses it with a
;; negated shift count.
1094 (define_insn "ashl<mode>3_signed"
1095 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1096 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1097 (match_operand:VDQI 2 "s_register_operand" "w")]
1098 UNSPEC_ASHIFT_SIGNED))]
1100 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1101 [(set_attr "type" "neon_shift_reg<q>")]
1104 ; Used for implementing logical shift-right, which is a left-shift by a negative
1105 ; amount, with unsigned operands.
;; Register-amount vshl with the unsigned element suffix <V_u_elem>, wrapped
;; in UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander uses it with a
;; negated shift count.
1107 (define_insn "ashl<mode>3_unsigned"
1108 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1109 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1110 (match_operand:VDQI 2 "s_register_operand" "w")]
1111 UNSPEC_ASHIFT_UNSIGNED))]
1113 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1114 [(set_attr "type" "neon_shift_reg<q>")]
;; Arithmetic right-shift expander for VDQIW vectors.  A register shift
;; amount is negated into a fresh pseudo and applied with
;; ashl<mode>3_signed; vashr<mode>3_imm is used for the immediate form.
1117 (define_expand "vashr<mode>3"
1118 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1119 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1120 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1123 if (s_register_operand (operands[2], <MODE>mode))
1125 rtx neg = gen_reg_rtx (<MODE>mode);
1126 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1127 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1130 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Logical right-shift expander for VDQIW vectors.  A register shift amount
;; is negated into a fresh pseudo and applied with ashl<mode>3_unsigned;
;; vlshr<mode>3_imm is used for the immediate form.
1134 (define_expand "vlshr<mode>3"
1135 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1136 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1137 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1140 if (s_register_operand (operands[2], <MODE>mode))
1142 rtx neg = gen_reg_rtx (<MODE>mode);
1143 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1144 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1147 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1153 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1154 ;; leaving the upper half uninitialized. This is OK since the shift
1155 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1156 ;; data flow analysis however, we pretend the full register is set
;; Alternative 0 ("Um") loads the SImode count with vld1.32 into lane 0;
;; alternative 1 ("r") moves it from a core register with vmov.32.
1158 (define_insn "neon_load_count"
1159 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1160 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1161 UNSPEC_LOAD_COUNT))]
1164 vld1.32\t{%P0[0]}, %A1
1165 vmov.32\t%P0[0], %1"
1166 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift held entirely in NEON registers, with no clobbers.
;; Only matches after reload.  A constant shift amount must lie in 0..63
;; (checked by the insn condition); otherwise the amount is a register.
1169 (define_insn "ashldi3_neon_noclobber"
1170 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1171 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1172 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1173 "TARGET_NEON && reload_completed
1174 && (!CONST_INT_P (operands[2])
1175 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1177 vshl.u64\t%P0, %P1, %2
1178 vshl.u64\t%P0, %P1, %P2"
1179 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift whose operands may be in NEON ("w") or core ("r")
;; registers.  It is split after reload according to where operand 0 lives:
;;  - VFP/NEON register: a constant count below 1 becomes a plain move, a
;;    constant count above 63 is replaced by 63, and any other count is first
;;    loaded into the DI scratch (operand 5) with neon_load_count.  The shift
;;    itself is emitted as ashldi3_neon_noclobber.
;;  - otherwise: a shift by exactly 1 uses arm_ashldi3_1bit and everything
;;    else goes through arm_emit_coreregs_64bit_shift, which receives the SI
;;    scratches (operands 3 and 4).
1182 (define_insn_and_split "ashldi3_neon"
1183 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w")
1184 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w")
1185 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i")))
1186 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X"))
1187 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X"))
1188 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X"))
1189 (clobber (reg:CC_C CC_REGNUM))]
1192 "TARGET_NEON && reload_completed"
1196 if (IS_VFP_REGNUM (REGNO (operands[0])))
1198 if (CONST_INT_P (operands[2]))
1200 if (INTVAL (operands[2]) < 1)
1202 emit_insn (gen_movdi (operands[0], operands[1]));
1205 else if (INTVAL (operands[2]) > 63)
1206 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1210 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1211 operands[2] = operands[5];
1214 /* Ditch the unnecessary clobbers. */
1215 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1220 /* The shift expanders support either full overlap or no overlap. */
1221 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1222 || REGNO (operands[0]) == REGNO (operands[1]));
1224 if (operands[2] == CONST1_RTX (SImode))
1225 /* This clobbers CC. */
1226 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1228 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1229 operands[2], operands[3], operands[4]);
1233 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1234 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1235 (set_attr "type" "multiple")]
1238 ; The shift amount needs to be negated for right-shifts
;; DImode vshl.s64 by a register amount, wrapped in UNSPEC_ASHIFT_SIGNED.
;; Only matches after reload.
1239 (define_insn "signed_shift_di3_neon"
1240 [(set (match_operand:DI 0 "s_register_operand" "=w")
1241 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1242 (match_operand:DI 2 "s_register_operand" " w")]
1243 UNSPEC_ASHIFT_SIGNED))]
1244 "TARGET_NEON && reload_completed"
1245 "vshl.s64\t%P0, %P1, %P2"
1246 [(set_attr "type" "neon_shift_reg")]
1249 ; The shift amount needs to be negated for right-shifts
;; DImode vshl.u64 by a register amount, wrapped in UNSPEC_ASHIFT_UNSIGNED.
;; Only matches after reload.
1250 (define_insn "unsigned_shift_di3_neon"
1251 [(set (match_operand:DI 0 "s_register_operand" "=w")
1252 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1253 (match_operand:DI 2 "s_register_operand" " w")]
1254 UNSPEC_ASHIFT_UNSIGNED))]
1255 "TARGET_NEON && reload_completed"
1256 "vshl.u64\t%P0, %P1, %P2"
1257 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic right shift by a constant in NEON registers, with no
;; clobbers.  Only matches after reload and for shift counts 1..64.
1260 (define_insn "ashrdi3_neon_imm_noclobber"
1261 [(set (match_operand:DI 0 "s_register_operand" "=w")
1262 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1263 (match_operand:DI 2 "const_int_operand" " i")))]
1264 "TARGET_NEON && reload_completed
1265 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1266 "vshr.s64\t%P0, %P1, %2"
1267 [(set_attr "type" "neon_shift_imm")]
;; DImode logical right shift by a constant in NEON registers, with no
;; clobbers.  Only matches after reload and for shift counts 1..64.
1270 (define_insn "lshrdi3_neon_imm_noclobber"
1271 [(set (match_operand:DI 0 "s_register_operand" "=w")
1272 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1273 (match_operand:DI 2 "const_int_operand" " i")))]
1274 "TARGET_NEON && reload_completed
1275 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1276 "vshr.u64\t%P0, %P1, %2"
1277 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (the RSHIFTS code iterator) whose operands may be in
;; NEON ("w") or core ("r") registers.  It is split after reload according
;; to where operand 0 lives:
;;  - VFP/NEON register, constant count: a count below 1 becomes a plain
;;    move, a count above 64 is replaced by 64, and the shift is emitted as
;;    <shift>di3_neon_imm_noclobber.
;;  - VFP/NEON register, other counts: the count is negated into the SI
;;    scratch (operand 3), loaded into the DI scratch (operand 5) with
;;    neon_load_count, and applied with <shifttype>_shift_di3_neon.
;;  - otherwise: a shift by exactly 1 uses arm_<shift>di3_1bit and
;;    everything else goes through arm_emit_coreregs_64bit_shift.
1282 (define_insn_and_split "<shift>di3_neon"
1283 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w")
1284 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1285 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1286 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1287 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1288 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1289 (clobber (reg:CC CC_REGNUM))]
1292 "TARGET_NEON && reload_completed"
1296 if (IS_VFP_REGNUM (REGNO (operands[0])))
1298 if (CONST_INT_P (operands[2]))
1300 if (INTVAL (operands[2]) < 1)
1302 emit_insn (gen_movdi (operands[0], operands[1]));
1305 else if (INTVAL (operands[2]) > 64)
1306 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1308 /* Ditch the unnecessary clobbers. */
1309 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1315 /* We must use a negative left-shift. */
1316 emit_insn (gen_negsi2 (operands[3], operands[2]));
1317 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1318 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1324 /* The shift expanders support either full overlap or no overlap. */
1325 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1326 || REGNO (operands[0]) == REGNO (operands[1]));
1328 if (operands[2] == CONST1_RTX (SImode))
1329 /* This clobbers CC. */
1330 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1332 /* This clobbers CC (ASHIFTRT by register only). */
1333 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1334 operands[2], operands[3], operands[4]);
1339 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1340 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1341 (set_attr "type" "multiple")]
1344 ;; Widening operations
;; Signed widening sum for VQI inputs: operand 0 = operand 2 + sign-extended
;; operand 1, with the accumulator in <V_double_width>.  Operand 2 is copied
;; into operand 0 when they differ, then the vec_sel_widen_ssum_lo and _hi
;; patterns each accumulate into operand 0.  p1 and p2 come from
;; arm_simd_vect_par_cnst_half called with false and true respectively.
1346 (define_expand "widen_ssum<mode>3"
1347 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1348 (plus:<V_double_width>
1349 (sign_extend:<V_double_width>
1350 (match_operand:VQI 1 "s_register_operand" ""))
1351 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1354 machine_mode mode = GET_MODE (operands[1]);
1357 p1 = arm_simd_vect_par_cnst_half (mode, false);
1358 p2 = arm_simd_vect_par_cnst_half (mode, true);
1360 if (operands[0] != operands[2])
1361 emit_move_insn (operands[0], operands[2]);
1363 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1367 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Adds the sign-extended half of operand 1 selected by a
;; vect_par_constant_low PARALLEL to operand 3, which is tied to the output
;; ("0").  Emits vaddw reading %e1, or %f1 when BYTES_BIG_ENDIAN.
;; NOTE(review): "%" marks operand 1 as commutative with operand 2, but
;; operand 2 here is the selector PARALLEL - confirm the modifier is intended.
1375 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1376 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1377 (plus:<V_double_width>
1378 (sign_extend:<V_double_width>
1379 (vec_select:<V_HALF>
1380 (match_operand:VQI 1 "s_register_operand" "%w")
1381 (match_operand:VQI 2 "vect_par_constant_low" "")))
1382 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1385 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1386 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1388 [(set_attr "type" "neon_add_widen")])
;; Adds the sign-extended half of operand 1 selected by a
;; vect_par_constant_high PARALLEL to operand 3, which is tied to the output
;; ("0").  Emits vaddw reading %f1, or %e1 when BYTES_BIG_ENDIAN.
;; NOTE(review): "%" marks operand 1 as commutative with operand 2, but
;; operand 2 here is the selector PARALLEL - confirm the modifier is intended.
1390 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1391 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1392 (plus:<V_double_width>
1393 (sign_extend:<V_double_width>
1394 (vec_select:<V_HALF>
1395 (match_operand:VQI 1 "s_register_operand" "%w")
1396 (match_operand:VQI 2 "vect_par_constant_high" "")))
1397 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1400 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1401 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1403 [(set_attr "type" "neon_add_widen")])
;; Signed widening sum for VW inputs: adds sign-extended operand 1 to the
;; <V_widen> accumulator in operand 2 with a single vaddw.
;; NOTE(review): "%" marks operand 1 as commutative with operand 2, yet the
;; two operands have different modes (VW vs <V_widen>) - confirm intended.
1405 (define_insn "widen_ssum<mode>3"
1406 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1408 (sign_extend:<V_widen>
1409 (match_operand:VW 1 "s_register_operand" "%w"))
1410 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1412 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1413 [(set_attr "type" "neon_add_widen")]
;; Unsigned widening sum for VQI inputs: operand 0 = operand 2 +
;; zero-extended operand 1, with the accumulator in <V_double_width>.
;; Operand 2 is copied into operand 0 when they differ, then the
;; vec_sel_widen_usum_lo and _hi patterns each accumulate into operand 0.
;; p1 and p2 come from arm_simd_vect_par_cnst_half called with false and
;; true respectively.
1416 (define_expand "widen_usum<mode>3"
1417 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1418 (plus:<V_double_width>
1419 (zero_extend:<V_double_width>
1420 (match_operand:VQI 1 "s_register_operand" ""))
1421 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1424 machine_mode mode = GET_MODE (operands[1]);
1427 p1 = arm_simd_vect_par_cnst_half (mode, false);
1428 p2 = arm_simd_vect_par_cnst_half (mode, true);
1430 if (operands[0] != operands[2])
1431 emit_move_insn (operands[0], operands[2]);
1433 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1437 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Adds the zero-extended half of operand 1 selected by a
;; vect_par_constant_low PARALLEL to operand 3, which is tied to the output
;; ("0").  Emits vaddw reading %e1, or %f1 when BYTES_BIG_ENDIAN.
;; NOTE(review): "%" marks operand 1 as commutative with operand 2, but
;; operand 2 here is the selector PARALLEL - confirm the modifier is intended.
1445 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1446 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1447 (plus:<V_double_width>
1448 (zero_extend:<V_double_width>
1449 (vec_select:<V_HALF>
1450 (match_operand:VQI 1 "s_register_operand" "%w")
1451 (match_operand:VQI 2 "vect_par_constant_low" "")))
1452 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1455 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1456 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1458 [(set_attr "type" "neon_add_widen")])
;; Adds the zero-extended half of operand 1 selected by a
;; vect_par_constant_high PARALLEL to operand 3, which is tied to the output
;; ("0").  Emits vaddw reading %f1, or %e1 when BYTES_BIG_ENDIAN.
;; NOTE(review): "%" marks operand 1 as commutative with operand 2, but
;; operand 2 here is the selector PARALLEL - confirm the modifier is intended.
1460 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1461 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1462 (plus:<V_double_width>
1463 (zero_extend:<V_double_width>
1464 (vec_select:<V_HALF>
1465 (match_operand:VQI 1 "s_register_operand" "%w")
1466 (match_operand:VQI 2 "vect_par_constant_high" "")))
1467 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1470 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1471 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1473 [(set_attr "type" "neon_add_widen")])
;; Unsigned widening sum for VW inputs: adds zero-extended operand 1 to the
;; <V_widen> accumulator in operand 2 with a single vaddw.
;; NOTE(review): "%" marks operand 1 as commutative with operand 2, yet the
;; two operands have different modes (VW vs <V_widen>) - confirm intended.
1475 (define_insn "widen_usum<mode>3"
1476 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1477 (plus:<V_widen> (zero_extend:<V_widen>
1478 (match_operand:VW 1 "s_register_operand" "%w"))
1479 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1481 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1482 [(set_attr "type" "neon_add_widen")]
1485 ;; Helpers for quad-word reduction operations
1487 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1488 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1489 ; N/2-element vector.
;; V4SI instance: combines elements {0,1} of operand 1 with elements {2,3}
;; into a V2SI result, reading the input as %e1 and %f1.
1491 (define_insn "quad_halves_<code>v4si"
1492 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1494 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1495 (parallel [(const_int 0) (const_int 1)]))
1496 (vec_select:V2SI (match_dup 1)
1497 (parallel [(const_int 2) (const_int 3)]))))]
1499 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1500 [(set_attr "vqh_mnem" "<VQH_mnem>")
1501 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF instance: combines elements {0,1} of operand 1 with elements {2,3}
;; into a V2SF result.  Only available with flag_unsafe_math_optimizations.
1504 (define_insn "quad_halves_<code>v4sf"
1505 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1507 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1508 (parallel [(const_int 0) (const_int 1)]))
1509 (vec_select:V2SF (match_dup 1)
1510 (parallel [(const_int 2) (const_int 3)]))))]
1511 "TARGET_NEON && flag_unsafe_math_optimizations"
1512 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1513 [(set_attr "vqh_mnem" "<VQH_mnem>")
1514 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI instance: combines elements {0..3} of operand 1 with elements {4..7}
;; into a V4HI result, reading the input as %e1 and %f1.
;; Operand 0 is only written (it is the SET destination and is not read by
;; the instruction), so it uses "=w" like the v4si and v4sf variants rather
;; than the read-write "+w".
1517 (define_insn "quad_halves_<code>v8hi"
1518 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1520 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1521 (parallel [(const_int 0) (const_int 1)
1522 (const_int 2) (const_int 3)]))
1523 (vec_select:V4HI (match_dup 1)
1524 (parallel [(const_int 4) (const_int 5)
1525 (const_int 6) (const_int 7)]))))]
1527 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1528 [(set_attr "vqh_mnem" "<VQH_mnem>")
1529 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI instance: combines elements {0..7} of operand 1 with elements
;; {8..15} into a V8QI result, reading the input as %e1 and %f1.
;; Operand 0 is only written (it is the SET destination and is not read by
;; the instruction), so it uses "=w" like the v4si and v4sf variants rather
;; than the read-write "+w".
1532 (define_insn "quad_halves_<code>v16qi"
1533 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1535 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1536 (parallel [(const_int 0) (const_int 1)
1537 (const_int 2) (const_int 3)
1538 (const_int 4) (const_int 5)
1539 (const_int 6) (const_int 7)]))
1540 (vec_select:V8QI (match_dup 1)
1541 (parallel [(const_int 8) (const_int 9)
1542 (const_int 10) (const_int 11)
1543 (const_int 12) (const_int 13)
1544 (const_int 14) (const_int 15)]))))]
1546 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1547 [(set_attr "vqh_mnem" "<VQH_mnem>")
1548 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Emits a move whose destination is the <V_HALF>mode subreg of operand 0 at
;; byte offset GET_MODE_SIZE (<V_HALF>mode).  The source argument is not in
;; the visible text - presumably operand 1; confirm.
1551 (define_expand "move_hi_quad_<mode>"
1552 [(match_operand:ANY128 0 "s_register_operand" "")
1553 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1556 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1557 GET_MODE_SIZE (<V_HALF>mode)),
;; Emits a move whose destination is a <V_HALF>mode subreg of operand 0.
;; The subreg offset and the source are not in the visible text - presumably
;; offset 0 and operand 1; confirm.
1562 (define_expand "move_lo_quad_<mode>"
1563 [(match_operand:ANY128 0 "s_register_operand" "")
1564 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1567 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1573 ;; Reduction operations
;; Sum reduction of a VD vector to a scalar.  neon_pairwise_reduce is driven
;; with gen_neon_vpadd_internal<mode>, then element 0 of the result is
;; extracted into operand 0.  Float modes need flag_unsafe_math_optimizations.
1575 (define_expand "reduc_plus_scal_<mode>"
1576 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1577 (match_operand:VD 1 "s_register_operand" "")]
1578 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1580 rtx vec = gen_reg_rtx (<MODE>mode);
1581 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1582 &gen_neon_vpadd_internal<mode>);
1583 /* The same result is actually computed into every element. */
1584 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Sum reduction of a VQ vector to a scalar.  quad_halves_plus<mode> folds
;; the input into a <V_HALF>mode temporary, which is then handed to the
;; half-width reduc_plus_scal expander.  Little-endian only; float modes need
;; flag_unsafe_math_optimizations.
1588 (define_expand "reduc_plus_scal_<mode>"
1589 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1590 (match_operand:VQ 1 "s_register_operand" "")]
1591 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1592 && !BYTES_BIG_ENDIAN"
1594 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1596 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1597 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector to a DImode scalar.
;; arm_reduc_plus_internal_v2di computes into a V2DI temporary, then element
;; 0 is extracted into operand 0.  Little-endian only.
;; Operand 0 carries an empty constraint string like the other reduction
;; expanders: constraints have no effect in a define_expand.
1602 (define_expand "reduc_plus_scal_v2di"
1603 [(match_operand:DI 0 "nonimmediate_operand" "")
1604 (match_operand:V2DI 1 "s_register_operand" "")]
1605 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1607 rtx vec = gen_reg_rtx (V2DImode);
1609 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1610 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
;; Adds the two 64-bit halves of operand 1 (%e1 + %f1) with vadd.i64 and
;; writes the sum to %e0 only.  Little-endian only.
1615 (define_insn "arm_reduc_plus_internal_v2di"
1616 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1617 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1619 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1620 "vadd.i64\t%e0, %e1, %f1"
1621 [(set_attr "type" "neon_add_q")]
;; smin reduction of a VD vector to a scalar.  neon_pairwise_reduce is driven
;; with gen_neon_vpsmin<mode>, then element 0 of the result is extracted into
;; operand 0.  Float modes need flag_unsafe_math_optimizations.
1624 (define_expand "reduc_smin_scal_<mode>"
1625 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1626 (match_operand:VD 1 "s_register_operand" "")]
1627 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1629 rtx vec = gen_reg_rtx (<MODE>mode);
1631 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1632 &gen_neon_vpsmin<mode>);
1633 /* The result is computed into every element of the vector. */
1634 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; smin reduction of a VQ vector to a scalar.  quad_halves_smin<mode> folds
;; the input into a <V_HALF>mode temporary, which is then handed to the
;; half-width reduc_smin_scal expander.  Little-endian only; float modes need
;; flag_unsafe_math_optimizations.
1638 (define_expand "reduc_smin_scal_<mode>"
1639 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1640 (match_operand:VQ 1 "s_register_operand" "")]
1641 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1642 && !BYTES_BIG_ENDIAN"
1644 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1646 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1647 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; smax reduction of a VD vector to a scalar.  neon_pairwise_reduce is driven
;; with gen_neon_vpsmax<mode>, then element 0 of the result is extracted into
;; operand 0.  Float modes need flag_unsafe_math_optimizations.
1652 (define_expand "reduc_smax_scal_<mode>"
1653 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1654 (match_operand:VD 1 "s_register_operand" "")]
1655 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1657 rtx vec = gen_reg_rtx (<MODE>mode);
1658 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1659 &gen_neon_vpsmax<mode>);
1660 /* The result is computed into every element of the vector. */
1661 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; smax reduction of a VQ vector to a scalar.  quad_halves_smax<mode> folds
;; the input into a <V_HALF>mode temporary, which is then handed to the
;; half-width reduc_smax_scal expander.  Little-endian only; float modes need
;; flag_unsafe_math_optimizations.
1665 (define_expand "reduc_smax_scal_<mode>"
1666 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1667 (match_operand:VQ 1 "s_register_operand" "")]
1668 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1669 && !BYTES_BIG_ENDIAN"
1671 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1673 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1674 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; umin reduction of a VDI vector to a scalar.  neon_pairwise_reduce is
;; driven with gen_neon_vpumin<mode>, then element 0 of the result is
;; extracted into operand 0.
1679 (define_expand "reduc_umin_scal_<mode>"
1680 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1681 (match_operand:VDI 1 "s_register_operand" "")]
1684 rtx vec = gen_reg_rtx (<MODE>mode);
1685 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1686 &gen_neon_vpumin<mode>);
1687 /* The result is computed into every element of the vector. */
1688 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; umin reduction of a VQI vector to a scalar.  quad_halves_umin<mode> folds
;; the input into a <V_HALF>mode temporary, which is then handed to the
;; half-width reduc_umin_scal expander.  Little-endian only.
1692 (define_expand "reduc_umin_scal_<mode>"
1693 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1694 (match_operand:VQI 1 "s_register_operand" "")]
1695 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1697 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1699 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1700 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; umax reduction of a VDI vector to a scalar.  neon_pairwise_reduce is
;; driven with gen_neon_vpumax<mode>, then element 0 of the result is
;; extracted into operand 0.
1705 (define_expand "reduc_umax_scal_<mode>"
1706 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1707 (match_operand:VDI 1 "s_register_operand" "")]
1710 rtx vec = gen_reg_rtx (<MODE>mode);
1711 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1712 &gen_neon_vpumax<mode>);
1713 /* The result is computed into every element of the vector. */
1714 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; umax reduction of a VQI vector to a scalar.  quad_halves_umax<mode> folds
;; the input into a <V_HALF>mode temporary, which is then handed to the
;; half-width reduc_umax_scal expander.  Little-endian only.
1718 (define_expand "reduc_umax_scal_<mode>"
1719 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1720 (match_operand:VQI 1 "s_register_operand" "")]
1721 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1723 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1725 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1726 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Two-input vpadd on VD vectors, modelled as an unspec.  The scheduling
;; type is neon_fp_reduc_add_s<q> when <Is_float_mode> holds and
;; neon_reduc_add<q> otherwise.
1731 (define_insn "neon_vpadd_internal<mode>"
1732 [(set (match_operand:VD 0 "s_register_operand" "=w")
1733 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1734 (match_operand:VD 2 "s_register_operand" "w")]
1737 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1738 ;; Assume this schedules like vadd.
1740 (if_then_else (match_test "<Is_float_mode>")
1741 (const_string "neon_fp_reduc_add_s<q>")
1742 (const_string "neon_reduc_add<q>")))]
;; Two-input vpadd.f16 on V4HF vectors.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the type is neon_reduc_add, whereas
;; neon_vpadd_internal<mode> uses neon_fp_reduc_add_s<q> for float modes -
;; confirm this is intended.
1745 (define_insn "neon_vpaddv4hf"
1747 (match_operand:V4HF 0 "s_register_operand" "=w")
1748 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1749 (match_operand:V4HF 2 "s_register_operand" "w")]
1751 "TARGET_NEON_FP16INST"
1752 "vpadd.f16\t%P0, %P1, %P2"
1753 [(set_attr "type" "neon_reduc_add")]
;; Two-input vpmin on VD vectors with the <V_s_elem> suffix, modelled as an
;; unspec.  The scheduling type depends on <Is_float_mode>.
1756 (define_insn "neon_vpsmin<mode>"
1757 [(set (match_operand:VD 0 "s_register_operand" "=w")
1758 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1759 (match_operand:VD 2 "s_register_operand" "w")]
1762 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1764 (if_then_else (match_test "<Is_float_mode>")
1765 (const_string "neon_fp_reduc_minmax_s<q>")
1766 (const_string "neon_reduc_minmax<q>")))]
;; Two-input vpmax on VD vectors with the <V_s_elem> suffix, modelled as an
;; unspec.  The scheduling type depends on <Is_float_mode>.
1769 (define_insn "neon_vpsmax<mode>"
1770 [(set (match_operand:VD 0 "s_register_operand" "=w")
1771 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1772 (match_operand:VD 2 "s_register_operand" "w")]
1775 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1777 (if_then_else (match_test "<Is_float_mode>")
1778 (const_string "neon_fp_reduc_minmax_s<q>")
1779 (const_string "neon_reduc_minmax<q>")))]
;; Two-input vpmin on VDI vectors with the unsigned <V_u_elem> suffix,
;; modelled as an unspec.
1782 (define_insn "neon_vpumin<mode>"
1783 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1784 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1785 (match_operand:VDI 2 "s_register_operand" "w")]
1788 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1789 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Two-input vpmax on VDI vectors with the unsigned <V_u_elem> suffix,
;; modelled as an unspec.
1792 (define_insn "neon_vpumax<mode>"
1793 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1794 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1795 (match_operand:VDI 2 "s_register_operand" "w")]
1798 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1799 [(set_attr "type" "neon_reduc_minmax<q>")]
1802 ;; Saturating arithmetic
1804 ; NOTE: Neon supports many more saturating variants of instructions than the
1805 ; following, but these are all GCC currently understands.
1806 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1807 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) on VD vectors; emits vqadd with the
;; <V_s_elem> suffix.
1810 (define_insn "*ss_add<mode>_neon"
1811 [(set (match_operand:VD 0 "s_register_operand" "=w")
1812 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1813 (match_operand:VD 2 "s_register_operand" "w")))]
1815 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1816 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) on VD vectors; emits vqadd with the
;; <V_u_elem> suffix.
1819 (define_insn "*us_add<mode>_neon"
1820 [(set (match_operand:VD 0 "s_register_operand" "=w")
1821 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1822 (match_operand:VD 2 "s_register_operand" "w")))]
1824 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1825 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) on VD vectors; emits vqsub with the
;; <V_s_elem> suffix.
1828 (define_insn "*ss_sub<mode>_neon"
1829 [(set (match_operand:VD 0 "s_register_operand" "=w")
1830 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1831 (match_operand:VD 2 "s_register_operand" "w")))]
1833 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1834 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) on VD vectors; emits vqsub with
;; the <V_u_elem> suffix.
1837 (define_insn "*us_sub<mode>_neon"
1838 [(set (match_operand:VD 0 "s_register_operand" "=w")
1839 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1840 (match_operand:VD 2 "s_register_operand" "w")))]
1842 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1843 [(set_attr "type" "neon_qsub<q>")]
1846 ;; Conditional instructions. These are comparisons with conditional moves for
1847 ;; vectors. They perform the assignment:
1849 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1851 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select on VDQW modes.  A comparison mask is built in
;; <V_cmp_result>mode (using tmp as a second mask for the two-comparison
;; cases), then neon_vbsl selects between operands 1 and 2; the two data
;; operands are passed in swapped order when swap_bsl_operands is set.
;; Operand 5 is forced into a register unless it is left as-is for the
;; compare-against-zero forms.
1854 (define_expand "vcond<mode><mode>"
1855 [(set (match_operand:VDQW 0 "s_register_operand" "")
1857 (match_operator 3 "comparison_operator"
1858 [(match_operand:VDQW 4 "s_register_operand" "")
1859 (match_operand:VDQW 5 "nonmemory_operand" "")])
1860 (match_operand:VDQW 1 "s_register_operand" "")
1861 (match_operand:VDQW 2 "s_register_operand" "")))]
1862 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1865 int use_zero_form = 0;
1866 int swap_bsl_operands = 0;
1867 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1868 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1870 rtx (*base_comparison) (rtx, rtx, rtx);
1871 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1873 switch (GET_CODE (operands[3]))
1880 if (operands[5] == CONST0_RTX (<MODE>mode))
1887 if (!REG_P (operands[5]))
1888 operands[5] = force_reg (<MODE>mode, operands[5]);
1891 switch (GET_CODE (operands[3]))
1901 base_comparison = gen_neon_vcge<mode>;
1902 complimentary_comparison = gen_neon_vcgt<mode>;
1910 base_comparison = gen_neon_vcgt<mode>;
1911 complimentary_comparison = gen_neon_vcge<mode>;
1916 base_comparison = gen_neon_vceq<mode>;
1917 complimentary_comparison = gen_neon_vceq<mode>;
1923 switch (GET_CODE (operands[3]))
1930 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1931 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1937 Note that there also exist direct comparison against 0 forms,
1938 so catch those as a special case. */
1942 switch (GET_CODE (operands[3]))
1945 base_comparison = gen_neon_vclt<mode>;
1948 base_comparison = gen_neon_vcle<mode>;
1951 /* Do nothing, other zero form cases already have the correct
1958 emit_insn (base_comparison (mask, operands[4], operands[5]));
1960 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1967 /* Vector compare returns false for lanes which are unordered, so if we use
1968 the inverse of the comparison we actually want to emit, then
1969 swap the operands to BSL, we will end up with the correct result.
1970 Note that a NE NaN and NaN NE b are true for all a, b.
1972 Our transformations are:
1977 a NE b -> !(a EQ b) */
1980 emit_insn (base_comparison (mask, operands[4], operands[5]));
1982 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1984 swap_bsl_operands = 1;
1987 /* We check (a > b || b > a). combining these comparisons give us
1988 true iff (a != b && a ORDERED b), swapping the operands to BSL
1989 will then give us (a == b || a UNORDERED b) as intended. */
1991 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1992 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1993 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1994 swap_bsl_operands = 1;
1997 /* Operands are ORDERED iff (a > b || b >= a).
1998 Swapping the operands to BSL will give the UNORDERED case. */
1999 swap_bsl_operands = 1;
2002 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2003 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2004 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2010 if (swap_bsl_operands)
2011 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2014 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Vector conditional select on VDQIW modes for arm_comparison_operator
;; codes.  One comparison (vcgeu, vcgtu, vceq, or vcle/vclt where operand 5
;; was recognised as zero) is emitted into a <V_cmp_result>mode mask, some
;; with operands 4 and 5 exchanged.  neon_vbsl then selects between operands
;; 1 and 2, passed in either order.
2019 (define_expand "vcondu<mode><mode>"
2020 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2022 (match_operator 3 "arm_comparison_operator"
2023 [(match_operand:VDQIW 4 "s_register_operand" "")
2024 (match_operand:VDQIW 5 "s_register_operand" "")])
2025 (match_operand:VDQIW 1 "s_register_operand" "")
2026 (match_operand:VDQIW 2 "s_register_operand" "")))]
2030 int inverse = 0, immediate_zero = 0;
2032 mask = gen_reg_rtx (<V_cmp_result>mode);
2034 if (operands[5] == CONST0_RTX (<MODE>mode))
2036 else if (!REG_P (operands[5]))
2037 operands[5] = force_reg (<MODE>mode, operands[5]);
2039 switch (GET_CODE (operands[3]))
2042 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2046 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2050 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2055 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2057 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2062 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2064 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2068 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2077 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2080 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2086 ;; Patterns for builtins.
2088 ; good for plain vadd, vaddq.
;; vadd expander for VCVTF modes.  When the mode is not a float mode, or
;; flag_unsafe_math_optimizations is set, it emits the canonical add<mode>3;
;; neon_vadd<mode>_unspec is the other form it can emit.
;; The operands carry empty constraint strings: constraints have no effect in
;; a define_expand, and the other expanders in this file do not use them.
2090 (define_expand "neon_vadd<mode>"
2091 [(match_operand:VCVTF 0 "s_register_operand" "")
2092 (match_operand:VCVTF 1 "s_register_operand" "")
2093 (match_operand:VCVTF 2 "s_register_operand" "")]
2096 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2097 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2099 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; vadd builtin expander for the VH modes, enabled by TARGET_NEON_FP16INST.
;; All three register operands are forwarded to add<mode>3_fp16.
2104 (define_expand "neon_vadd<mode>"
2105 [(match_operand:VH 0 "s_register_operand")
2106 (match_operand:VH 1 "s_register_operand")
2107 (match_operand:VH 2 "s_register_operand")]
2108 "TARGET_NEON_FP16INST"
2110 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; vsub builtin expander for the VH modes, enabled by TARGET_NEON_FP16INST.
;; All three register operands are forwarded to sub<mode>3_fp16.
2114 (define_expand "neon_vsub<mode>"
2115 [(match_operand:VH 0 "s_register_operand")
2116 (match_operand:VH 1 "s_register_operand")
2117 (match_operand:VH 2 "s_register_operand")]
2118 "TARGET_NEON_FP16INST"
2120 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2124 ; Note that NEON operations don't support the full IEEE 754 standard: in
2125 ; particular, denormal values are flushed to zero. This means that GCC cannot
2126 ; use those instructions for autovectorization, etc. unless
2127 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2128 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2129 ; header) must work in either case: if -funsafe-math-optimizations is given,
2130 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2131 ; expand to unspecs (which may potentially limit the extent to which they might
2132 ; be optimized by generic code).
2134 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd for the VCVTF modes: operands 1 and 2 are register
;; inputs inside an unspec, operand 0 is the register result.  Prints
;; vadd.<V_if_elem>.  The if_then_else yields neon_fp_addsub_s<q> when
;; <Is_float_mode> holds and neon_add<q> otherwise.
2136 (define_insn "neon_vadd<mode>_unspec"
2137 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2138 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2139 (match_operand:VCVTF 2 "s_register_operand" "w")]
2142 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2144 (if_then_else (match_test "<Is_float_mode>")
2145 (const_string "neon_fp_addsub_s<q>")
2146 (const_string "neon_add<q>")))]
;; vaddl: both inputs (operands 1 and 2) are VDI-mode registers and the
;; result (operand 0) has the corresponding <V_widen> mode.  Modelled as an
;; unspec; type attribute neon_add_long.
2149 (define_insn "neon_vaddl<sup><mode>"
2150 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2151 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2152 (match_operand:VDI 2 "s_register_operand" "w")]
2155 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2156 [(set_attr "type" "neon_add_long")]
;; vaddw: operand 1 and the result (operand 0) are in the <V_widen> mode,
;; operand 2 is a VDI-mode register.  Modelled as an unspec; type attribute
;; neon_add_widen.
2159 (define_insn "neon_vaddw<sup><mode>"
2160 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2161 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2162 (match_operand:VDI 2 "s_register_operand" "w")]
2165 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2166 [(set_attr "type" "neon_add_widen")]
;; v<r>hadd on the VDQIW modes: two register inputs inside an unspec, result
;; in the same mode.
;; NOTE(review): the type attribute is the fixed string "neon_add_halve_q",
;; whereas neon_vhsub<sup><mode> uses "neon_sub_halve<q>" -- confirm whether
;; the <q> substitution was intended here.
2171 (define_insn "neon_v<r>hadd<sup><mode>"
2172 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2173 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2174 (match_operand:VDQIW 2 "s_register_operand" "w")]
2177 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2178 [(set_attr "type" "neon_add_halve_q")]
;; vqadd on the VDQIX modes: two register inputs inside an unspec, result in
;; the same mode.  Type attribute neon_qadd<q>.
2181 (define_insn "neon_vqadd<sup><mode>"
2182 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2183 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2184 (match_operand:VDQIX 2 "s_register_operand" "w")]
2187 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2188 [(set_attr "type" "neon_qadd<q>")]
;; v<r>addhn: both inputs are VN-mode registers and the result has the
;; corresponding <V_narrow> mode.  Modelled as an unspec; type attribute
;; neon_add_halve_narrow_q.
2191 (define_insn "neon_v<r>addhn<mode>"
2192 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2193 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2194 (match_operand:VN 2 "s_register_operand" "w")]
2197 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2198 [(set_attr "type" "neon_add_halve_narrow_q")]
2201 ;; Polynomial and Float multiplication.
;; vmul.<pf><size> on the VPF modes, modelled as an unspec of operands 1
;; and 2.  The if_then_else yields neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
2202 (define_insn "neon_vmul<pf><mode>"
2203 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2204 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2205 (match_operand:VPF 2 "s_register_operand" "w")]
2208 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2210 (if_then_else (match_test "<Is_float_mode>")
2211 (const_string "neon_fp_mul_s<q>")
2212 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Standard-named mul<mode>3 for the VH modes.  Only available when both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations hold; prints
;; vmul.f16.
2215 (define_insn "mul<mode>3"
2217 (match_operand:VH 0 "s_register_operand" "=w")
2219 (match_operand:VH 1 "s_register_operand" "w")
2220 (match_operand:VH 2 "s_register_operand" "w")))]
2221 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2222 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2223 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; neon_vmulf<mode> for the VH modes.  Prints the same vmul.f16 template as
;; mul<mode>3 but is conditioned on TARGET_NEON_FP16INST alone.
2226 (define_insn "neon_vmulf<mode>"
2228 (match_operand:VH 0 "s_register_operand" "=w")
2230 (match_operand:VH 1 "s_register_operand" "w")
2231 (match_operand:VH 2 "s_register_operand" "w")))]
2232 "TARGET_NEON_FP16INST"
2233 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2234 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Builtin expander for vmla on the VDQW modes.  Operand 1 carries the "0"
;; constraint (tied to operand 0); operands 2 and 3 are the other two inputs.
;; For non-float modes, or with flag_unsafe_math_optimizations, it emits
;; mul<mode>3add<mode>_neon; the other emit site uses neon_vmla<mode>_unspec.
2237 (define_expand "neon_vmla<mode>"
2238 [(match_operand:VDQW 0 "s_register_operand" "=w")
2239 (match_operand:VDQW 1 "s_register_operand" "0")
2240 (match_operand:VDQW 2 "s_register_operand" "w")
2241 (match_operand:VDQW 3 "s_register_operand" "w")]
2244 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2245 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2246 operands[2], operands[3]));
2248 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2249 operands[2], operands[3]));
;; vfma builtin expander for the VCVTF modes (TARGET_NEON && TARGET_FMA).
;; Emits fma<mode>4_intrinsic with operands 0, 2 and 3 leading the argument
;; list, so builtin operand 1 is not in the first multiplicand position.
2253 (define_expand "neon_vfma<VCVTF:mode>"
2254 [(match_operand:VCVTF 0 "s_register_operand")
2255 (match_operand:VCVTF 1 "s_register_operand")
2256 (match_operand:VCVTF 2 "s_register_operand")
2257 (match_operand:VCVTF 3 "s_register_operand")]
2258 "TARGET_NEON && TARGET_FMA"
2260 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfma builtin expander for the VH modes (TARGET_NEON_FP16INST).  Emits
;; fma<mode>4_intrinsic with operands 0, 2 and 3 leading the argument list.
2265 (define_expand "neon_vfma<VH:mode>"
2266 [(match_operand:VH 0 "s_register_operand")
2267 (match_operand:VH 1 "s_register_operand")
2268 (match_operand:VH 2 "s_register_operand")
2269 (match_operand:VH 3 "s_register_operand")]
2270 "TARGET_NEON_FP16INST"
2272 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms builtin expander for the VCVTF modes (TARGET_NEON && TARGET_FMA).
;; Emits fmsub<mode>4_intrinsic with operands 0, 2 and 3 leading the
;; argument list.
2277 (define_expand "neon_vfms<VCVTF:mode>"
2278 [(match_operand:VCVTF 0 "s_register_operand")
2279 (match_operand:VCVTF 1 "s_register_operand")
2280 (match_operand:VCVTF 2 "s_register_operand")
2281 (match_operand:VCVTF 3 "s_register_operand")]
2282 "TARGET_NEON && TARGET_FMA"
2284 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms builtin expander for the VH modes (TARGET_NEON_FP16INST).  Emits
;; fmsub<mode>4_intrinsic with operands 0, 2 and 3 leading the argument list.
2289 (define_expand "neon_vfms<VH:mode>"
2290 [(match_operand:VH 0 "s_register_operand")
2291 (match_operand:VH 1 "s_register_operand")
2292 (match_operand:VH 2 "s_register_operand")
2293 (match_operand:VH 3 "s_register_operand")]
2294 "TARGET_NEON_FP16INST"
2296 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2301 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla on the VDQW modes.  Operand 1 is tied to the output
;; ("0" constraint), so the template only prints operands 0, 2 and 3.  The
;; if_then_else yields neon_fp_mla_s<q> for float modes and
;; neon_mla_<V_elem_ch><q> otherwise.
2303 (define_insn "neon_vmla<mode>_unspec"
2304 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2305 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2306 (match_operand:VDQW 2 "s_register_operand" "w")
2307 (match_operand:VDQW 3 "s_register_operand" "w")]
2310 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2312 (if_then_else (match_test "<Is_float_mode>")
2313 (const_string "neon_fp_mla_s<q>")
2314 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: operand 1 (<V_widen> mode) is tied to the output; operands 2 and 3
;; are VW-mode registers.  Modelled as an unspec; type attribute
;; neon_mla_<V_elem_ch>_long.
2317 (define_insn "neon_vmlal<sup><mode>"
2318 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2319 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2320 (match_operand:VW 2 "s_register_operand" "w")
2321 (match_operand:VW 3 "s_register_operand" "w")]
2324 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2325 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin expander for vmls on the VDQW modes.  For non-float modes, or with
;; flag_unsafe_math_optimizations, it emits mul<mode>3neg<mode>add<mode>_neon;
;; the other emit site uses neon_vmls<mode>_unspec.
2328 (define_expand "neon_vmls<mode>"
2329 [(match_operand:VDQW 0 "s_register_operand" "=w")
2330 (match_operand:VDQW 1 "s_register_operand" "0")
2331 (match_operand:VDQW 2 "s_register_operand" "w")
2332 (match_operand:VDQW 3 "s_register_operand" "w")]
2335 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2336 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2337 operands[1], operands[2], operands[3]));
2339 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2340 operands[2], operands[3]));
2344 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls on the VDQW modes.  Operand 1 is tied to the output;
;; the template prints operands 0, 2 and 3.  The if_then_else yields
;; neon_fp_mla_s<q> for float modes and neon_mla_<V_elem_ch><q> otherwise.
2346 (define_insn "neon_vmls<mode>_unspec"
2347 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2348 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2349 (match_operand:VDQW 2 "s_register_operand" "w")
2350 (match_operand:VDQW 3 "s_register_operand" "w")]
2353 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2355 (if_then_else (match_test "<Is_float_mode>")
2356 (const_string "neon_fp_mla_s<q>")
2357 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: operand 1 (<V_widen> mode) is tied to the output; operands 2 and 3
;; are VW-mode registers.  Modelled as an unspec; type attribute
;; neon_mla_<V_elem_ch>_long.
2360 (define_insn "neon_vmlsl<sup><mode>"
2361 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2362 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2363 (match_operand:VW 2 "s_register_operand" "w")
2364 (match_operand:VW 3 "s_register_operand" "w")]
2367 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2368 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2371 ;; vqdmulh, vqrdmulh
;; One pattern for both vqdmulh and vqrdmulh (selected by <r>) on the VMDQI
;; modes; two register inputs inside an unspec.  Type attribute
;; neon_sat_mul_<V_elem_ch><q>.
2372 (define_insn "neon_vq<r>dmulh<mode>"
2373 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2374 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2375 (match_operand:VMDQI 2 "s_register_operand" "w")]
2378 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2379 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2382 ;; vqrdmlah, vqrdmlsh
;; One pattern for vqrdmlah and vqrdmlsh (selected by <neon_rdma_as>) on the
;; VMDQI modes.  Operand 1 is tied to the output; the template prints
;; operands 0, 2 and 3.
;; NOTE(review): all operands share the VMDQI mode, yet the type attribute is
;; the "_long" variant neon_sat_mla_<V_elem_ch>_long -- confirm it is intended.
2383 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2384 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2385 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2386 (match_operand:VMDQI 2 "s_register_operand" "w")
2387 (match_operand:VMDQI 3 "s_register_operand" "w")]
2390 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2391 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: operand 1 (<V_widen> mode) is tied to the output; operands 2 and
;; 3 are VMDI-mode registers.  Modelled as an unspec; type attribute
;; neon_sat_mla_<V_elem_ch>_long.
2394 (define_insn "neon_vqdmlal<mode>"
2395 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2396 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2397 (match_operand:VMDI 2 "s_register_operand" "w")
2398 (match_operand:VMDI 3 "s_register_operand" "w")]
2401 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2402 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: operand 1 (<V_widen> mode) is tied to the output; operands 2 and
;; 3 are VMDI-mode registers.  Modelled as an unspec; type attribute
;; neon_sat_mla_<V_elem_ch>_long.
2405 (define_insn "neon_vqdmlsl<mode>"
2406 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2407 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2408 (match_operand:VMDI 2 "s_register_operand" "w")
2409 (match_operand:VMDI 3 "s_register_operand" "w")]
2412 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2413 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW-mode register inputs, result in the <V_widen> mode.
;; Modelled as an unspec; type attribute neon_mul_<V_elem_ch>_long.
2416 (define_insn "neon_vmull<sup><mode>"
2417 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2418 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2419 (match_operand:VW 2 "s_register_operand" "w")]
2422 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2423 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI-mode register inputs, result in the <V_widen> mode.
;; Modelled as an unspec; type attribute neon_sat_mul_<V_elem_ch>_long.
2426 (define_insn "neon_vqdmull<mode>"
2427 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2428 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2429 (match_operand:VMDI 2 "s_register_operand" "w")]
2432 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2433 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Builtin expander for vsub on the VCVTF modes.  When the mode is not a
;; float mode, or flag_unsafe_math_optimizations is set, it emits the generic
;; sub<mode>3 pattern; the other emit site uses neon_vsub<mode>_unspec.
2436 (define_expand "neon_vsub<mode>"
2437 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2438 (match_operand:VCVTF 1 "s_register_operand" "w")
2439 (match_operand:VCVTF 2 "s_register_operand" "w")]
2442 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2443 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2445 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2450 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub for the VCVTF modes.  Prints vsub.<V_if_elem>.  The
;; if_then_else yields neon_fp_addsub_s<q> for float modes and neon_sub<q>
;; otherwise.
2452 (define_insn "neon_vsub<mode>_unspec"
2453 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2454 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2455 (match_operand:VCVTF 2 "s_register_operand" "w")]
2458 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2460 (if_then_else (match_test "<Is_float_mode>")
2461 (const_string "neon_fp_addsub_s<q>")
2462 (const_string "neon_sub<q>")))]
;; vsubl: two VDI-mode register inputs, result in the <V_widen> mode.
;; Modelled as an unspec; type attribute neon_sub_long.
2465 (define_insn "neon_vsubl<sup><mode>"
2466 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2467 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2468 (match_operand:VDI 2 "s_register_operand" "w")]
2471 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2472 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 and the result are in the <V_widen> mode, operand 2 is a
;; VDI-mode register.  Modelled as an unspec; type attribute neon_sub_widen.
2475 (define_insn "neon_vsubw<sup><mode>"
2476 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2477 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2478 (match_operand:VDI 2 "s_register_operand" "w")]
2481 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2482 [(set_attr "type" "neon_sub_widen")]
;; vqsub on the VDQIX modes: two register inputs inside an unspec, result in
;; the same mode.  Type attribute neon_qsub<q>.
2485 (define_insn "neon_vqsub<sup><mode>"
2486 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2487 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2488 (match_operand:VDQIX 2 "s_register_operand" "w")]
2491 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2492 [(set_attr "type" "neon_qsub<q>")]
;; vhsub on the VDQIW modes: two register inputs inside an unspec, result in
;; the same mode.  Type attribute neon_sub_halve<q>.
2495 (define_insn "neon_vhsub<sup><mode>"
2496 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2497 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2498 (match_operand:VDQIW 2 "s_register_operand" "w")]
2501 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2502 [(set_attr "type" "neon_sub_halve<q>")]
;; v<r>subhn: two VN-mode register inputs, result in the <V_narrow> mode.
;; Modelled as an unspec; type attribute neon_sub_halve_narrow_q.
2505 (define_insn "neon_v<r>subhn<mode>"
2506 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2507 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2508 (match_operand:VN 2 "s_register_operand" "w")]
2511 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2512 [(set_attr "type" "neon_sub_halve_narrow_q")]
2515 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2516 ;; without unsafe math optimizations.
;; Comparison expander over the VDQW modes.  Operand 0 is the <V_cmp_result>
;; mask, operand 1 a register and operand 2 a register or zero
;; (reg_or_zero_operand).  Vector-float modes without
;; flag_unsafe_math_optimizations are sent to the explicit v2sf / v4sf
;; *_insn_unspec generators; the remaining emit site uses
;; neon_vc<cmp_op><mode>_insn.
2517 (define_expand "neon_vc<cmp_op><mode>"
2518 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2520 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2521 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2524 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2526 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2527 && !flag_unsafe_math_optimizations)
2529 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2530 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2531 whereas this expander iterates over the integer modes as well,
2532 but we will never expand to UNSPECs for the integer comparisons. */
2536 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2541 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2550 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; RTL-comparison form of vc<cmp_op> on the VDQW modes.  The condition
;; excludes vector-float modes unless flag_unsafe_math_optimizations is set.
;; Two alternatives: operand 2 in a register ("w") or the zero constant
;; ("Dz").  The output code builds the template with sprintf, choosing "f" or
;; <cmp_type> as the element-type letter and "%<V_reg>2" or "#0" as the last
;; operand according to which_alternative.  The if_then_else yields
;; neon_compare_zero<q> when operand 2 is a zero_operand, else
;; neon_compare<q>.
2557 (define_insn "neon_vc<cmp_op><mode>_insn"
2558 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2560 (COMPARISONS:<V_cmp_result>
2561 (match_operand:VDQW 1 "s_register_operand" "w,w")
2562 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2563 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2564 && !flag_unsafe_math_optimizations)"
2567 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2569 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2570 ? "f" : "<cmp_type>",
2571 which_alternative == 0
2572 ? "%<V_reg>2" : "#0");
2573 output_asm_insn (pattern, operands);
2577 (if_then_else (match_operand 2 "zero_operand")
2578 (const_string "neon_compare_zero<q>")
2579 (const_string "neon_compare<q>")))]
;; Unspec form of the floating-point vector comparison, VCVTF modes only.
;; Same two alternatives as the non-unspec pattern (register or zero for
;; operand 2); the template always uses the ".f" element type.  Type
;; attribute neon_fp_compare_s<q>.
2582 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2583 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2584 (unspec:<V_cmp_result>
2585 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2586 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2591 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2593 which_alternative == 0
2594 ? "%<V_reg>2" : "#0");
2595 output_asm_insn (pattern, operands);
2598 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Comparison expander for the VH modes (TARGET_NEON_FP16INST).  Vector-float
;; modes without flag_unsafe_math_optimizations use the *_fp16insn_unspec
;; generator; the other call site uses the *_fp16insn generator.  Both
;; receive operands 0, 1 and 2 unchanged.
2601 (define_expand "neon_vc<cmp_op><mode>"
2602 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2605 (match_operand:VH 1 "s_register_operand")
2606 (match_operand:VH 2 "reg_or_zero_operand")))]
2607 "TARGET_NEON_FP16INST"
2609 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2611 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2612 && !flag_unsafe_math_optimizations)
2614 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2615 (operands[0], operands[1], operands[2]));
2618 (gen_neon_vc<cmp_op><mode>_fp16insn
2619 (operands[0], operands[1], operands[2]));
;; RTL-comparison form of vc<cmp_op> on the VH modes.  Requires
;; TARGET_NEON_FP16INST and, for vector-float modes,
;; flag_unsafe_math_optimizations.  Alternatives and template construction
;; follow the same scheme as the VDQW pattern: operand 2 is a register or
;; zero, printed as "%<V_reg>2" or "#0".
2623 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2624 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2626 (COMPARISONS:<V_cmp_result>
2627 (match_operand:VH 1 "s_register_operand" "w,w")
2628 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2629 "TARGET_NEON_FP16INST
2630 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2631 && !flag_unsafe_math_optimizations)"
2634 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2636 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2637 ? "f" : "<cmp_type>",
2638 which_alternative == 0
2639 ? "%<V_reg>2" : "#0");
2640 output_asm_insn (pattern, operands);
2644 (if_then_else (match_operand 2 "zero_operand")
2645 (const_string "neon_compare_zero<q>")
2646 (const_string "neon_compare<q>")))])
;; Unspec form of the VH-mode vector comparison (TARGET_NEON_FP16INST).
;; Operand 2 is a register or zero; the template always uses the ".f" element
;; type.  Type attribute neon_fp_compare_s<q>.
2648 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2650 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2651 (unspec:<V_cmp_result>
2652 [(match_operand:VH 1 "s_register_operand" "w,w")
2653 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2655 "TARGET_NEON_FP16INST"
2658 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2660 which_alternative == 0
2661 ? "%<V_reg>2" : "#0");
2662 output_asm_insn (pattern, operands);
2665 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Unsigned vector comparison (GTUGEU codes) on the VDQIW modes; both inputs
;; must be registers.  Prints vc<cmp_op>.u<size>.  Type attribute
;; neon_compare<q>.
2667 (define_insn "neon_vc<cmp_op>u<mode>"
2668 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2670 (GTUGEU:<V_cmp_result>
2671 (match_operand:VDQIW 1 "s_register_operand" "w")
2672 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2674 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2675 [(set_attr "type" "neon_compare<q>")]
;; Expander for the absolute-value comparisons (GTGE codes applied to abs of
;; operands 1 and 2) on the VCVTF modes.  With
;; flag_unsafe_math_optimizations it emits neon_vca<cmp_op><mode>_insn; the
;; other emit site uses neon_vca<cmp_op><mode>_insn_unspec.
2678 (define_expand "neon_vca<cmp_op><mode>"
2679 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2681 (GTGE:<V_cmp_result>
2682 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2683 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2686 if (flag_unsafe_math_optimizations)
2687 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2690 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; RTL form of vac<cmp_op> on the VCVTF modes: a GTGE comparison of the abs
;; of operands 1 and 2.  Requires TARGET_NEON and
;; flag_unsafe_math_optimizations.  Type attribute neon_fp_compare_s<q>.
2697 (define_insn "neon_vca<cmp_op><mode>_insn"
2698 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2700 (GTGE:<V_cmp_result>
2701 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2702 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2703 "TARGET_NEON && flag_unsafe_math_optimizations"
2704 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2705 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of vac<cmp_op_unsp> on the VCVTF modes: two register inputs,
;; <V_cmp_result> output.  Type attribute neon_fp_compare_s<q>.
2708 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2709 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2710 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2711 (match_operand:VCVTF 2 "s_register_operand" "w")]
2714 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2715 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Absolute-value comparison expander for the VH modes
;; (TARGET_NEON_FP16INST), iterating over the GLTE codes.  With
;; flag_unsafe_math_optimizations it emits the *_fp16insn pattern; the other
;; emit site uses *_fp16insn_unspec.  Both receive operands 0, 1 and 2.
2718 (define_expand "neon_vca<cmp_op><mode>"
2720 (match_operand:<V_cmp_result> 0 "s_register_operand")
2722 (GLTE:<V_cmp_result>
2723 (abs:VH (match_operand:VH 1 "s_register_operand"))
2724 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2725 "TARGET_NEON_FP16INST"
2727 if (flag_unsafe_math_optimizations)
2728 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2729 (operands[0], operands[1], operands[2]));
2731 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2732 (operands[0], operands[1], operands[2]));
;; RTL form of vac<cmp_op> on the VH modes: a GLTE comparison of the abs of
;; operands 1 and 2.  Requires TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.  Type attribute neon_fp_compare_s<q>.
2736 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2738 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2740 (GLTE:<V_cmp_result>
2741 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2742 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2743 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2744 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2745 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of vac<cmp_op_unsp> on the VH modes: two register inputs,
;; <V_cmp_result> output.  Type attribute neon_fp_compare_s<q>.
2748 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2749 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2750 (unspec:<V_cmp_result>
2751 [(match_operand:VH 1 "s_register_operand" "w")
2752 (match_operand:VH 2 "s_register_operand" "w")]
2755 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2756 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for the VH modes (TARGET_NEON_FP16INST).
;; Delegates to neon_vc<cmp_op><mode>, passing CONST0_RTX (<MODE>mode) as the
;; second comparison operand.
2759 (define_expand "neon_vc<cmp_op>z<mode>"
2761 (match_operand:<V_cmp_result> 0 "s_register_operand")
2762 (COMPARISONS:<V_cmp_result>
2763 (match_operand:VH 1 "s_register_operand")
2765 "TARGET_NEON_FP16INST"
2767 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2768 CONST0_RTX (<MODE>mode)));
;; vtst on the VDQIW modes: two register inputs inside an unspec, result in
;; the same mode.  Type attribute neon_tst<q>.
2772 (define_insn "neon_vtst<mode>"
2773 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2774 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2775 (match_operand:VDQIW 2 "s_register_operand" "w")]
2778 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2779 [(set_attr "type" "neon_tst<q>")]
;; vabd.<sup><size> on the VDQIW modes: two register inputs inside an unspec.
;; Type attribute neon_abd<q>.
2782 (define_insn "neon_vabd<sup><mode>"
2783 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2784 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2785 (match_operand:VDQIW 2 "s_register_operand" "w")]
2788 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2789 [(set_attr "type" "neon_abd<q>")]
;; vabd on the VH modes (TARGET_NEON_FP16INST): two register inputs inside an
;; unspec.
;; NOTE(review): the type attribute is neon_abd<q>, while the VCVTF pattern
;; neon_vabdf<mode> uses neon_fp_abd_s<q> -- confirm which is intended for
;; the VH modes.
2792 (define_insn "neon_vabd<mode>"
2793 [(set (match_operand:VH 0 "s_register_operand" "=w")
2794 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2795 (match_operand:VH 2 "s_register_operand" "w")]
2797 "TARGET_NEON_FP16INST"
2798 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2799 [(set_attr "type" "neon_abd<q>")]
;; vabd on the VCVTF modes: two register inputs inside an unspec.  Type
;; attribute neon_fp_abd_s<q>.
2802 (define_insn "neon_vabdf<mode>"
2803 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2804 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2805 (match_operand:VCVTF 2 "s_register_operand" "w")]
2808 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2809 [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: two VW-mode register inputs, result in the <V_widen> mode.
;; Modelled as an unspec; type attribute neon_abd_long.
2812 (define_insn "neon_vabdl<sup><mode>"
2813 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2814 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2815 (match_operand:VW 2 "s_register_operand" "w")]
2818 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2819 [(set_attr "type" "neon_abd_long")]
;; vaba on the VDQIW modes, expressed as a plus of an unspec over operands 2
;; and 3 and the accumulator operand 1, which is tied to the output ("0").
;; Type attribute neon_arith_acc<q>.
2822 (define_insn "neon_vaba<sup><mode>"
2823 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2824 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2825 (match_operand:VDQIW 3 "s_register_operand" "w")]
2827 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2829 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2830 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal: a plus of an unspec over the VW-mode operands 2 and 3 and the
;; <V_widen> accumulator operand 1, which is tied to the output ("0").  Type
;; attribute neon_arith_acc<q>.
2833 (define_insn "neon_vabal<sup><mode>"
2834 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2835 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2836 (match_operand:VW 3 "s_register_operand" "w")]
2838 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2840 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2841 [(set_attr "type" "neon_arith_acc<q>")]
;; v<maxmin>.<sup><size> on the VDQIW modes: two register inputs inside an
;; unspec.  Type attribute neon_minmax<q>.
2844 (define_insn "neon_v<maxmin><sup><mode>"
2845 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2846 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2847 (match_operand:VDQIW 2 "s_register_operand" "w")]
2850 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2851 [(set_attr "type" "neon_minmax<q>")]
;; v<maxmin> on the VCVTF modes: two register inputs inside an unspec.  Type
;; attribute neon_fp_minmax_s<q>.
2854 (define_insn "neon_v<maxmin>f<mode>"
2855 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2856 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2857 (match_operand:VCVTF 2 "s_register_operand" "w")]
2860 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2861 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; v<maxmin> on the VH modes (TARGET_NEON_FP16INST): two register inputs.
;; Type attribute neon_fp_minmax_s<q>.
2864 (define_insn "neon_v<maxmin>f<mode>"
2865 [(set (match_operand:VH 0 "s_register_operand" "=w")
2867 [(match_operand:VH 1 "s_register_operand" "w")
2868 (match_operand:VH 2 "s_register_operand" "w")]
2870 "TARGET_NEON_FP16INST"
2871 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2872 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vp<maxmin>.f16, defined for the V4HF mode only (TARGET_NEON_FP16INST):
;; two register inputs.  Type attribute neon_reduc_minmax.
2875 (define_insn "neon_vp<maxmin>fv4hf"
2876 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2878 [(match_operand:V4HF 1 "s_register_operand" "w")
2879 (match_operand:V4HF 2 "s_register_operand" "w")]
2881 "TARGET_NEON_FP16INST"
2882 "vp<maxmin>.f16\t%P0, %P1, %P2"
2883 [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> on the VH modes (TARGET_NEON_FP16INST): two register inputs.
;; Type attribute neon_fp_minmax_s<q>.
2886 (define_insn "neon_<fmaxmin_op><mode>"
2888 (match_operand:VH 0 "s_register_operand" "=w")
2890 [(match_operand:VH 1 "s_register_operand" "w")
2891 (match_operand:VH 2 "s_register_operand" "w")]
2893 "TARGET_NEON_FP16INST"
2894 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2895 [(set_attr "type" "neon_fp_minmax_s<q>")]
2898 ;; v<maxmin>nm intrinsics.
;; <fmaxmin_op> on the VCVTF modes, requiring TARGET_NEON && TARGET_VFP5.
;; Two register inputs inside an unspec; type attribute neon_fp_minmax_s<q>.
2899 (define_insn "neon_<fmaxmin_op><mode>"
2900 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2901 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2902 (match_operand:VCVTF 2 "s_register_operand" "w")]
2904 "TARGET_NEON && TARGET_VFP5"
2905 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2906 [(set_attr "type" "neon_fp_minmax_s<q>")]
2909 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Standard-named <fmaxmin><mode>3 on the VCVTF modes, requiring
;; TARGET_NEON && TARGET_VFP5.  Prints the same <fmaxmin_op> template as the
;; neon_<fmaxmin_op><mode> intrinsic pattern.
2910 (define_insn "<fmaxmin><mode>3"
2911 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2912 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2913 (match_operand:VCVTF 2 "s_register_operand" "w")]
2915 "TARGET_NEON && TARGET_VFP5"
2916 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2917 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vpadd builtin expander for the VD modes.  Forwards to
;; neon_vpadd_internal<mode>.
2920 (define_expand "neon_vpadd<mode>"
2921 [(match_operand:VD 0 "s_register_operand" "=w")
2922 (match_operand:VD 1 "s_register_operand" "w")
2923 (match_operand:VD 2 "s_register_operand" "w")]
2926 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: one VDQIW-mode register input, result in the <V_double_width>
;; mode.  Modelled as an unspec; type attribute neon_reduc_add_long.
2931 (define_insn "neon_vpaddl<sup><mode>"
2932 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2933 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2936 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2937 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: operand 1 (<V_double_width> mode) is tied to the output and
;; operand 2 is a VDQIW-mode register; the template prints operands 0 and 2.
;; Type attribute neon_reduc_add_acc.
2940 (define_insn "neon_vpadal<sup><mode>"
2941 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2942 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2943 (match_operand:VDQIW 2 "s_register_operand" "w")]
2946 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2947 [(set_attr "type" "neon_reduc_add_acc")]
;; vp<maxmin>.<sup><size> on the VDI modes: two register inputs inside an
;; unspec.  Type attribute neon_reduc_minmax<q>.
2950 (define_insn "neon_vp<maxmin><sup><mode>"
2951 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2952 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2953 (match_operand:VDI 2 "s_register_operand" "w")]
2956 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2957 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vp<maxmin> on the VCVTF modes: two register inputs inside an unspec.  Type
;; attribute neon_fp_reduc_minmax_s<q>.
2960 (define_insn "neon_vp<maxmin>f<mode>"
2961 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2962 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2963 (match_operand:VCVTF 2 "s_register_operand" "w")]
2966 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2967 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps on the VCVTF modes: two register inputs inside an unspec.  Type
;; attribute neon_fp_recps_s<q>.
2970 (define_insn "neon_vrecps<mode>"
2971 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2972 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2973 (match_operand:VCVTF 2 "s_register_operand" "w")]
2976 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2977 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps on the VH modes (TARGET_NEON_FP16INST): two register inputs inside
;; an unspec.  Type attribute neon_fp_recps_s<q>.
2980 (define_insn "neon_vrecps<mode>"
2982 (match_operand:VH 0 "s_register_operand" "=w")
2983 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2984 (match_operand:VH 2 "s_register_operand" "w")]
2986 "TARGET_NEON_FP16INST"
2987 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2988 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts on the VCVTF modes: two register inputs inside an unspec.  Type
;; attribute neon_fp_rsqrts_s<q>.
2991 (define_insn "neon_vrsqrts<mode>"
2992 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2993 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2994 (match_operand:VCVTF 2 "s_register_operand" "w")]
2997 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2998 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts on the VH modes (TARGET_NEON_FP16INST): two register inputs inside
;; an unspec.  Type attribute neon_fp_rsqrts_s<q>.
3001 (define_insn "neon_vrsqrts<mode>"
3003 (match_operand:VH 0 "s_register_operand" "=w")
3004 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3005 (match_operand:VH 2 "s_register_operand" "w")]
3007 "TARGET_NEON_FP16INST"
3008 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3009 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vabs builtin expander for the VDQW modes.  Forwards both operands to the
;; generic abs<mode>2 pattern.
3012 (define_expand "neon_vabs<mode>"
3013 [(match_operand:VDQW 0 "s_register_operand" "")
3014 (match_operand:VDQW 1 "s_register_operand" "")]
3017 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs on the VDQIW modes: one register input inside an unspec.  Type
;; attribute neon_qabs<q>.
3021 (define_insn "neon_vqabs<mode>"
3022 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3023 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3026 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3027 [(set_attr "type" "neon_qabs<q>")]
;; bswap on the VDQHSD modes, printed as vrev<V_sz_elem>.8.  Uses the
;; register_operand predicate rather than s_register_operand.  Type
;; attribute neon_rev<q>.
3030 (define_insn "neon_bswap<mode>"
3031 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3032 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3034 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3035 [(set_attr "type" "neon_rev<q>")]
;; vneg builtin expander for the VDQW modes.  Forwards both operands to the
;; generic neg<mode>2 pattern.
3038 (define_expand "neon_vneg<mode>"
3039 [(match_operand:VDQW 0 "s_register_operand" "")
3040 (match_operand:VDQW 1 "s_register_operand" "")]
3043 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; Expander neon_copysignf<mode> over the VCVTF modes.  It builds a constant
;; vector in the <V_cmp_result> mode whose every element is 0x80000000, moves
;; it into a new register, copies operand 2 into operand 0, reinterprets the
;; mask in <MODE>mode through a subreg, and finally emits neon_vbsl with that
;; mask and operand 0.
;; NOTE(review): GEN_INT (0x80000000) is not sign-extended from a 32-bit
;; element; confirm whether gen_int_mode (..., SImode) is needed to produce a
;; canonical CONST_INT on hosts with a 64-bit HOST_WIDE_INT.
3047 (define_expand "neon_copysignf<mode>"
3048 [(match_operand:VCVTF 0 "register_operand")
3049 (match_operand:VCVTF 1 "register_operand")
3050 (match_operand:VCVTF 2 "register_operand")]
3054 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3055 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
3056 rtvec v = rtvec_alloc (n_elt);
3058 /* Create bitmask for vector select. */
3059 for (i = 0; i < n_elt; ++i)
3060 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
3062 emit_move_insn (v_bitmask,
3063 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
3064 emit_move_insn (operands[0], operands[2]);
3065 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3066 <VCVTF:V_cmp_result>mode, 0);
3067 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; vqneg on the VDQIW modes: one register input inside an unspec.  Type
;; attribute neon_qneg<q>.
3074 (define_insn "neon_vqneg<mode>"
3075 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3076 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3079 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3080 [(set_attr "type" "neon_qneg<q>")]
;; vcls on the VDQIW modes: one register input inside an unspec.  Type
;; attribute neon_cls<q>.
3083 (define_insn "neon_vcls<mode>"
3084 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3085 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3088 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3089 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz<mode>2 on the VDQIW modes, expressed with the clz RTL
;; code and printed as vclz.<V_if_elem>.  Type attribute neon_cnt<q>.
3092 (define_insn "clz<mode>2"
3093 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3094 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3096 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3097 [(set_attr "type" "neon_cnt<q>")]
;; vclz builtin expander for the VDQIW modes.  Forwards both operands to
;; clz<mode>2.
3100 (define_expand "neon_vclz<mode>"
3101 [(match_operand:VDQIW 0 "s_register_operand" "")
3102 (match_operand:VDQIW 1 "s_register_operand" "")]
3105 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount<mode>2 on the VE modes, expressed with the
;; popcount RTL code and printed as vcnt.<V_sz_elem>.  Type attribute
;; neon_cnt<q>.
3109 (define_insn "popcount<mode>2"
3110 [(set (match_operand:VE 0 "s_register_operand" "=w")
3111 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3113 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3114 [(set_attr "type" "neon_cnt<q>")]
;; vcnt builtin expander for the VE modes.  Forwards both operands to
;; popcount<mode>2.
3117 (define_expand "neon_vcnt<mode>"
3118 [(match_operand:VE 0 "s_register_operand" "=w")
3119 (match_operand:VE 1 "s_register_operand" "w")]
3122 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; neon_vrecpe<mode>, VH (half-precision vector) variant.  Only available
;; under TARGET_NEON_FP16INST; emits "vrecpe.f16" from operand 1 into
;; operand 0.  Scheduling type: neon_fp_recpe_s<q>.
3126 (define_insn "neon_vrecpe<mode>"
3127 [(set (match_operand:VH 0 "s_register_operand" "=w")
3128 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3130 "TARGET_NEON_FP16INST"
3131 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3132 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; neon_vrecpe<mode>, V32 variant: unspec pattern that emits
;; "vrecpe.<V_u_elem>" from operand 1 into operand 0.
;; Scheduling type: neon_fp_recpe_s<q>.
3135 (define_insn "neon_vrecpe<mode>"
3136 [(set (match_operand:V32 0 "s_register_operand" "=w")
3137 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3140 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3141 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; neon_vrsqrte<mode>, V32 variant: unspec pattern that emits
;; "vrsqrte.<V_u_elem>" from operand 1 into operand 0.
;; Scheduling type: neon_fp_rsqrte_s<q>.
3144 (define_insn "neon_vrsqrte<mode>"
3145 [(set (match_operand:V32 0 "s_register_operand" "=w")
3146 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3149 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3150 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; neon_vmvn<mode>: expander that forwards operands 0 and 1 unchanged to the
;; one_cmpl<mode>2 pattern.
3153 (define_expand "neon_vmvn<mode>"
3154 [(match_operand:VDQIW 0 "s_register_operand" "")
3155 (match_operand:VDQIW 1 "s_register_operand" "")]
3158 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; neon_vget_lane<mode>_sext_internal, VD variant.  Selects lane operand 2 of
;; the NEON vector operand 1 and places it in the SImode core register
;; operand 0 using "vmov.s<V_sz_elem>".  On big-endian targets the lane
;; number is mirrored (NUNITS - 1 - lane) before the instruction is printed.
;; Scheduling type: neon_to_gp.
3162 (define_insn "neon_vget_lane<mode>_sext_internal"
3163 [(set (match_operand:SI 0 "s_register_operand" "=r")
3165 (vec_select:<V_elem>
3166 (match_operand:VD 1 "s_register_operand" "w")
3167 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3170 if (BYTES_BIG_ENDIAN)
3172 int elt = INTVAL (operands[2]);
3173 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3174 operands[2] = GEN_INT (elt);
3176 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3178 [(set_attr "type" "neon_to_gp")]
;; neon_vget_lane<mode>_zext_internal, VD variant.  Same shape as the _sext
;; pattern for VD modes, but the lane is read with "vmov.u<V_sz_elem>".
;; On big-endian targets the lane number is mirrored (NUNITS - 1 - lane)
;; before the instruction is printed.  Scheduling type: neon_to_gp.
3181 (define_insn "neon_vget_lane<mode>_zext_internal"
3182 [(set (match_operand:SI 0 "s_register_operand" "=r")
3184 (vec_select:<V_elem>
3185 (match_operand:VD 1 "s_register_operand" "w")
3186 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3189 if (BYTES_BIG_ENDIAN)
3191 int elt = INTVAL (operands[2]);
3192 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3193 operands[2] = GEN_INT (elt);
3195 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3197 [(set_attr "type" "neon_to_gp")]
;; neon_vget_lane<mode>_sext_internal, VQ2 variant.  The requested lane is
;; addressed through a <V_HALF>mode register: elt / halfelts picks the half
;; (hard register regno + 2 * half) and elt % halfelts is the lane inside
;; that half, mirrored on big-endian targets.  The instruction is printed
;; with output_asm_insn on a private operand array as "vmov.s<V_sz_elem>".
;; Scheduling type: neon_to_gp_q.
3200 (define_insn "neon_vget_lane<mode>_sext_internal"
3201 [(set (match_operand:SI 0 "s_register_operand" "=r")
3203 (vec_select:<V_elem>
3204 (match_operand:VQ2 1 "s_register_operand" "w")
3205 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3209 int regno = REGNO (operands[1]);
3210 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3211 unsigned int elt = INTVAL (operands[2]);
3212 unsigned int elt_adj = elt % halfelts;
3214 if (BYTES_BIG_ENDIAN)
3215 elt_adj = halfelts - 1 - elt_adj;
3217 ops[0] = operands[0];
3218 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3219 ops[2] = GEN_INT (elt_adj);
3220 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3224 [(set_attr "type" "neon_to_gp_q")]
;; neon_vget_lane<mode>_zext_internal, VQ2 variant.  Identical lane/half
;; computation to the VQ2 _sext pattern (half = elt / halfelts, register
;; regno + 2 * half, in-half lane = elt % halfelts, mirrored on big-endian),
;; but the lane is read with "vmov.u<V_sz_elem>".
;; Scheduling type: neon_to_gp_q.
3227 (define_insn "neon_vget_lane<mode>_zext_internal"
3228 [(set (match_operand:SI 0 "s_register_operand" "=r")
3230 (vec_select:<V_elem>
3231 (match_operand:VQ2 1 "s_register_operand" "w")
3232 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3236 int regno = REGNO (operands[1]);
3237 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3238 unsigned int elt = INTVAL (operands[2]);
3239 unsigned int elt_adj = elt % halfelts;
3241 if (BYTES_BIG_ENDIAN)
3242 elt_adj = halfelts - 1 - elt_adj;
3244 ops[0] = operands[0];
3245 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3246 ops[2] = GEN_INT (elt_adj);
3247 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3251 [(set_attr "type" "neon_to_gp_q")]
;; neon_vget_lane<mode>: expander for extracting lane operand 2 of the VDQW
;; vector operand 1 into the <V_ext>-mode operand 0.  On big-endian targets
;; the lane number is first remapped by XOR with (lanes per 64 bits - 1).
;; Modes with 32-bit elements are handled by the vec_extract pattern; the
;; neon_vget_lane<mode>_sext_internal pattern is the other insn emitted here.
3254 (define_expand "neon_vget_lane<mode>"
3255 [(match_operand:<V_ext> 0 "s_register_operand" "")
3256 (match_operand:VDQW 1 "s_register_operand" "")
3257 (match_operand:SI 2 "immediate_operand" "")]
3260 if (BYTES_BIG_ENDIAN)
3262 /* The intrinsics are defined in terms of a model where the
3263 element ordering in memory is vldm order, whereas the generic
3264 RTL is defined in terms of a model where the element ordering
3265 in memory is array order. Convert the lane number to conform
3267 unsigned int elt = INTVAL (operands[2]);
3268 unsigned int reg_nelts
3269 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3270 elt ^= reg_nelts - 1;
3271 operands[2] = GEN_INT (elt);
3274 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3275 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3278 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; neon_vget_laneu<mode>: counterpart of neon_vget_lane<mode> for the VDQIW
;; (integer) modes that uses the _zext_internal pattern.  On big-endian
;; targets the lane number is first remapped by XOR with
;; (lanes per 64 bits - 1).  Modes with 32-bit elements are handled by the
;; vec_extract pattern.
3284 (define_expand "neon_vget_laneu<mode>"
3285 [(match_operand:<V_ext> 0 "s_register_operand" "")
3286 (match_operand:VDQIW 1 "s_register_operand" "")
3287 (match_operand:SI 2 "immediate_operand" "")]
3290 if (BYTES_BIG_ENDIAN)
3292 /* The intrinsics are defined in terms of a model where the
3293 element ordering in memory is vldm order, whereas the generic
3294 RTL is defined in terms of a model where the element ordering
3295 in memory is array order. Convert the lane number to conform
3297 unsigned int elt = INTVAL (operands[2]);
3298 unsigned int reg_nelts
3299 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3300 elt ^= reg_nelts - 1;
3301 operands[2] = GEN_INT (elt);
3304 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3305 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3308 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; neon_vget_lanedi: DImode case of vget_lane.  The lane operand (2) is not
;; consulted; the expander just moves operand 1 into operand 0.
3314 (define_expand "neon_vget_lanedi"
3315 [(match_operand:DI 0 "s_register_operand" "=r")
3316 (match_operand:DI 1 "s_register_operand" "w")
3317 (match_operand:SI 2 "immediate_operand" "")]
3320 emit_move_insn (operands[0], operands[1]);
;; neon_vget_lanev2di: extract one DImode lane from a V2DI register.  On
;; big-endian targets the lane number is flipped (XOR with 1).  The lane
;; must be 0 or 1 (asserted); lane 0 is read with gen_lowpart and lane 1
;; with gen_highpart, and the result is moved into operand 0.
3324 (define_expand "neon_vget_lanev2di"
3325 [(match_operand:DI 0 "s_register_operand" "")
3326 (match_operand:V2DI 1 "s_register_operand" "")
3327 (match_operand:SI 2 "immediate_operand" "")]
3332 if (BYTES_BIG_ENDIAN)
3334 /* The intrinsics are defined in terms of a model where the
3335 element ordering in memory is vldm order, whereas the generic
3336 RTL is defined in terms of a model where the element ordering
3337 in memory is array order. Convert the lane number to conform
3339 unsigned int elt = INTVAL (operands[2]);
3340 unsigned int reg_nelts = 2;
3341 elt ^= reg_nelts - 1;
3342 operands[2] = GEN_INT (elt);
3345 lane = INTVAL (operands[2]);
3346 gcc_assert ((lane ==0) || (lane == 1));
3347 emit_move_insn (operands[0], lane == 0
3348 ? gen_lowpart (DImode, operands[1])
3349 : gen_highpart (DImode, operands[1]));
;; neon_vset_lane<mode>: write the scalar operand 1 into lane operand 3 of
;; the vector operand 2, producing operand 0.  The lane number is remapped
;; on big-endian targets (XOR with lanes per 64 bits - 1) and then passed to
;; vec_set<mode>_internal as a one-hot mask (1 << lane).
3353 (define_expand "neon_vset_lane<mode>"
3354 [(match_operand:VDQ 0 "s_register_operand" "=w")
3355 (match_operand:<V_elem> 1 "s_register_operand" "r")
3356 (match_operand:VDQ 2 "s_register_operand" "0")
3357 (match_operand:SI 3 "immediate_operand" "i")]
3360 unsigned int elt = INTVAL (operands[3]);
3362 if (BYTES_BIG_ENDIAN)
3364 unsigned int reg_nelts
3365 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3366 elt ^= reg_nelts - 1;
3369 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3370 GEN_INT (1 << elt), operands[2]));
3374 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; In short: the expander below only moves operand 1 into operand 0; the
;; previous vector value (operand 2) and the lane number (operand 3) are not
;; read.  Presumably this is safe because a DImode value has a single lane.
3376 (define_expand "neon_vset_lanedi"
3377 [(match_operand:DI 0 "s_register_operand" "=w")
3378 (match_operand:DI 1 "s_register_operand" "r")
3379 (match_operand:DI 2 "s_register_operand" "0")
3380 (match_operand:SI 3 "immediate_operand" "i")]
3383 emit_move_insn (operands[0], operands[1]);
;; neon_vcreate<mode>: build a VD_RE-mode vector from the DImode operand 1.
;; The value is reinterpreted with gen_lowpart (<MODE>mode, ...) and moved
;; into operand 0; no conversion instruction is emitted here.
3387 (define_expand "neon_vcreate<mode>"
3388 [(match_operand:VD_RE 0 "s_register_operand" "")
3389 (match_operand:DI 1 "general_operand" "")]
3392 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3393 emit_move_insn (operands[0], src);
;; neon_vdup_n<mode>, VX variant: (vec_duplicate ...) of a scalar held in a
;; core register (constraint "r"), emitted as "vdup.<V_sz_elem>".
;; Scheduling type: neon_from_gp<q>.
3397 (define_insn "neon_vdup_n<mode>"
3398 [(set (match_operand:VX 0 "s_register_operand" "=w")
3399 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3401 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3402 [(set_attr "type" "neon_from_gp<q>")]
;; neon_vdup_nv4hf: (vec_duplicate ...) of an HFmode scalar held in a core
;; register (constraint "r") into a V4HF register.
;; Scheduling type: neon_from_gp.
3405 (define_insn "neon_vdup_nv4hf"
3406 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3407 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3410 [(set_attr "type" "neon_from_gp")]
;; neon_vdup_nv8hf: (vec_duplicate ...) of an HFmode scalar held in a core
;; register (constraint "r") into a V8HF register.
;; Scheduling type: neon_from_gp_q.
3413 (define_insn "neon_vdup_nv8hf"
3414 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3415 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3418 [(set_attr "type" "neon_from_gp_q")]
;; neon_vdup_n<mode>, V32 variant.  Two alternatives for the scalar source:
;;   0: constraint "r", printed as %1  (type neon_from_gp<q>);
;;   1: constraint "t", printed as %y1 (type neon_dup<q>).
;; Both emit "vdup.<V_sz_elem>".
3421 (define_insn "neon_vdup_n<mode>"
3422 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3423 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3426 vdup.<V_sz_elem>\t%<V_reg>0, %1
3427 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3428 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; neon_vdup_ndi: DImode case of vdup_n; expands to a plain move of
;; operand 1 into operand 0.
3431 (define_expand "neon_vdup_ndi"
3432 [(match_operand:DI 0 "s_register_operand" "=w")
3433 (match_operand:DI 1 "s_register_operand" "r")]
3436 emit_move_insn (operands[0], operands[1]);
;; neon_vdup_nv2di: duplicate a DImode value into both halves of a V2DI
;; register.  Each alternative emits two vmov instructions (hence length 8),
;; one writing %e0 and one writing %f0:
;;   0: source in core registers ("r"), printed as the pair %Q1, %R1;
;;   1: source in a NEON register ("w"), printed as %P1.
3441 (define_insn "neon_vdup_nv2di"
3442 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3443 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3446 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3447 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3448 [(set_attr "length" "8")
3449 (set_attr "type" "multiple")]
;; neon_vdup_lane<mode>_internal, VDQW variant.  Selects lane operand 2 of
;; the <V_double_vector_mode> register operand 1 and emits a
;; "vdup.<V_sz_elem>" from that lane.  On big-endian targets the lane number
;; is mirrored within the source vector first.  There are two output
;; templates: one prints the destination as %P0 and the other as %q0.
3452 (define_insn "neon_vdup_lane<mode>_internal"
3453 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3455 (vec_select:<V_elem>
3456 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3457 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3460 if (BYTES_BIG_ENDIAN)
3462 int elt = INTVAL (operands[2]);
3463 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3464 operands[2] = GEN_INT (elt);
3467 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3469 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3471 [(set_attr "type" "neon_dup<q>")]
;; neon_vdup_lane<mode>_internal, VH (half-precision) variant.  Same
;; structure as the VDQW pattern of the same name, but only enabled when
;; TARGET_NEON && TARGET_FP16.  The lane number is mirrored on big-endian
;; targets; the destination is printed as %P0 or %q0 depending on the
;; template chosen.
3474 (define_insn "neon_vdup_lane<mode>_internal"
3475 [(set (match_operand:VH 0 "s_register_operand" "=w")
3477 (vec_select:<V_elem>
3478 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3479 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3480 "TARGET_NEON && TARGET_FP16"
3482 if (BYTES_BIG_ENDIAN)
3484 int elt = INTVAL (operands[2]);
3485 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3486 operands[2] = GEN_INT (elt);
3489 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3491 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3493 [(set_attr "type" "neon_dup<q>")]
;; neon_vdup_lane<mode>, VDQW expander.  On big-endian targets the lane
;; number (operand 2) is remapped by XOR with (lanes per 64 bits - 1) of the
;; source vector mode; the work is then delegated to
;; neon_vdup_lane<mode>_internal.
3496 (define_expand "neon_vdup_lane<mode>"
3497 [(match_operand:VDQW 0 "s_register_operand" "=w")
3498 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3499 (match_operand:SI 2 "immediate_operand" "i")]
3502 if (BYTES_BIG_ENDIAN)
3504 unsigned int elt = INTVAL (operands[2]);
3505 unsigned int reg_nelts
3506 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3507 elt ^= reg_nelts - 1;
3508 operands[2] = GEN_INT (elt);
3510 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; neon_vdup_lane<mode>, VH (half-precision) expander, enabled when
;; TARGET_NEON && TARGET_FP16.  Performs the same big-endian lane remapping
;; as the VDQW expander and then delegates to neon_vdup_lane<mode>_internal.
3515 (define_expand "neon_vdup_lane<mode>"
3516 [(match_operand:VH 0 "s_register_operand")
3517 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3518 (match_operand:SI 2 "immediate_operand")]
3519 "TARGET_NEON && TARGET_FP16"
3521 if (BYTES_BIG_ENDIAN)
3523 unsigned int elt = INTVAL (operands[2]);
3524 unsigned int reg_nelts
3525 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3526 elt ^= reg_nelts - 1;
3527 operands[2] = GEN_INT (elt);
3529 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3534 ; Scalar index is ignored, since only zero is valid here.
;; The expansion is therefore a plain move of operand 1 into operand 0.
3535 (define_expand "neon_vdup_lanedi"
3536 [(match_operand:DI 0 "s_register_operand" "=w")
3537 (match_operand:DI 1 "s_register_operand" "w")
3538 (match_operand:SI 2 "immediate_operand" "i")]
3541 emit_move_insn (operands[0], operands[1]);
3545 ; Likewise for v2di, as the DImode second operand has only a single element.
;; The lane operand (2) is not read; the expansion reuses neon_vdup_nv2di to
;; replicate operand 1 into both halves of operand 0.
3546 (define_expand "neon_vdup_lanev2di"
3547 [(match_operand:V2DI 0 "s_register_operand" "=w")
3548 (match_operand:DI 1 "s_register_operand" "w")
3549 (match_operand:SI 2 "immediate_operand" "i")]
3552 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3556 ; Disabled before reload because we don't want combine doing something silly,
3557 ; but used by the post-reload expansion of neon_vcombine.
;; The pattern is two sets that exchange operands 0 and 1 (both "+w", i.e.
;; read and written) and is emitted as one "vswp" instruction.  The
;; reload_completed test in the condition is what keeps it from matching
;; earlier.
3558 (define_insn "*neon_vswp<mode>"
3559 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3560 (match_operand:VDQX 1 "s_register_operand" "+w"))
3561 (set (match_dup 1) (match_dup 0))]
3562 "TARGET_NEON && reload_completed"
3563 "vswp\t%<V_reg>0, %<V_reg>1"
3564 [(set_attr "type" "neon_permute<q>")]
3567 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; destination vector (operand 0).
3569 ;; FIXME: A different implementation of this builtin could make it much
3570 ;; more likely that we wouldn't actually need to output anything (we could make
3571 ;; it so that the reg allocator puts things in the right places magically
3572 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; The RTL is a vec_concat of two VDX operands into a <V_DOUBLE> result.  The
;; insn is split once reload has completed, by calling neon_split_vcombine
;; on the operands.
3574 (define_insn_and_split "neon_vcombine<mode>"
3575 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3576 (vec_concat:<V_DOUBLE>
3577 (match_operand:VDX 1 "s_register_operand" "w")
3578 (match_operand:VDX 2 "s_register_operand" "w")))]
3581 "&& reload_completed"
3584 neon_split_vcombine (operands);
3587 [(set_attr "type" "multiple")]
;; neon_vget_high<mode>: move one <V_HALF>mode half of the VQX operand 1 into
;; operand 0.  The half is taken as a subreg at byte offset
;; GET_MODE_SIZE (<V_HALF>mode); no dedicated instruction pattern is used.
3590 (define_expand "neon_vget_high<mode>"
3591 [(match_operand:<V_HALF> 0 "s_register_operand")
3592 (match_operand:VQX 1 "s_register_operand")]
3595 emit_move_insn (operands[0],
3596 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3597 GET_MODE_SIZE (<V_HALF>mode)));
;; neon_vget_low<mode>: move a <V_HALF>mode subreg of the VQX operand 1 into
;; operand 0, built with simplify_gen_subreg.
3601 (define_expand "neon_vget_low<mode>"
3602 [(match_operand:<V_HALF> 0 "s_register_operand")
3603 (match_operand:VQX 1 "s_register_operand")]
3606 emit_move_insn (operands[0],
3607 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; float<mode><V_cvtto>2: signed integer vector (VCVTI) to <V_CVTTO>
;; conversion, emitted as "vcvt.f32.s32".  Not available when
;; flag_rounding_math is set.
3612 (define_insn "float<mode><V_cvtto>2"
3613 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3614 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3615 "TARGET_NEON && !flag_rounding_math"
3616 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3617 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; floatuns<mode><V_cvtto>2: unsigned integer vector (VCVTI) to <V_CVTTO>
;; conversion, emitted as "vcvt.f32.u32".  Not available when
;; flag_rounding_math is set.
3620 (define_insn "floatuns<mode><V_cvtto>2"
3621 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3622 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3623 "TARGET_NEON && !flag_rounding_math"
3624 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3625 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; fix_trunc<mode><V_cvtto>2: (fix ...) of a VCVTF vector into <V_CVTTO>,
;; emitted as "vcvt.s32.f32".
3628 (define_insn "fix_trunc<mode><V_cvtto>2"
3629 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3630 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3632 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3633 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; fixuns_trunc<mode><V_cvtto>2: (unsigned_fix ...) of a VCVTF vector into
;; <V_CVTTO>, emitted as "vcvt.u32.f32".
3636 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3637 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3638 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3640 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3641 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup><mode>, VCVTF source: unspec float-to-integer conversion.
;; <sup> supplies the signedness letter placed before "32" in the
;; destination type of the "vcvt" mnemonic; the source type is f32.
3644 (define_insn "neon_vcvt<sup><mode>"
3645 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3646 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3649 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3650 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup><mode>, VCVTI source: unspec integer-to-float conversion.
;; The destination type is f32 and <sup> supplies the signedness letter of
;; the 32-bit integer source type.
3653 (define_insn "neon_vcvt<sup><mode>"
3654 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3655 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3658 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3659 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; neon_vcvtv4sfv4hf: widen a V4HF vector (printed %P1) to V4SF (printed
;; %q0) with "vcvt.f32.f16".  Requires TARGET_NEON && TARGET_FP16.
3662 (define_insn "neon_vcvtv4sfv4hf"
3663 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3664 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3666 "TARGET_NEON && TARGET_FP16"
3667 "vcvt.f32.f16\t%q0, %P1"
3668 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; neon_vcvtv4hfv4sf: narrow a V4SF vector (printed %q1) to V4HF (printed
;; %P0) with "vcvt.f16.f32".  Requires TARGET_NEON && TARGET_FP16.
3671 (define_insn "neon_vcvtv4hfv4sf"
3672 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3673 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3675 "TARGET_NEON && TARGET_FP16"
3676 "vcvt.f16.f32\t%P0, %q1"
3677 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; neon_vcvt<sup><mode>, VCVTHI source: 16-bit integer to f16 conversion.
;; <sup> supplies the signedness letter of the 16-bit source type.
;; Requires TARGET_NEON_FP16INST.
3680 (define_insn "neon_vcvt<sup><mode>"
3682 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3684 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
3686 "TARGET_NEON_FP16INST"
3687 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
3688 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; neon_vcvt<sup><mode>, VH source: f16 to 16-bit integer conversion.
;; <sup> supplies the signedness letter of the 16-bit destination type.
;; Requires TARGET_NEON_FP16INST.
3691 (define_insn "neon_vcvt<sup><mode>"
3693 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3695 [(match_operand:VH 1 "s_register_operand" "w")]
3697 "TARGET_NEON_FP16INST"
3698 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3699 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode>, VCVTF source: float-to-integer vcvt that takes an
;; extra immediate (operand 2), printed as the third assembler operand.
;; arm_const_bounds (operands[2], 1, 33) validates the immediate before the
;; instruction is printed.
3702 (define_insn "neon_vcvt<sup>_n<mode>"
3703 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3704 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3705 (match_operand:SI 2 "immediate_operand" "i")]
3709 arm_const_bounds (operands[2], 1, 33);
3710 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3712 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode>, VH source: f16-to-integer vcvt with an extra
;; immediate (operand 2).  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the bounds passed to arm_const_bounds here are (0, 17),
;; whereas the 32-bit variants of this pattern pass (1, 33).  Confirm that a
;; lower bound of 0 is intended for the 16-bit form.
3715 (define_insn "neon_vcvt<sup>_n<mode>"
3716 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3718 [(match_operand:VH 1 "s_register_operand" "w")
3719 (match_operand:SI 2 "immediate_operand" "i")]
3721 "TARGET_NEON_FP16INST"
3723 arm_const_bounds (operands[2], 0, 17);
3724 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
3726 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode>, VCVTI source: integer-to-float vcvt that takes an
;; extra immediate (operand 2), printed as the third assembler operand.
;; arm_const_bounds (operands[2], 1, 33) validates the immediate before the
;; instruction is printed.
3729 (define_insn "neon_vcvt<sup>_n<mode>"
3730 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3731 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3732 (match_operand:SI 2 "immediate_operand" "i")]
3736 arm_const_bounds (operands[2], 1, 33);
3737 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3739 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode>, VCVTHI source: integer-to-f16 vcvt with an extra
;; immediate (operand 2).  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the bounds passed to arm_const_bounds here are (0, 17),
;; whereas the 32-bit variants of this pattern pass (1, 33).  Confirm that a
;; lower bound of 0 is intended for the 16-bit form.
3742 (define_insn "neon_vcvt<sup>_n<mode>"
3743 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3745 [(match_operand:VCVTHI 1 "s_register_operand" "w")
3746 (match_operand:SI 2 "immediate_operand" "i")]
3748 "TARGET_NEON_FP16INST"
3750 arm_const_bounds (operands[2], 0, 17);
3751 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3753 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; neon_vcvt<vcvth_op><sup><mode>: f16-to-integer conversions whose mnemonic
;; carries the <vcvth_op> suffix directly after "vcvt".  <sup> supplies the
;; signedness letter of the 16-bit destination type.
;; Requires TARGET_NEON_FP16INST.
3756 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3758 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3760 [(match_operand:VH 1 "s_register_operand" "w")]
3762 "TARGET_NEON_FP16INST"
3763 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3764 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vmovn<mode>: unspec producing a <V_narrow> result from a VN-mode
;; source; emits "vmovn.<V_if_elem>" with the destination printed as %P0 and
;; the source as %q1.
3767 (define_insn "neon_vmovn<mode>"
3768 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3769 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3772 "vmovn.<V_if_elem>\t%P0, %q1"
3773 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; neon_vqmovn<sup><mode>: unspec producing a <V_narrow> result from a
;; VN-mode source; emits "vqmovn" with the element type built from <sup> and
;; <V_sz_elem>.  Destination is printed as %P0, source as %q1.
3776 (define_insn "neon_vqmovn<sup><mode>"
3777 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3778 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3781 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3782 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vqmovun<mode>: unspec producing a <V_narrow> result from a VN-mode
;; source; emits "vqmovun.<V_s_elem>".  Destination is printed as %P0,
;; source as %q1.
3785 (define_insn "neon_vqmovun<mode>"
3786 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3787 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3790 "vqmovun.<V_s_elem>\t%P0, %q1"
3791 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vmovl<sup><mode>: unspec producing a <V_widen> result from a VW-mode
;; source; emits "vmovl" with the element type built from <sup> and
;; <V_sz_elem>.  Destination is printed as %q0, source as %P1.
3794 (define_insn "neon_vmovl<sup><mode>"
3795 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3796 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3799 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3800 [(set_attr "type" "neon_shift_imm_long")]
;; neon_vmul_lane<mode>, VMD variant: multiply the vector operand 1 by lane
;; operand 3 of the vector operand 2 (printed as %P2[%c3]).  Operand 2 uses
;; the mode-dependent <scalar_mul_constraint>.  The scheduling type is the
;; floating-point one when <Is_float_mode> holds, else the integer one.
3803 (define_insn "neon_vmul_lane<mode>"
3804 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3805 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3806 (match_operand:VMD 2 "s_register_operand"
3807 "<scalar_mul_constraint>")
3808 (match_operand:SI 3 "immediate_operand" "i")]
3812 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3815 (if_then_else (match_test "<Is_float_mode>")
3816 (const_string "neon_fp_mul_s_scalar<q>")
3817 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode>, VMQ variant: as the VMD pattern, but operands 0 and
;; 1 are printed as %q registers while the scalar source (operand 2) is a
;; <V_HALF>-mode register printed as %P2[%c3].
3820 (define_insn "neon_vmul_lane<mode>"
3821 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3822 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3823 (match_operand:<V_HALF> 2 "s_register_operand"
3824 "<scalar_mul_constraint>")
3825 (match_operand:SI 3 "immediate_operand" "i")]
3829 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3832 (if_then_else (match_test "<Is_float_mode>")
3833 (const_string "neon_fp_mul_s_scalar<q>")
3834 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode>, VH (half-precision) variant: "vmul.f16" of operand 1
;; by lane operand 3 of the V4HF register operand 2.
;; Requires TARGET_NEON_FP16INST.
3837 (define_insn "neon_vmul_lane<mode>"
3838 [(set (match_operand:VH 0 "s_register_operand" "=w")
3839 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3840 (match_operand:V4HF 2 "s_register_operand"
3841 "<scalar_mul_constraint>")
3842 (match_operand:SI 3 "immediate_operand" "i")]
3844 "TARGET_NEON_FP16INST"
3845 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3846 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; neon_vmull<sup>_lane<mode>: by-lane multiply of the VMDI operand 1 by lane
;; operand 3 of operand 2, producing a <V_widen> result printed as %q0.
;; <sup> supplies the signedness letter of the element type.
3849 (define_insn "neon_vmull<sup>_lane<mode>"
3850 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3851 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3852 (match_operand:VMDI 2 "s_register_operand"
3853 "<scalar_mul_constraint>")
3854 (match_operand:SI 3 "immediate_operand" "i")]
3858 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3860 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; neon_vqdmull_lane<mode>: UNSPEC_VQDMULL_LANE pattern.  Emits
;; "vqdmull.<V_s_elem>" of the VMDI operand 1 by lane operand 3 of operand 2,
;; producing a <V_widen> result printed as %q0.
3863 (define_insn "neon_vqdmull_lane<mode>"
3864 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3865 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3866 (match_operand:VMDI 2 "s_register_operand"
3867 "<scalar_mul_constraint>")
3868 (match_operand:SI 3 "immediate_operand" "i")]
3869 UNSPEC_VQDMULL_LANE))]
3872 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3874 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; neon_vq<r>dmulh_lane<mode>, VMQI variant: emits "vq<r>dmulh.<V_s_elem>" of
;; operand 1 by lane operand 3 of the <V_HALF>-mode operand 2.  Operands 0
;; and 1 are printed as %q registers, the scalar source as %P2[%c3].
3877 (define_insn "neon_vq<r>dmulh_lane<mode>"
3878 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3879 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3880 (match_operand:<V_HALF> 2 "s_register_operand"
3881 "<scalar_mul_constraint>")
3882 (match_operand:SI 3 "immediate_operand" "i")]
3886 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3888 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; neon_vq<r>dmulh_lane<mode>, VMDI variant: emits "vq<r>dmulh.<V_s_elem>"
;; with all vector operands printed as %P registers.
;; NOTE(review): the type attribute below ends in "_scalar_q", the same
;; string used by the VMQI variant, although this variant prints %P
;; registers.  Confirm whether the "_q" suffix is intended here.
3891 (define_insn "neon_vq<r>dmulh_lane<mode>"
3892 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3893 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3894 (match_operand:VMDI 2 "s_register_operand"
3895 "<scalar_mul_constraint>")
3896 (match_operand:SI 3 "immediate_operand" "i")]
3900 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3902 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3905 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant.  Operand 1 is tied to the output (constraint "0") and does
;; not appear in the assembler template; operand 2 is the vector multiplicand
;; and operand 3 (<V_HALF> mode) supplies the lane selected by operand 4.
3906 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3907 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3908 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3909 (match_operand:VMQI 2 "s_register_operand" "w")
3910 (match_operand:<V_HALF> 3 "s_register_operand"
3911 "<scalar_mul_constraint>")
3912 (match_operand:SI 4 "immediate_operand" "i")]
3917 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3919 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; neon_vqrdml<as>h_lane<mode>, VMDI variant.  Operand 1 is tied to the
;; output (constraint "0"); operand 2 is the vector multiplicand and operand
;; 3 supplies the lane selected by operand 4.  All vector operands are
;; printed as %P registers.
3922 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3923 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3924 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3925 (match_operand:VMDI 2 "s_register_operand" "w")
3926 (match_operand:VMDI 3 "s_register_operand"
3927 "<scalar_mul_constraint>")
3928 (match_operand:SI 4 "immediate_operand" "i")]
3933 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3935 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; neon_vmla_lane<mode>, VMD variant: "vmla" by lane.  Operand 1 is tied to
;; the output (constraint "0"); operand 2 is the vector multiplicand and
;; operand 3 supplies the lane selected by operand 4.  The scheduling type
;; is the floating-point one when <Is_float_mode> holds.
3938 (define_insn "neon_vmla_lane<mode>"
3939 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3940 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3941 (match_operand:VMD 2 "s_register_operand" "w")
3942 (match_operand:VMD 3 "s_register_operand"
3943 "<scalar_mul_constraint>")
3944 (match_operand:SI 4 "immediate_operand" "i")]
3948 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3951 (if_then_else (match_test "<Is_float_mode>")
3952 (const_string "neon_fp_mla_s_scalar<q>")
3953 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmla_lane<mode>, VMQ variant: as the VMD pattern, with operands 0 and
;; 2 printed as %q registers and the scalar source (operand 3, <V_HALF> mode)
;; printed as %P3[%c4].
3956 (define_insn "neon_vmla_lane<mode>"
3957 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3958 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3959 (match_operand:VMQ 2 "s_register_operand" "w")
3960 (match_operand:<V_HALF> 3 "s_register_operand"
3961 "<scalar_mul_constraint>")
3962 (match_operand:SI 4 "immediate_operand" "i")]
3966 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3969 (if_then_else (match_test "<Is_float_mode>")
3970 (const_string "neon_fp_mla_s_scalar<q>")
3971 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmlal<sup>_lane<mode>: "vmlal" by lane.  The <V_widen> accumulator
;; (operand 1) is tied to the output; operand 2 is the VMDI multiplicand and
;; operand 3 supplies the lane selected by operand 4.
3974 (define_insn "neon_vmlal<sup>_lane<mode>"
3975 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3976 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3977 (match_operand:VMDI 2 "s_register_operand" "w")
3978 (match_operand:VMDI 3 "s_register_operand"
3979 "<scalar_mul_constraint>")
3980 (match_operand:SI 4 "immediate_operand" "i")]
3984 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3986 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; neon_vqdmlal_lane<mode>: UNSPEC_VQDMLAL_LANE pattern emitting
;; "vqdmlal.<V_s_elem>".  The <V_widen> accumulator (operand 1) is tied to
;; the output; operand 2 is the VMDI multiplicand and operand 3 supplies the
;; lane selected by operand 4.
3989 (define_insn "neon_vqdmlal_lane<mode>"
3990 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3991 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3992 (match_operand:VMDI 2 "s_register_operand" "w")
3993 (match_operand:VMDI 3 "s_register_operand"
3994 "<scalar_mul_constraint>")
3995 (match_operand:SI 4 "immediate_operand" "i")]
3996 UNSPEC_VQDMLAL_LANE))]
3999 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4001 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; neon_vmls_lane<mode>, VMD variant: "vmls" by lane.  Operand 1 is tied to
;; the output (constraint "0"); operand 2 is the vector multiplicand and
;; operand 3 supplies the lane selected by operand 4.  The scheduling type
;; strings are the same ones used by the vmla_lane patterns.
4004 (define_insn "neon_vmls_lane<mode>"
4005 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4006 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4007 (match_operand:VMD 2 "s_register_operand" "w")
4008 (match_operand:VMD 3 "s_register_operand"
4009 "<scalar_mul_constraint>")
4010 (match_operand:SI 4 "immediate_operand" "i")]
4014 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4017 (if_then_else (match_test "<Is_float_mode>")
4018 (const_string "neon_fp_mla_s_scalar<q>")
4019 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmls_lane<mode>, VMQ variant: as the VMD pattern, with operands 0 and
;; 2 printed as %q registers and the scalar source (operand 3, <V_HALF> mode)
;; printed as %P3[%c4].
4022 (define_insn "neon_vmls_lane<mode>"
4023 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4024 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4025 (match_operand:VMQ 2 "s_register_operand" "w")
4026 (match_operand:<V_HALF> 3 "s_register_operand"
4027 "<scalar_mul_constraint>")
4028 (match_operand:SI 4 "immediate_operand" "i")]
4032 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4035 (if_then_else (match_test "<Is_float_mode>")
4036 (const_string "neon_fp_mla_s_scalar<q>")
4037 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmlsl<sup>_lane<mode>: "vmlsl" by lane.  The <V_widen> accumulator
;; (operand 1) is tied to the output; operand 2 is the VMDI multiplicand and
;; operand 3 supplies the lane selected by operand 4.
4040 (define_insn "neon_vmlsl<sup>_lane<mode>"
4041 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4042 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4043 (match_operand:VMDI 2 "s_register_operand" "w")
4044 (match_operand:VMDI 3 "s_register_operand"
4045 "<scalar_mul_constraint>")
4046 (match_operand:SI 4 "immediate_operand" "i")]
4050 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4052 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; neon_vqdmlsl_lane<mode>: UNSPEC_VQDMLSL_LANE pattern emitting
;; "vqdmlsl.<V_s_elem>".  The <V_widen> accumulator (operand 1) is tied to
;; the output; operand 2 is the VMDI multiplicand and operand 3 supplies the
;; lane selected by operand 4.
4055 (define_insn "neon_vqdmlsl_lane<mode>"
4056 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4057 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4058 (match_operand:VMDI 2 "s_register_operand" "w")
4059 (match_operand:VMDI 3 "s_register_operand"
4060 "<scalar_mul_constraint>")
4061 (match_operand:SI 4 "immediate_operand" "i")]
4062 UNSPEC_VQDMLSL_LANE))]
4065 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4067 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4070 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4071 ; core register into a temp register, then use a scalar taken from that. This
4072 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4073 ; or extracted from another vector. The latter case it's currently better to
4074 ; use the "_lane" variant, and the former case can probably be implemented
4075 ; using vld1_lane, but that hasn't been done yet.
;; neon_vmul_n<mode>, VMD variant: allocate a fresh <MODE> pseudo, insert the
;; scalar operand 2 into its lane 0 with neon_vset_lane<mode>, then emit
;; neon_vmul_lane<mode> using that pseudo as the scalar source.
4077 (define_expand "neon_vmul_n<mode>"
4078 [(match_operand:VMD 0 "s_register_operand" "")
4079 (match_operand:VMD 1 "s_register_operand" "")
4080 (match_operand:<V_elem> 2 "s_register_operand" "")]
4083 rtx tmp = gen_reg_rtx (<MODE>mode);
4084 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4085 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmul_n<mode>, VMQ variant: the temporary holding the scalar is a
;; <V_HALF>-mode pseudo (lane 0 set via neon_vset_lane<V_half>), which is
;; then passed to neon_vmul_lane<mode> as the scalar source.
4090 (define_expand "neon_vmul_n<mode>"
4091 [(match_operand:VMQ 0 "s_register_operand" "")
4092 (match_operand:VMQ 1 "s_register_operand" "")
4093 (match_operand:<V_elem> 2 "s_register_operand" "")]
4096 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4097 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4098 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmul_n<mode>, VH (half-precision) variant, enabled under
;; TARGET_NEON_FP16INST.  The scalar is placed in lane 0 of a V4HF pseudo
;; with neon_vset_lanev4hf, and neon_vmul_lane<mode> is emitted with that
;; pseudo as the scalar source.
4103 (define_expand "neon_vmul_n<mode>"
4104 [(match_operand:VH 0 "s_register_operand")
4105 (match_operand:VH 1 "s_register_operand")
4106 (match_operand:<V_elem> 2 "s_register_operand")]
4107 "TARGET_NEON_FP16INST"
4109 rtx tmp = gen_reg_rtx (V4HFmode);
4110 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4111 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmulls_n<mode>: place the scalar operand 2 in lane 0 of a fresh
;; <MODE> pseudo, then emit neon_vmulls_lane<mode> with that pseudo as the
;; scalar source.  The result (operand 0) has mode <V_widen>.
4116 (define_expand "neon_vmulls_n<mode>"
4117 [(match_operand:<V_widen> 0 "s_register_operand" "")
4118 (match_operand:VMDI 1 "s_register_operand" "")
4119 (match_operand:<V_elem> 2 "s_register_operand" "")]
4122 rtx tmp = gen_reg_rtx (<MODE>mode);
4123 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4124 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmullu_n<mode>: place the scalar operand 2 in lane 0 of a fresh
;; <MODE> pseudo, then emit neon_vmullu_lane<mode> with that pseudo as the
;; scalar source.  The result (operand 0) has mode <V_widen>.
4129 (define_expand "neon_vmullu_n<mode>"
4130 [(match_operand:<V_widen> 0 "s_register_operand" "")
4131 (match_operand:VMDI 1 "s_register_operand" "")
4132 (match_operand:<V_elem> 2 "s_register_operand" "")]
4135 rtx tmp = gen_reg_rtx (<MODE>mode);
4136 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4137 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqdmull_n<mode>: place the scalar operand 2 in lane 0 of a fresh
;; <MODE> pseudo, then emit neon_vqdmull_lane<mode> with that pseudo as the
;; scalar source.  The result (operand 0) has mode <V_widen>.
4142 (define_expand "neon_vqdmull_n<mode>"
4143 [(match_operand:<V_widen> 0 "s_register_operand" "")
4144 (match_operand:VMDI 1 "s_register_operand" "")
4145 (match_operand:<V_elem> 2 "s_register_operand" "")]
4148 rtx tmp = gen_reg_rtx (<MODE>mode);
4149 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4150 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqdmulh_n<mode>, VMDI variant: place the scalar operand 2 in lane 0
;; of a fresh <MODE> pseudo, then emit neon_vqdmulh_lane<mode> with that
;; pseudo as the scalar source.
4155 (define_expand "neon_vqdmulh_n<mode>"
4156 [(match_operand:VMDI 0 "s_register_operand" "")
4157 (match_operand:VMDI 1 "s_register_operand" "")
4158 (match_operand:<V_elem> 2 "s_register_operand" "")]
4161 rtx tmp = gen_reg_rtx (<MODE>mode);
4162 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4163 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqrdmulh_n<mode>, VMDI variant: place the scalar operand 2 in lane 0
;; of a fresh <MODE> pseudo, then emit neon_vqrdmulh_lane<mode> with that
;; pseudo as the scalar source.
4168 (define_expand "neon_vqrdmulh_n<mode>"
4169 [(match_operand:VMDI 0 "s_register_operand" "")
4170 (match_operand:VMDI 1 "s_register_operand" "")
4171 (match_operand:<V_elem> 2 "s_register_operand" "")]
4174 rtx tmp = gen_reg_rtx (<MODE>mode);
4175 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4176 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqdmulh_n<mode>, VMQI variant: the scalar operand 2 is placed in
;; lane 0 of a <V_HALF>-mode pseudo (via neon_vset_lane<V_half>), which is
;; then passed to neon_vqdmulh_lane<mode> as the scalar source.
4181 (define_expand "neon_vqdmulh_n<mode>"
4182 [(match_operand:VMQI 0 "s_register_operand" "")
4183 (match_operand:VMQI 1 "s_register_operand" "")
4184 (match_operand:<V_elem> 2 "s_register_operand" "")]
4187 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4188 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4189 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; By-scalar form of vqrdmulh for the VMQI modes.  The scalar is placed in
;; lane 0 of a <V_HALF>mode temporary (neon_vset_lane<V_half>) which is then
;; handed to neon_vqrdmulh_lane along with operands 0 and 1.
4194 (define_expand "neon_vqrdmulh_n<mode>"
4195 [(match_operand:VMQI 0 "s_register_operand" "")
4196 (match_operand:VMQI 1 "s_register_operand" "")
4197 (match_operand:<V_elem> 2 "s_register_operand" "")]
4200 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4201 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4202 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; By-scalar multiply-accumulate for the VMD modes: scalar operand 3 is put
;; in lane 0 of a new <MODE>mode temporary, then neon_vmla_lane is emitted
;; with operands 0, 1, 2 (further arguments are on lines not shown here).
4207 (define_expand "neon_vmla_n<mode>"
4208 [(match_operand:VMD 0 "s_register_operand" "")
4209 (match_operand:VMD 1 "s_register_operand" "")
4210 (match_operand:VMD 2 "s_register_operand" "")
4211 (match_operand:<V_elem> 3 "s_register_operand" "")]
4214 rtx tmp = gen_reg_rtx (<MODE>mode);
4215 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4216 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar multiply-accumulate for the VMQ modes: scalar operand 3 is put
;; in lane 0 of a new <V_HALF>mode temporary (neon_vset_lane<V_half>), then
;; neon_vmla_lane is emitted with operands 0, 1, 2.
4221 (define_expand "neon_vmla_n<mode>"
4222 [(match_operand:VMQ 0 "s_register_operand" "")
4223 (match_operand:VMQ 1 "s_register_operand" "")
4224 (match_operand:VMQ 2 "s_register_operand" "")
4225 (match_operand:<V_elem> 3 "s_register_operand" "")]
4228 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4229 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4230 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar form of vmlals (operands 0 and 1 are in the <V_widen> mode):
;; scalar operand 3 is put in lane 0 of a new <MODE>mode temporary, then
;; neon_vmlals_lane is emitted with operands 0, 1, 2.
4235 (define_expand "neon_vmlals_n<mode>"
4236 [(match_operand:<V_widen> 0 "s_register_operand" "")
4237 (match_operand:<V_widen> 1 "s_register_operand" "")
4238 (match_operand:VMDI 2 "s_register_operand" "")
4239 (match_operand:<V_elem> 3 "s_register_operand" "")]
4242 rtx tmp = gen_reg_rtx (<MODE>mode);
4243 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4244 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar form of vmlalu (operands 0 and 1 are in the <V_widen> mode):
;; scalar operand 3 is put in lane 0 of a new <MODE>mode temporary, then
;; neon_vmlalu_lane is emitted with operands 0, 1, 2.
4249 (define_expand "neon_vmlalu_n<mode>"
4250 [(match_operand:<V_widen> 0 "s_register_operand" "")
4251 (match_operand:<V_widen> 1 "s_register_operand" "")
4252 (match_operand:VMDI 2 "s_register_operand" "")
4253 (match_operand:<V_elem> 3 "s_register_operand" "")]
4256 rtx tmp = gen_reg_rtx (<MODE>mode);
4257 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4258 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar form of vqdmlal (operands 0 and 1 are in the <V_widen> mode):
;; scalar operand 3 is put in lane 0 of a new <MODE>mode temporary, then
;; neon_vqdmlal_lane is emitted with operands 0, 1, 2.
4263 (define_expand "neon_vqdmlal_n<mode>"
4264 [(match_operand:<V_widen> 0 "s_register_operand" "")
4265 (match_operand:<V_widen> 1 "s_register_operand" "")
4266 (match_operand:VMDI 2 "s_register_operand" "")
4267 (match_operand:<V_elem> 3 "s_register_operand" "")]
4270 rtx tmp = gen_reg_rtx (<MODE>mode);
4271 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4272 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar multiply-subtract for the VMD modes: scalar operand 3 is put in
;; lane 0 of a new <MODE>mode temporary, then neon_vmls_lane is emitted with
;; operands 0, 1, 2.
4277 (define_expand "neon_vmls_n<mode>"
4278 [(match_operand:VMD 0 "s_register_operand" "")
4279 (match_operand:VMD 1 "s_register_operand" "")
4280 (match_operand:VMD 2 "s_register_operand" "")
4281 (match_operand:<V_elem> 3 "s_register_operand" "")]
4284 rtx tmp = gen_reg_rtx (<MODE>mode);
4285 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4286 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar multiply-subtract for the VMQ modes: scalar operand 3 is put in
;; lane 0 of a new <V_HALF>mode temporary (neon_vset_lane<V_half>), then
;; neon_vmls_lane is emitted with operands 0, 1, 2.
4291 (define_expand "neon_vmls_n<mode>"
4292 [(match_operand:VMQ 0 "s_register_operand" "")
4293 (match_operand:VMQ 1 "s_register_operand" "")
4294 (match_operand:VMQ 2 "s_register_operand" "")
4295 (match_operand:<V_elem> 3 "s_register_operand" "")]
4298 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4299 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4300 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar form of vmlsls (operands 0 and 1 are in the <V_widen> mode):
;; scalar operand 3 is put in lane 0 of a new <MODE>mode temporary, then
;; neon_vmlsls_lane is emitted with operands 0, 1, 2.
4305 (define_expand "neon_vmlsls_n<mode>"
4306 [(match_operand:<V_widen> 0 "s_register_operand" "")
4307 (match_operand:<V_widen> 1 "s_register_operand" "")
4308 (match_operand:VMDI 2 "s_register_operand" "")
4309 (match_operand:<V_elem> 3 "s_register_operand" "")]
4312 rtx tmp = gen_reg_rtx (<MODE>mode);
4313 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4314 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar form of vmlslu (operands 0 and 1 are in the <V_widen> mode):
;; scalar operand 3 is put in lane 0 of a new <MODE>mode temporary, then
;; neon_vmlslu_lane is emitted with operands 0, 1, 2.
4319 (define_expand "neon_vmlslu_n<mode>"
4320 [(match_operand:<V_widen> 0 "s_register_operand" "")
4321 (match_operand:<V_widen> 1 "s_register_operand" "")
4322 (match_operand:VMDI 2 "s_register_operand" "")
4323 (match_operand:<V_elem> 3 "s_register_operand" "")]
4326 rtx tmp = gen_reg_rtx (<MODE>mode);
4327 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4328 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; By-scalar form of vqdmlsl (operands 0 and 1 are in the <V_widen> mode):
;; scalar operand 3 is put in lane 0 of a new <MODE>mode temporary, then
;; neon_vqdmlsl_lane is emitted with operands 0, 1, 2.
4333 (define_expand "neon_vqdmlsl_n<mode>"
4334 [(match_operand:<V_widen> 0 "s_register_operand" "")
4335 (match_operand:<V_widen> 1 "s_register_operand" "")
4336 (match_operand:VMDI 2 "s_register_operand" "")
4337 (match_operand:<V_elem> 3 "s_register_operand" "")]
4340 rtx tmp = gen_reg_rtx (<MODE>mode);
4341 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4342 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on two vector registers (operands 1 and 2) with an immediate
;; (operand 3).  Before printing, the immediate is checked by
;; arm_const_bounds against 0 and the number of units in the mode.
4347 (define_insn "neon_vext<mode>"
4348 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4349 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4350 (match_operand:VDQX 2 "s_register_operand" "w")
4351 (match_operand:SI 3 "immediate_operand" "i")]
4355 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4356 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4358 [(set_attr "type" "neon_ext<q>")]
;; Single-operand vrev64 over the VDQ modes; emits
;; "vrev64.<V_sz_elem> dest, src" and is typed neon_rev<q>.
4361 (define_insn "neon_vrev64<mode>"
4362 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4363 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4366 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4367 [(set_attr "type" "neon_rev<q>")]
;; Single-operand vrev32 over the VX modes; emits
;; "vrev32.<V_sz_elem> dest, src" and is typed neon_rev<q>.
4370 (define_insn "neon_vrev32<mode>"
4371 [(set (match_operand:VX 0 "s_register_operand" "=w")
4372 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4375 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4376 [(set_attr "type" "neon_rev<q>")]
;; Single-operand vrev16 over the VE modes; emits
;; "vrev16.<V_sz_elem> dest, src" and is typed neon_rev<q>.
4379 (define_insn "neon_vrev16<mode>"
4380 [(set (match_operand:VE 0 "s_register_operand" "=w")
4381 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4384 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4385 [(set_attr "type" "neon_rev<q>")]
4388 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4389 ; allocation. For an intrinsic of form:
4390 ; rD = vbsl_* (rS, rN, rM)
4391 ; We can use any of:
4392 ; vbsl rS, rN, rM (if D = S)
4393 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4394 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Three constraint alternatives, one per output template, in order:
;;   alt 0: operand 1 tied to the destination -> vbsl dest, op2, op3
;;   alt 1: operand 3 tied to the destination -> vbit dest, op2, op1
;;   alt 2: operand 2 tied to the destination -> vbif dest, op3, op1
4396 (define_insn "neon_vbsl<mode>_internal"
4397 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4398 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4399 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4400 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4404 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4405 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4406 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4407 [(set_attr "type" "neon_bsl<q>")]
;; Public vbsl expander.  Operand 1 arrives in the <V_cmp_result> mode and
;; is re-expressed in <MODE>mode with gen_lowpart so that all three inputs
;; share one mode (see the in-body comment about aliasing).
4410 (define_expand "neon_vbsl<mode>"
4411 [(set (match_operand:VDQX 0 "s_register_operand" "")
4412 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4413 (match_operand:VDQX 2 "s_register_operand" "")
4414 (match_operand:VDQX 3 "s_register_operand" "")]
4418 /* We can't alias operands together if they have different modes. */
4419 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Vector shift where the shift amount is itself a vector register
;; (operand 2); the mnemonic comes from the <shift_op>/<sup> iterators.
;; NOTE(review): the shift count is a register, yet the type attribute is
;; "neon_shift_imm<q>" -- confirm this scheduling class is intended.
4423 (define_insn "neon_v<shift_op><sup><mode>"
4424 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4425 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4426 (match_operand:VDQIX 2 "s_register_operand" "w")]
4429 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4430 [(set_attr "type" "neon_shift_imm<q>")]
;; Same operand shape and output template as the preceding register-shift
;; pattern, but typed "neon_sat_shift_imm<q>".  The unspec iterator that
;; distinguishes the two patterns is on a line not visible in this excerpt.
4434 (define_insn "neon_v<shift_op><sup><mode>"
4435 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4436 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4437 (match_operand:VDQIX 2 "s_register_operand" "w")]
4440 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4441 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift by immediate (operand 2).  The immediate is passed to
;; arm_const_bounds with limits 1 and element-bits + 1 before the
;; instruction text is returned.
4445 (define_insn "neon_v<shift_op><sup>_n<mode>"
4446 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4447 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4448 (match_operand:SI 2 "immediate_operand" "i")]
4452 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4453 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4455 [(set_attr "type" "neon_shift_imm<q>")]
4458 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: the source is a VN-mode register printed
;; with %q, the result is in the <V_narrow> mode printed with %P.  The
;; immediate is checked with limits 1 and element-bits / 2 + 1.
4459 (define_insn "neon_v<shift_op>_n<mode>"
4460 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4461 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4462 (match_operand:SI 2 "immediate_operand" "i")]
4466 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4467 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4469 [(set_attr "type" "neon_shift_imm_narrow_q")]
4472 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing shift by immediate; the element suffix is built from
;; <sup> and <V_sz_elem>.  The immediate is checked with limits 1 and
;; element-bits / 2 + 1.
4473 (define_insn "neon_v<shift_op><sup>_n<mode>"
4474 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4475 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4476 (match_operand:SI 2 "immediate_operand" "i")]
4480 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4481 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4483 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4486 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate printed with the <V_s_elem> element suffix.
;; Same immediate limits as the other narrowing shifts here
;; (1 and element-bits / 2 + 1).
4487 (define_insn "neon_v<shift_op>_n<mode>"
4488 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4489 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4490 (match_operand:SI 2 "immediate_operand" "i")]
4494 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4495 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4497 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate (operand 2).  The immediate is checked by
;; arm_const_bounds with limits 0 and element-bits.
4500 (define_insn "neon_vshl_n<mode>"
4501 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4502 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4503 (match_operand:SI 2 "immediate_operand" "i")]
4507 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4508 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4510 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate (operand 2), with the <sup> variant in the element
;; suffix.  The immediate is checked with limits 0 and element-bits.
4513 (define_insn "neon_vqshl_<sup>_n<mode>"
4514 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4515 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4516 (match_operand:SI 2 "immediate_operand" "i")]
4520 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4521 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4523 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate (operand 2), printed with the <V_s_elem> suffix.
;; The immediate is checked with limits 0 and element-bits.
4526 (define_insn "neon_vqshlu_n<mode>"
4527 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4528 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4529 (match_operand:SI 2 "immediate_operand" "i")]
4533 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4534 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4536 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate: VW-mode source (printed %P), <V_widen>
;; result (printed %q).
;; NOTE(review): the in-body comment states "0 < imm", but the bounds call
;; passes 0 as its lower limit, whereas the right-shift patterns in this
;; file pass 1 for a range that starts at 1 -- confirm whether an immediate
;; of 0 is meant to be accepted and correct the comment or the limit.
4539 (define_insn "neon_vshll<sup>_n<mode>"
4540 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4541 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4542 (match_operand:SI 2 "immediate_operand" "i")]
4546 /* The boundaries are: 0 < imm <= size. */
4547 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4548 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4550 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate with an accumulator: operand 1 is tied to the
;; destination ("0"), operand 2 is the value shifted and operand 3 the
;; immediate, checked with limits 1 and element-bits + 1.
4554 (define_insn "neon_v<shift_op><sup>_n<mode>"
4555 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4556 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4557 (match_operand:VDQIX 2 "s_register_operand" "w")
4558 (match_operand:SI 3 "immediate_operand" "i")]
4562 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4563 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4565 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri: operand 1 is tied to the destination, operand 2 is the source and
;; operand 3 the immediate (checked with limits 1 and element-bits + 1).
;; NOTE(review): the shift count is an immediate, yet the type attribute is
;; "neon_shift_reg<q>" -- confirm this scheduling class is intended.
4568 (define_insn "neon_vsri_n<mode>"
4569 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4570 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4571 (match_operand:VDQIX 2 "s_register_operand" "w")
4572 (match_operand:SI 3 "immediate_operand" "i")]
4576 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4577 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4579 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli: operand 1 is tied to the destination, operand 2 is the source and
;; operand 3 the immediate (checked with limits 0 and element-bits).
;; NOTE(review): typed "neon_shift_reg<q>" although the count is an
;; immediate -- confirm.
4582 (define_insn "neon_vsli_n<mode>"
4583 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4584 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4585 (match_operand:VDQIX 2 "s_register_operand" "w")
4586 (match_operand:SI 3 "immediate_operand" "i")]
4590 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4591 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4593 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table: operand 1 is printed inside the braces
;; as the table list, operand 2 follows as the remaining source operand.
4596 (define_insn "neon_vtbl1v8qi"
4597 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4598 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4599 (match_operand:V8QI 2 "s_register_operand" "w")]
4602 "vtbl.8\t%P0, {%P1}, %P2"
4603 [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in one TImode operand.  The table is
;; printed as two V8QI registers built from REGNO (operands[1]) and
;; REGNO + 2; the instruction is emitted through output_asm_insn.
4606 (define_insn "neon_vtbl2v8qi"
4607 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4608 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4609 (match_operand:V8QI 2 "s_register_operand" "w")]
4614 int tabbase = REGNO (operands[1]);
4616 ops[0] = operands[0];
4617 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4618 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4619 ops[3] = operands[2];
4620 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4624 [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in one EImode operand.  The table
;; is printed as V8QI registers at REGNO (operands[1]) + 0, + 2 and + 4.
4627 (define_insn "neon_vtbl3v8qi"
4628 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4629 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4630 (match_operand:V8QI 2 "s_register_operand" "w")]
4635 int tabbase = REGNO (operands[1]);
4637 ops[0] = operands[0];
4638 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4639 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4640 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4641 ops[4] = operands[2];
4642 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4646 [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in one OImode operand.  The table
;; is printed as V8QI registers at REGNO (operands[1]) + 0, + 2, + 4, + 6.
4649 (define_insn "neon_vtbl4v8qi"
4650 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4651 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4652 (match_operand:V8QI 2 "s_register_operand" "w")]
4657 int tabbase = REGNO (operands[1]);
4659 ops[0] = operands[0];
4660 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4661 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4662 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4663 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4664 ops[5] = operands[2];
4665 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4669 [(set_attr "type" "neon_tbl4")]
4672 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup, split once reload has completed into two
;; neon_vtbl2v8qi insns: one on the low V8QI halves and one on the high
;; V8QI halves of op0/op2, both using operand 1 viewed in TImode as the
;; table.  The destination is earlyclobber ("=&w").
;; (op0/op2 are presumably operands[0]/operands[2]; their assignments are on
;; lines not visible in this excerpt.)
4673 (define_insn_and_split "neon_vtbl1v16qi"
4674 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4675 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4676 (match_operand:V16QI 2 "s_register_operand" "w")]
4680 "&& reload_completed"
4683 rtx op0, op1, op2, part0, part2;
4687 op1 = gen_lowpart (TImode, operands[1]);
4690 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4691 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4692 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4693 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4695 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4696 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4697 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4698 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4701 [(set_attr "type" "multiple")]
;; V16QI lookup with an OImode table operand, split after reload into two
;; V8QI lookups over the low and high halves of op0/op2.
;; NOTE(review): the visible lines emit gen_neon_vtbl2v8qi with op1; how op1
;; is derived from the OImode operand is on lines not visible here -- check
;; the full source before relying on this description.
4704 (define_insn_and_split "neon_vtbl2v16qi"
4705 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4706 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4707 (match_operand:V16QI 2 "s_register_operand" "w")]
4711 "&& reload_completed"
4714 rtx op0, op1, op2, part0, part2;
4721 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4722 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4723 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4724 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4726 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4727 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4728 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4729 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4732 [(set_attr "type" "multiple")]
4735 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4736 ;; handle quad-word input modes, producing octa-word output modes. But
4737 ;; that requires us to add support for octa-word vector modes in moves.
4738 ;; That seems overkill for this one use in vec_perm.
;; The pattern combines two V16QI registers into one OImode register and is
;; split after reload by calling neon_split_vcombine on the operand array.
4739 (define_insn_and_split "neon_vcombinev16qi"
4740 [(set (match_operand:OI 0 "s_register_operand" "=w")
4741 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4742 (match_operand:V16QI 2 "s_register_operand" "w")]
4746 "&& reload_completed"
4749 neon_split_vcombine (operands);
4752 [(set_attr "type" "multiple")]
;; vtbx with a one-register table.  Operand 1 is tied to the destination
;; ("0"); operand 2 is printed as the table list and operand 3 follows.
4755 (define_insn "neon_vtbx1v8qi"
4756 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4757 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4758 (match_operand:V8QI 2 "s_register_operand" "w")
4759 (match_operand:V8QI 3 "s_register_operand" "w")]
4762 "vtbx.8\t%P0, {%P2}, %P3"
4763 [(set_attr "type" "neon_tbl1")]
;; vtbx with a two-register table held in TImode operand 2; operand 1 is
;; tied to the destination.  The table is printed as V8QI registers at
;; REGNO (operands[2]) and REGNO + 2.
4766 (define_insn "neon_vtbx2v8qi"
4767 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4768 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4769 (match_operand:TI 2 "s_register_operand" "w")
4770 (match_operand:V8QI 3 "s_register_operand" "w")]
4775 int tabbase = REGNO (operands[2]);
4777 ops[0] = operands[0];
4778 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4779 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4780 ops[3] = operands[3];
4781 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4785 [(set_attr "type" "neon_tbl2")]
;; vtbx with a three-register table held in EImode operand 2; operand 1 is
;; tied to the destination.  The table is printed as V8QI registers at
;; REGNO (operands[2]) + 0, + 2 and + 4.
4788 (define_insn "neon_vtbx3v8qi"
4789 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4790 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4791 (match_operand:EI 2 "s_register_operand" "w")
4792 (match_operand:V8QI 3 "s_register_operand" "w")]
4797 int tabbase = REGNO (operands[2]);
4799 ops[0] = operands[0];
4800 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4801 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4802 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4803 ops[4] = operands[3];
4804 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4808 [(set_attr "type" "neon_tbl3")]
;; vtbx with a four-register table held in OImode operand 2; operand 1 is
;; tied to the destination.  The table is printed as V8QI registers at
;; REGNO (operands[2]) + 0, + 2, + 4 and + 6.
4811 (define_insn "neon_vtbx4v8qi"
4812 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4813 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4814 (match_operand:OI 2 "s_register_operand" "w")
4815 (match_operand:V8QI 3 "s_register_operand" "w")]
4820 int tabbase = REGNO (operands[2]);
4822 ops[0] = operands[0];
4823 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4824 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4825 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4826 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4827 ops[5] = operands[3];
4828 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4832 [(set_attr "type" "neon_tbl4")]
;; Expander producing two results from the same pair of inputs: operand 0
;; from an unspec of operands 1 and 2 (unspec name not visible here) and
;; operand 3 from the UNSPEC_VTRN2 unspec of the same two inputs.
4835 (define_expand "neon_vtrn<mode>_internal"
4837 [(set (match_operand:VDQWH 0 "s_register_operand")
4838 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4839 (match_operand:VDQWH 2 "s_register_operand")]
4841 (set (match_operand:VDQWH 3 "s_register_operand")
4842 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4847 ;; Note: Different operand numbering to handle tied registers correctly.
;; The two outputs are operands 0 and 2, both earlyclobber; input operand 1
;; is tied to output 0 and input operand 3 is tied to output 2, so the
;; single vtrn instruction is printed with just %0 and %2.
4848 (define_insn "*neon_vtrn<mode>_insn"
4849 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4850 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4851 (match_operand:VDQWH 3 "s_register_operand" "2")]
4853 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4854 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4857 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4858 [(set_attr "type" "neon_permute<q>")]
;; Expander producing two results from the same pair of inputs: operand 0
;; from an unspec of operands 1 and 2 (unspec name not visible here) and
;; operand 3 from the UNSPEC_VZIP2 unspec of the same two inputs.
4861 (define_expand "neon_vzip<mode>_internal"
4863 [(set (match_operand:VDQWH 0 "s_register_operand")
4864 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4865 (match_operand:VDQWH 2 "s_register_operand")]
4867 (set (match_operand:VDQWH 3 "s_register_operand")
4868 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4873 ;; Note: Different operand numbering to handle tied registers correctly.
;; The two outputs are operands 0 and 2, both earlyclobber; input operand 1
;; is tied to output 0 and input operand 3 is tied to output 2, so the
;; single vzip instruction is printed with just %0 and %2.
4874 (define_insn "*neon_vzip<mode>_insn"
4875 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4876 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4877 (match_operand:VDQWH 3 "s_register_operand" "2")]
4879 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4880 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4883 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4884 [(set_attr "type" "neon_zip<q>")]
;; Expander producing two results from the same pair of inputs: operand 0
;; from an unspec of operands 1 and 2 (unspec name not visible here) and
;; operand 3 from the UNSPEC_VUZP2 unspec of the same two inputs.
;; NOTE(review): operand 3 carries an explicit empty constraint string,
;; unlike the other operands here and the sibling vtrn/vzip expanders.
4887 (define_expand "neon_vuzp<mode>_internal"
4889 [(set (match_operand:VDQWH 0 "s_register_operand")
4890 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4891 (match_operand:VDQWH 2 "s_register_operand")]
4893 (set (match_operand:VDQWH 3 "s_register_operand" "")
4894 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4899 ;; Note: Different operand numbering to handle tied registers correctly.
;; The two outputs are operands 0 and 2, both earlyclobber; input operand 1
;; is tied to output 0 and input operand 3 is tied to output 2, so the
;; single vuzp instruction is printed with just %0 and %2.
4900 (define_insn "*neon_vuzp<mode>_insn"
4901 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4902 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4903 (match_operand:VDQWH 3 "s_register_operand" "2")]
4905 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4906 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4909 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4910 [(set_attr "type" "neon_zip<q>")]
;; Expander whose name repeats <mode> twice: the register destination and
;; the neon_struct_operand memory source are both in the same VDQX mode.
4913 (define_expand "vec_load_lanes<mode><mode>"
4914 [(set (match_operand:VDQX 0 "s_register_operand")
4915 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-vector vld1 from a "Um" memory operand; the destination is printed
;; with the %h modifier and the address with %A.
4919 (define_insn "neon_vld1<mode>"
4920 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4921 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4924 "vld1.<V_sz_elem>\t%h0, %A1"
4925 [(set_attr "type" "neon_load1_1reg<q>")]
4928 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4929 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order here (via NEON_ENDIAN_LANE_N) before the instruction is printed.
;; Load one element into a lane of a D-register vector; operand 2 (the
;; previous vector contents) is tied to the destination.  The lane number in
;; operand 3 is passed through NEON_ENDIAN_LANE_N and written back to
;; operands[3] before printing.  Two output forms exist, a whole-register
;; vld1 and the lane form; the test choosing between them is on a line not
;; visible in this excerpt.
4931 (define_insn "neon_vld1_lane<mode>"
4932 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4933 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4934 (match_operand:VDX 2 "s_register_operand" "0")
4935 (match_operand:SI 3 "immediate_operand" "i")]
4939 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4940 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4941 operands[3] = GEN_INT (lane);
4943 return "vld1.<V_sz_elem>\t%P0, %A1";
4945 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4947 [(set_attr "type" "neon_load1_one_lane<q>")]
4950 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4951 ;; here on big endian targets.
;; Q-register variant of the single-lane load.  After the endian lane
;; conversion, operand 0 is replaced by a <V_HALF>mode register built from
;; regno, and the instruction is printed against that half (%P0).  Lanes at
;; or above max / 2 take a separate branch whose body is not visible in this
;; excerpt (presumably adjusting lane and regno to address the upper half
;; -- confirm against the full source).
4952 (define_insn "neon_vld1_lane<mode>"
4953 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4954 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4955 (match_operand:VQX 2 "s_register_operand" "0")
4956 (match_operand:SI 3 "immediate_operand" "i")]
4960 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4961 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4962 operands[3] = GEN_INT (lane);
4963 int regno = REGNO (operands[0]);
4964 if (lane >= max / 2)
4968 operands[3] = GEN_INT (lane);
4970 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4972 return "vld1.<V_sz_elem>\t%P0, %A1";
4974 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4976 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one element from memory and duplicate it across a D-register vector
;; (modelled as vec_duplicate); printed with the all-lanes "[]" syntax.
4979 (define_insn "neon_vld1_dup<mode>"
4980 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4981 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4983 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4984 [(set_attr "type" "neon_load1_all_lanes<q>")]
4987 ;; Special case for DImode. Treat it exactly like a simple load.
;; Both operands are DImode: a register destination and a
;; neon_struct_operand memory source wrapped in an unspec.
4988 (define_expand "neon_vld1_dupdi"
4989 [(set (match_operand:DI 0 "s_register_operand" "")
4990 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Q-register load-and-duplicate: the two halves of the destination are
;; printed separately (%e0 and %f0), each with the all-lanes "[]" syntax.
4996 (define_insn "neon_vld1_dup<mode>"
4997 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4998 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5001 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5003 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI load-and-duplicate, split after reload into two steps: load the DI
;; value into the low half of the destination with neon_vld1_dupdi, then
;; copy that low half into the high half.  Length attribute is 8.
5006 (define_insn_and_split "neon_vld1_dupv2di"
5007 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5008 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5011 "&& reload_completed"
5014 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5015 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5016 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5019 [(set_attr "length" "8")
5020 (set_attr "type" "neon_load1_all_lanes_q")]
;; Store counterpart of vec_load_lanes<mode><mode>: a neon_struct_operand
;; memory destination and a register source, both in the same VDQX mode.
5023 (define_expand "vec_store_lanes<mode><mode>"
5024 [(set (match_operand:VDQX 0 "neon_struct_operand")
5025 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector vst1 to a "Um" memory operand; the source register is
;; printed with the %h modifier and the address with %A.
5029 (define_insn "neon_vst1<mode>"
5030 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5031 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5034 "vst1.<V_sz_elem>\t%h1, %A0"
5035 [(set_attr "type" "neon_store1_1reg<q>")])
5037 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5038 ;; here on big endian targets.
;; Store one lane of a D-register vector to an element-sized memory operand.
;; The lane number in operand 2 is passed through NEON_ENDIAN_LANE_N and
;; written back before printing.  Two output forms exist (whole register and
;; single lane); the selecting test is on a line not visible in this excerpt.
5039 (define_insn "neon_vst1_lane<mode>"
5040 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5042 [(match_operand:VDX 1 "s_register_operand" "w")
5043 (match_operand:SI 2 "immediate_operand" "i")]
5047 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5048 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5049 operands[2] = GEN_INT (lane);
5051 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5053 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5055 [(set_attr "type" "neon_store1_one_lane<q>")]
5058 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5059 ;; here on big endian targets.
;; Q-register variant of the single-lane store.  Operand 1 is replaced by a
;; <V_HALF>mode register built from regno and operand 2 by the final lane
;; number before printing.  Lanes at or above max / 2 take a separate branch
;; whose body is not visible in this excerpt (presumably adjusting lane and
;; regno to address the upper half -- confirm against the full source).
5060 (define_insn "neon_vst1_lane<mode>"
5061 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5063 [(match_operand:VQX 1 "s_register_operand" "w")
5064 (match_operand:SI 2 "immediate_operand" "i")]
5068 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5069 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5070 int regno = REGNO (operands[1]);
5071 if (lane >= max / 2)
5076 operands[2] = GEN_INT (lane);
5077 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5079 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5081 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5083 [(set_attr "type" "neon_store1_one_lane<q>")]
;; TImode structure-load expander.  The extra VDX-mode unspec wrapping
;; (const_int 0) with UNSPEC_VSTRUCTDUMMY is a dummy operand that carries
;; the vector mode into the pattern.
5086 (define_expand "vec_load_lanesti<mode>"
5087 [(set (match_operand:TI 0 "s_register_operand")
5088 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5089 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure load into a TImode register.  For 64-bit elements
;; it is emitted as vld1.64, otherwise as vld2; the type attribute follows
;; the same split (neon_load1_2reg<q> vs. neon_load2_2reg<q>).
5093 (define_insn "neon_vld2<mode>"
5094 [(set (match_operand:TI 0 "s_register_operand" "=w")
5095 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5096 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5100 if (<V_sz_elem> == 64)
5101 return "vld1.64\t%h0, %A1";
5103 return "vld2.<V_sz_elem>\t%h0, %A1";
5106 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5107 (const_string "neon_load1_2reg<q>")
5108 (const_string "neon_load2_2reg<q>")))]
;; OImode structure-load expander; the dummy UNSPEC_VSTRUCTDUMMY operand is
;; in a VQ2 mode and carries the vector mode into the pattern.
5111 (define_expand "vec_load_lanesoi<mode>"
5112 [(set (match_operand:OI 0 "s_register_operand")
5113 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5114 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vld2 into an OImode register for the VQ2 modes; always printed as vld2
;; with the %h register-list modifier.
5118 (define_insn "neon_vld2<mode>"
5119 [(set (match_operand:OI 0 "s_register_operand" "=w")
5120 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5121 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5124 "vld2.<V_sz_elem>\t%h0, %A1"
5125 [(set_attr "type" "neon_load2_2reg_q")])
5127 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5128 ;; here on big endian targets.
;; Load one lane into each of two D registers.  Operand 2 (previous
;; contents) is tied to the destination.  The two registers printed are
;; DImode registers at REGNO (operands[0]) and REGNO + 2, with the
;; endian-adjusted lane number used for both.
5129 (define_insn "neon_vld2_lane<mode>"
5130 [(set (match_operand:TI 0 "s_register_operand" "=w")
5131 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5132 (match_operand:TI 2 "s_register_operand" "0")
5133 (match_operand:SI 3 "immediate_operand" "i")
5134 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5138 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5139 int regno = REGNO (operands[0]);
5141 ops[0] = gen_rtx_REG (DImode, regno);
5142 ops[1] = gen_rtx_REG (DImode, regno + 2);
5143 ops[2] = operands[1];
5144 ops[3] = GEN_INT (lane);
5145 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5148 [(set_attr "type" "neon_load2_one_lane<q>")]
5151 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5152 ;; here on big endian targets.
;; Q-register variant: the destination is OImode and the two registers
;; printed are DImode registers at regno and regno + 4.  Lanes at or above
;; max / 2 take a separate branch whose body is not visible in this excerpt
;; (presumably adjusting lane and regno -- confirm against the full source).
5153 (define_insn "neon_vld2_lane<mode>"
5154 [(set (match_operand:OI 0 "s_register_operand" "=w")
5155 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5156 (match_operand:OI 2 "s_register_operand" "0")
5157 (match_operand:SI 3 "immediate_operand" "i")
5158 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5162 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5163 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5164 int regno = REGNO (operands[0]);
5166 if (lane >= max / 2)
5171 ops[0] = gen_rtx_REG (DImode, regno);
5172 ops[1] = gen_rtx_REG (DImode, regno + 4);
5173 ops[2] = operands[1];
5174 ops[3] = GEN_INT (lane);
5175 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5178 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load-and-duplicate into two D registers.  When the mode has more than
;; one unit the all-lanes vld2 form is used; otherwise a plain vld1 is
;; printed.  The type attribute makes the same distinction using
;; <V_mode_nunits>.
5181 (define_insn "neon_vld2_dup<mode>"
5182 [(set (match_operand:TI 0 "s_register_operand" "=w")
5183 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5184 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5188 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5189 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5191 return "vld1.<V_sz_elem>\t%h0, %A1";
5194 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5195 (const_string "neon_load2_all_lanes<q>")
5196 (const_string "neon_load1_1reg<q>")))]
;; TImode structure-store expander: memory destination, register source,
;; plus the VDX-mode UNSPEC_VSTRUCTDUMMY operand carrying the vector mode.
5199 (define_expand "vec_store_lanesti<mode>"
5200 [(set (match_operand:TI 0 "neon_struct_operand")
5201 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5202 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure store from a TImode register.  For 64-bit elements
;; it is emitted as vst1.64, otherwise as vst2.
;; NOTE(review): the non-64-bit case is typed "neon_store2_one_lane<q>" even
;; though this pattern stores whole registers, and the matching load pattern
;; uses "neon_load2_2reg<q>" -- confirm whether a 2reg store type was meant.
5206 (define_insn "neon_vst2<mode>"
5207 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5208 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5209 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5213 if (<V_sz_elem> == 64)
5214 return "vst1.64\t%h1, %A0";
5216 return "vst2.<V_sz_elem>\t%h1, %A0";
5219 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5220 (const_string "neon_store1_2reg<q>")
5221 (const_string "neon_store2_one_lane<q>")))]
;; OImode structure-store expander: memory destination, register source,
;; plus the VQ2-mode UNSPEC_VSTRUCTDUMMY operand carrying the vector mode.
5224 (define_expand "vec_store_lanesoi<mode>"
5225 [(set (match_operand:OI 0 "neon_struct_operand")
5226 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5227 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst2 from an OImode register for the VQ2 modes; always printed as vst2
;; with the %h register-list modifier.
;; NOTE(review): typed "neon_store2_4reg<q>", whereas the OImode load
;; counterpart is typed "neon_load2_2reg_q" -- confirm the asymmetry.
5231 (define_insn "neon_vst2<mode>"
5232 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5233 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5234 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5237 "vst2.<V_sz_elem>\t%h1, %A0"
5238 [(set_attr "type" "neon_store2_4reg<q>")]
5241 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5242 ;; here on big endian targets.
;; Store one lane from each of two D registers.  The registers printed are
;; DImode registers at REGNO (operands[1]) and REGNO + 2, both indexed by
;; the endian-adjusted lane number.
5243 (define_insn "neon_vst2_lane<mode>"
5244 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5245 (unspec:<V_two_elem>
5246 [(match_operand:TI 1 "s_register_operand" "w")
5247 (match_operand:SI 2 "immediate_operand" "i")
5248 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5252 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5253 int regno = REGNO (operands[1]);
5255 ops[0] = operands[0];
5256 ops[1] = gen_rtx_REG (DImode, regno);
5257 ops[2] = gen_rtx_REG (DImode, regno + 2);
5258 ops[3] = GEN_INT (lane);
5259 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5262 [(set_attr "type" "neon_store2_one_lane<q>")]
5265 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5266 ;; here on big endian targets.
;; VQ_HS (OImode) form of the single-lane vst2.  Operand 2 is the
;; immediate lane index, remapped through NEON_ENDIAN_LANE_N; lanes in the
;; upper half of the vector (lane >= max / 2) are special-cased before the
;; DImode registers at regno and regno + 4 are named in the output.
5267 (define_insn "neon_vst2_lane<mode>"
5268 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5269 (unspec:<V_two_elem>
5270 [(match_operand:OI 1 "s_register_operand" "w")
5271 (match_operand:SI 2 "immediate_operand" "i")
5272 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5276 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5277 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5278 int regno = REGNO (operands[1]);
5280 if (lane >= max / 2)
5285 ops[0] = operands[0];
5286 ops[1] = gen_rtx_REG (DImode, regno);
5287 ops[2] = gen_rtx_REG (DImode, regno + 4);
5288 ops[3] = GEN_INT (lane);
5289 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5292 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Named expander for EImode structure loads over the VDX modes: register
;; operand 0 is set from an unspec of EImode memory operand 1.
5295 (define_expand "vec_load_lanesei<mode>"
5296 [(set (match_operand:EI 0 "s_register_operand")
5297 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5298 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load the EImode register group operand 0 from structure memory
;; operand 1 (VDX modes).  64-bit elements are read with "vld1.64"; every
;; other element size uses "vld3".  The "type" attribute follows the same
;; element-size test.
5302 (define_insn "neon_vld3<mode>"
5303 [(set (match_operand:EI 0 "s_register_operand" "=w")
5304 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5305 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5309 if (<V_sz_elem> == 64)
5310 return "vld1.64\t%h0, %A1";
5312 return "vld3.<V_sz_elem>\t%h0, %A1";
5315 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5316 (const_string "neon_load1_3reg<q>")
5317 (const_string "neon_load3_3reg<q>")))]
;; Named expander for CImode structure loads over the VQ2 modes; it
;; forwards operands 0 and 1 unchanged to neon_vld3<mode>.
5320 (define_expand "vec_load_lanesci<mode>"
5321 [(match_operand:CI 0 "s_register_operand")
5322 (match_operand:CI 1 "neon_struct_operand")
5323 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5326 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; VQ2 (CImode) vld3 is emitted as two instructions.  neon_vld3qa loads
;; from the first EImode-sized piece of memory operand 1; neon_vld3qb
;; loads from the piece that follows it (offset GET_MODE_SIZE (EImode)),
;; taking operand 0 as an input as well as its output.
5330 (define_expand "neon_vld3<mode>"
5331 [(match_operand:CI 0 "s_register_operand")
5332 (match_operand:CI 1 "neon_struct_operand")
5333 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5338 mem = adjust_address (operands[1], EImode, 0);
5339 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5340 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5341 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of a VQ2 vld3: load from EImode memory operand 1 into the
;; DImode registers at regno, regno + 4 and regno + 8 of CImode operand 0.
5345 (define_insn "neon_vld3qa<mode>"
5346 [(set (match_operand:CI 0 "s_register_operand" "=w")
5347 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5348 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5352 int regno = REGNO (operands[0]);
5354 ops[0] = gen_rtx_REG (DImode, regno);
5355 ops[1] = gen_rtx_REG (DImode, regno + 4);
5356 ops[2] = gen_rtx_REG (DImode, regno + 8);
5357 ops[3] = operands[1];
5358 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5361 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of a VQ2 vld3: load from EImode memory operand 1 into the
;; DImode registers at regno + 2, regno + 6 and regno + 10 of CImode
;; operand 0.  Operand 2 is tied to operand 0 (constraint "0"), so the
;; existing contents of the register group are an input to this insn.
5364 (define_insn "neon_vld3qb<mode>"
5365 [(set (match_operand:CI 0 "s_register_operand" "=w")
5366 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5367 (match_operand:CI 2 "s_register_operand" "0")
5368 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5372 int regno = REGNO (operands[0]);
5374 ops[0] = gen_rtx_REG (DImode, regno + 2);
5375 ops[1] = gen_rtx_REG (DImode, regno + 6);
5376 ops[2] = gen_rtx_REG (DImode, regno + 10);
5377 ops[3] = operands[1];
5378 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5381 [(set_attr "type" "neon_load3_3reg<q>")]
5384 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5385 ;; here on big endian targets.
;; Load a single lane of a three-register structure (VD_LANE modes).
;; Operand 1 is the source memory, operand 2 the incoming EImode register
;; group (tied to output operand 0) and operand 3 the immediate lane
;; index, remapped through NEON_ENDIAN_LANE_N.  The DImode registers used
;; are regno, regno + 2 and regno + 4.
;; NOTE(review): the address is printed with plain %3 rather than the %A
;; modifier used by the vld2/vld4 patterns; presumably deliberate - confirm.
5386 (define_insn "neon_vld3_lane<mode>"
5387 [(set (match_operand:EI 0 "s_register_operand" "=w")
5388 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5389 (match_operand:EI 2 "s_register_operand" "0")
5390 (match_operand:SI 3 "immediate_operand" "i")
5391 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5395 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5396 int regno = REGNO (operands[0]);
5398 ops[0] = gen_rtx_REG (DImode, regno);
5399 ops[1] = gen_rtx_REG (DImode, regno + 2);
5400 ops[2] = gen_rtx_REG (DImode, regno + 4);
5401 ops[3] = operands[1];
5402 ops[4] = GEN_INT (lane);
5403 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5407 [(set_attr "type" "neon_load3_one_lane<q>")]
5410 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5411 ;; here on big endian targets.
;; VQ_HS (CImode) form of the single-lane vld3.  Operand 3 is the
;; immediate lane index, remapped through NEON_ENDIAN_LANE_N; lanes in the
;; upper half of the vector (lane >= max / 2) are special-cased before the
;; DImode registers at regno, regno + 4 and regno + 8 are named in the
;; output.  Operand 2 is tied to output operand 0.
5412 (define_insn "neon_vld3_lane<mode>"
5413 [(set (match_operand:CI 0 "s_register_operand" "=w")
5414 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5415 (match_operand:CI 2 "s_register_operand" "0")
5416 (match_operand:SI 3 "immediate_operand" "i")
5417 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5421 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5422 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5423 int regno = REGNO (operands[0]);
5425 if (lane >= max / 2)
5430 ops[0] = gen_rtx_REG (DImode, regno);
5431 ops[1] = gen_rtx_REG (DImode, regno + 4);
5432 ops[2] = gen_rtx_REG (DImode, regno + 8);
5433 ops[3] = operands[1];
5434 ops[4] = GEN_INT (lane);
5435 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5439 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a three-element structure from memory operand 1 into the EImode
;; register group operand 0 using the all-lanes ("[]") form of vld3 on the
;; DImode registers regno, regno + 2 and regno + 4.  Vector modes with a
;; single element use a plain vld1 instead; the "type" attribute is
;; selected by the same element-count test.
5442 (define_insn "neon_vld3_dup<mode>"
5443 [(set (match_operand:EI 0 "s_register_operand" "=w")
5444 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5445 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5449 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5451 int regno = REGNO (operands[0]);
5453 ops[0] = gen_rtx_REG (DImode, regno);
5454 ops[1] = gen_rtx_REG (DImode, regno + 2);
5455 ops[2] = gen_rtx_REG (DImode, regno + 4);
5456 ops[3] = operands[1];
5457 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5461 return "vld1.<V_sz_elem>\t%h0, %A1";
5464 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5465 (const_string "neon_load3_all_lanes<q>")
5466 (const_string "neon_load1_1reg<q>")))])
;; Named expander for EImode structure stores over the VDX modes: memory
;; operand 0 is set from an unspec of EImode register operand 1.
5468 (define_expand "vec_store_lanesei<mode>"
5469 [(set (match_operand:EI 0 "neon_struct_operand")
5470 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5471 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store the EImode register group operand 1 to structure memory operand 0
;; (VDX modes).  64-bit elements are written with "vst1.64"; every other
;; element size uses "vst3" on the whole register list (%h1).
;; The non-64-bit alternative stores complete registers, so it is typed as
;; neon_store3_3reg (matching neon_vst3qa/neon_vst3qb) rather than as a
;; single-lane store.
5475 (define_insn "neon_vst3<mode>"
5476 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5477 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5478 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5482 if (<V_sz_elem> == 64)
5483 return "vst1.64\t%h1, %A0";
5485 return "vst3.<V_sz_elem>\t%h1, %A0";
5488 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5489 (const_string "neon_store1_3reg<q>")
5490 (const_string "neon_store3_3reg<q>")))])
;; Named expander for CImode structure stores over the VQ2 modes; it
;; forwards operands 0 and 1 unchanged to neon_vst3<mode>.
5492 (define_expand "vec_store_lanesci<mode>"
5493 [(match_operand:CI 0 "neon_struct_operand")
5494 (match_operand:CI 1 "s_register_operand")
5495 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5498 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; VQ2 (CImode) vst3 is emitted as two instructions.  neon_vst3qa stores
;; to the first EImode-sized piece of memory operand 0; neon_vst3qb stores
;; to the piece that follows it (offset GET_MODE_SIZE (EImode)).  Both
;; read the same CImode register group, operand 1.
5502 (define_expand "neon_vst3<mode>"
5503 [(match_operand:CI 0 "neon_struct_operand")
5504 (match_operand:CI 1 "s_register_operand")
5505 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5510 mem = adjust_address (operands[0], EImode, 0);
5511 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5512 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5513 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of a VQ2 vst3: store the DImode registers at regno,
;; regno + 4 and regno + 8 of CImode operand 1 to EImode memory operand 0.
5517 (define_insn "neon_vst3qa<mode>"
5518 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5519 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5520 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5524 int regno = REGNO (operands[1]);
5526 ops[0] = operands[0];
5527 ops[1] = gen_rtx_REG (DImode, regno);
5528 ops[2] = gen_rtx_REG (DImode, regno + 4);
5529 ops[3] = gen_rtx_REG (DImode, regno + 8);
5530 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5533 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of a VQ2 vst3: store the DImode registers at regno + 2,
;; regno + 6 and regno + 10 of CImode operand 1 to EImode memory operand 0.
5536 (define_insn "neon_vst3qb<mode>"
5537 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5538 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5539 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5543 int regno = REGNO (operands[1]);
5545 ops[0] = operands[0];
5546 ops[1] = gen_rtx_REG (DImode, regno + 2);
5547 ops[2] = gen_rtx_REG (DImode, regno + 6);
5548 ops[3] = gen_rtx_REG (DImode, regno + 10);
5549 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5552 [(set_attr "type" "neon_store3_3reg<q>")]
5555 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5556 ;; here on big endian targets.
;; Store a single lane of a three-register structure (VD_LANE modes).
;; Operand 0 is the destination memory, operand 1 the EImode register
;; group and operand 2 the immediate lane index, remapped through
;; NEON_ENDIAN_LANE_N.  The DImode registers used are regno, regno + 2
;; and regno + 4.
;; NOTE(review): the address is printed with plain %0 rather than the %A
;; modifier used by the vst2/vst4 patterns; presumably deliberate - confirm.
5557 (define_insn "neon_vst3_lane<mode>"
5558 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5559 (unspec:<V_three_elem>
5560 [(match_operand:EI 1 "s_register_operand" "w")
5561 (match_operand:SI 2 "immediate_operand" "i")
5562 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5566 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5567 int regno = REGNO (operands[1]);
5569 ops[0] = operands[0];
5570 ops[1] = gen_rtx_REG (DImode, regno);
5571 ops[2] = gen_rtx_REG (DImode, regno + 2);
5572 ops[3] = gen_rtx_REG (DImode, regno + 4);
5573 ops[4] = GEN_INT (lane);
5574 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5578 [(set_attr "type" "neon_store3_one_lane<q>")]
5581 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5582 ;; here on big endian targets.
;; VQ_HS (CImode) form of the single-lane vst3.  Operand 2 is the
;; immediate lane index, remapped through NEON_ENDIAN_LANE_N; lanes in the
;; upper half of the vector (lane >= max / 2) are special-cased before the
;; DImode registers at regno, regno + 4 and regno + 8 are named in the
;; output.
5583 (define_insn "neon_vst3_lane<mode>"
5584 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5585 (unspec:<V_three_elem>
5586 [(match_operand:CI 1 "s_register_operand" "w")
5587 (match_operand:SI 2 "immediate_operand" "i")
5588 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5592 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5593 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5594 int regno = REGNO (operands[1]);
5596 if (lane >= max / 2)
5601 ops[0] = operands[0];
5602 ops[1] = gen_rtx_REG (DImode, regno);
5603 ops[2] = gen_rtx_REG (DImode, regno + 4);
5604 ops[3] = gen_rtx_REG (DImode, regno + 8);
5605 ops[4] = GEN_INT (lane);
5606 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5610 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Named expander for OImode structure loads over the VDX modes: register
;; operand 0 is set from an unspec of OImode memory operand 1.
5613 (define_expand "vec_load_lanesoi<mode>"
5614 [(set (match_operand:OI 0 "s_register_operand")
5615 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5616 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load the OImode register group operand 0 from structure memory
;; operand 1 (VDX modes).  64-bit elements are read with "vld1.64"; every
;; other element size uses "vld4".  The "type" attribute follows the same
;; element-size test.
5620 (define_insn "neon_vld4<mode>"
5621 [(set (match_operand:OI 0 "s_register_operand" "=w")
5622 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5623 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5627 if (<V_sz_elem> == 64)
5628 return "vld1.64\t%h0, %A1";
5630 return "vld4.<V_sz_elem>\t%h0, %A1";
5633 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5634 (const_string "neon_load1_4reg<q>")
5635 (const_string "neon_load4_4reg<q>")))]
;; Named expander for XImode structure loads over the VQ2 modes; it
;; forwards operands 0 and 1 unchanged to neon_vld4<mode>.
5638 (define_expand "vec_load_lanesxi<mode>"
5639 [(match_operand:XI 0 "s_register_operand")
5640 (match_operand:XI 1 "neon_struct_operand")
5641 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5644 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; VQ2 (XImode) vld4 is emitted as two instructions.  neon_vld4qa loads
;; from the first OImode-sized piece of memory operand 1; neon_vld4qb
;; loads from the piece that follows it (offset GET_MODE_SIZE (OImode)),
;; taking operand 0 as an input as well as its output.
5648 (define_expand "neon_vld4<mode>"
5649 [(match_operand:XI 0 "s_register_operand")
5650 (match_operand:XI 1 "neon_struct_operand")
5651 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5656 mem = adjust_address (operands[1], OImode, 0);
5657 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5658 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5659 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of a VQ2 vld4: load from OImode memory operand 1 into the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 of
;; XImode operand 0.
5663 (define_insn "neon_vld4qa<mode>"
5664 [(set (match_operand:XI 0 "s_register_operand" "=w")
5665 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5666 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5670 int regno = REGNO (operands[0]);
5672 ops[0] = gen_rtx_REG (DImode, regno);
5673 ops[1] = gen_rtx_REG (DImode, regno + 4);
5674 ops[2] = gen_rtx_REG (DImode, regno + 8);
5675 ops[3] = gen_rtx_REG (DImode, regno + 12);
5676 ops[4] = operands[1];
5677 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5680 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of a VQ2 vld4: load from OImode memory operand 1 into the
;; DImode registers at regno + 2, regno + 6, regno + 10 and regno + 14 of
;; XImode operand 0.  Operand 2 is tied to operand 0 (constraint "0"), so
;; the existing contents of the register group are an input to this insn.
5683 (define_insn "neon_vld4qb<mode>"
5684 [(set (match_operand:XI 0 "s_register_operand" "=w")
5685 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5686 (match_operand:XI 2 "s_register_operand" "0")
5687 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5691 int regno = REGNO (operands[0]);
5693 ops[0] = gen_rtx_REG (DImode, regno + 2);
5694 ops[1] = gen_rtx_REG (DImode, regno + 6);
5695 ops[2] = gen_rtx_REG (DImode, regno + 10);
5696 ops[3] = gen_rtx_REG (DImode, regno + 14);
5697 ops[4] = operands[1];
5698 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5701 [(set_attr "type" "neon_load4_4reg<q>")]
5704 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5705 ;; here on big endian targets.
;; Load a single lane of a four-register structure (VD_LANE modes).
;; Operand 1 is the source memory, operand 2 the incoming OImode register
;; group (tied to output operand 0) and operand 3 the immediate lane
;; index, remapped through NEON_ENDIAN_LANE_N.  The DImode registers used
;; are regno, regno + 2, regno + 4 and regno + 6.
5706 (define_insn "neon_vld4_lane<mode>"
5707 [(set (match_operand:OI 0 "s_register_operand" "=w")
5708 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5709 (match_operand:OI 2 "s_register_operand" "0")
5710 (match_operand:SI 3 "immediate_operand" "i")
5711 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5715 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5716 int regno = REGNO (operands[0]);
5718 ops[0] = gen_rtx_REG (DImode, regno);
5719 ops[1] = gen_rtx_REG (DImode, regno + 2);
5720 ops[2] = gen_rtx_REG (DImode, regno + 4);
5721 ops[3] = gen_rtx_REG (DImode, regno + 6);
5722 ops[4] = operands[1];
5723 ops[5] = GEN_INT (lane);
5724 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5728 [(set_attr "type" "neon_load4_one_lane<q>")]
5731 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5732 ;; here on big endian targets.
;; VQ_HS (XImode) form of the single-lane vld4.  Operand 3 is the
;; immediate lane index, remapped through NEON_ENDIAN_LANE_N; lanes in the
;; upper half of the vector (lane >= max / 2) are special-cased before the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 are
;; named in the output.  Operand 2 is tied to output operand 0.
5733 (define_insn "neon_vld4_lane<mode>"
5734 [(set (match_operand:XI 0 "s_register_operand" "=w")
5735 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5736 (match_operand:XI 2 "s_register_operand" "0")
5737 (match_operand:SI 3 "immediate_operand" "i")
5738 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5742 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5743 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5744 int regno = REGNO (operands[0]);
5746 if (lane >= max / 2)
5751 ops[0] = gen_rtx_REG (DImode, regno);
5752 ops[1] = gen_rtx_REG (DImode, regno + 4);
5753 ops[2] = gen_rtx_REG (DImode, regno + 8);
5754 ops[3] = gen_rtx_REG (DImode, regno + 12);
5755 ops[4] = operands[1];
5756 ops[5] = GEN_INT (lane);
5757 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5761 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load a four-element structure from memory operand 1 into the OImode
;; register group operand 0 using the all-lanes ("[]") form of vld4 on the
;; DImode registers regno, regno + 2, regno + 4 and regno + 6.  Vector
;; modes with a single element use a plain vld1 instead; the "type"
;; attribute is selected by the same element-count test.
5764 (define_insn "neon_vld4_dup<mode>"
5765 [(set (match_operand:OI 0 "s_register_operand" "=w")
5766 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5767 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5771 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5773 int regno = REGNO (operands[0]);
5775 ops[0] = gen_rtx_REG (DImode, regno);
5776 ops[1] = gen_rtx_REG (DImode, regno + 2);
5777 ops[2] = gen_rtx_REG (DImode, regno + 4);
5778 ops[3] = gen_rtx_REG (DImode, regno + 6);
5779 ops[4] = operands[1];
5780 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5785 return "vld1.<V_sz_elem>\t%h0, %A1";
5788 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5789 (const_string "neon_load4_all_lanes<q>")
5790 (const_string "neon_load1_1reg<q>")))]
;; Named expander for OImode structure stores over the VDX modes: memory
;; operand 0 is set from an unspec of OImode register operand 1.
5793 (define_expand "vec_store_lanesoi<mode>"
5794 [(set (match_operand:OI 0 "neon_struct_operand")
5795 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5796 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store the OImode register group operand 1 to structure memory operand 0
;; (VDX modes).  64-bit elements are written with "vst1.64"; every other
;; element size uses "vst4".  The "type" attribute follows the same
;; element-size test.
5800 (define_insn "neon_vst4<mode>"
5801 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5802 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5803 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5807 if (<V_sz_elem> == 64)
5808 return "vst1.64\t%h1, %A0";
5810 return "vst4.<V_sz_elem>\t%h1, %A0";
5813 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5814 (const_string "neon_store1_4reg<q>")
5815 (const_string "neon_store4_4reg<q>")))]
;; Named expander for XImode structure stores over the VQ2 modes; it
;; forwards operands 0 and 1 unchanged to neon_vst4<mode>.
5818 (define_expand "vec_store_lanesxi<mode>"
5819 [(match_operand:XI 0 "neon_struct_operand")
5820 (match_operand:XI 1 "s_register_operand")
5821 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5824 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; VQ2 (XImode) vst4 is emitted as two instructions.  neon_vst4qa stores
;; to the first OImode-sized piece of memory operand 0; neon_vst4qb stores
;; to the piece that follows it (offset GET_MODE_SIZE (OImode)).  Both
;; read the same XImode register group, operand 1.
5828 (define_expand "neon_vst4<mode>"
5829 [(match_operand:XI 0 "neon_struct_operand")
5830 (match_operand:XI 1 "s_register_operand")
5831 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5836 mem = adjust_address (operands[0], OImode, 0);
5837 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5838 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5839 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of a VQ2 vst4: store the DImode registers at regno,
;; regno + 4, regno + 8 and regno + 12 of XImode operand 1 to OImode
;; memory operand 0.
5843 (define_insn "neon_vst4qa<mode>"
5844 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5845 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5846 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5850 int regno = REGNO (operands[1]);
5852 ops[0] = operands[0];
5853 ops[1] = gen_rtx_REG (DImode, regno);
5854 ops[2] = gen_rtx_REG (DImode, regno + 4);
5855 ops[3] = gen_rtx_REG (DImode, regno + 8);
5856 ops[4] = gen_rtx_REG (DImode, regno + 12);
5857 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5860 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of a VQ2 vst4: store the DImode registers at regno + 2,
;; regno + 6, regno + 10 and regno + 14 of XImode operand 1 to OImode
;; memory operand 0.
5863 (define_insn "neon_vst4qb<mode>"
5864 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5865 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5866 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5870 int regno = REGNO (operands[1]);
5872 ops[0] = operands[0];
5873 ops[1] = gen_rtx_REG (DImode, regno + 2);
5874 ops[2] = gen_rtx_REG (DImode, regno + 6);
5875 ops[3] = gen_rtx_REG (DImode, regno + 10);
5876 ops[4] = gen_rtx_REG (DImode, regno + 14);
5877 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5880 [(set_attr "type" "neon_store4_4reg<q>")]
5883 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5884 ;; here on big endian targets.
;; Store a single lane of a four-register structure (VD_LANE modes).
;; Operand 0 is the destination memory, operand 1 the OImode register
;; group and operand 2 the immediate lane index, remapped through
;; NEON_ENDIAN_LANE_N.  The DImode registers used are regno, regno + 2,
;; regno + 4 and regno + 6.
5885 (define_insn "neon_vst4_lane<mode>"
5886 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5887 (unspec:<V_four_elem>
5888 [(match_operand:OI 1 "s_register_operand" "w")
5889 (match_operand:SI 2 "immediate_operand" "i")
5890 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5894 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5895 int regno = REGNO (operands[1]);
5897 ops[0] = operands[0];
5898 ops[1] = gen_rtx_REG (DImode, regno);
5899 ops[2] = gen_rtx_REG (DImode, regno + 2);
5900 ops[3] = gen_rtx_REG (DImode, regno + 4);
5901 ops[4] = gen_rtx_REG (DImode, regno + 6);
5902 ops[5] = GEN_INT (lane);
5903 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5907 [(set_attr "type" "neon_store4_one_lane<q>")]
5910 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5911 ;; here on big endian targets.
;; VQ_HS (XImode) form of the single-lane vst4.  Operand 2 is the
;; immediate lane index, remapped through NEON_ENDIAN_LANE_N; lanes in the
;; upper half of the vector (lane >= max / 2) are special-cased before the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 are
;; named in the output.
;; This insn stores one lane only, so it is typed neon_store4_one_lane
;; like the other single-lane store patterns, not as a four-register store.
5912 (define_insn "neon_vst4_lane<mode>"
5913 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5914 (unspec:<V_four_elem>
5915 [(match_operand:XI 1 "s_register_operand" "w")
5916 (match_operand:SI 2 "immediate_operand" "i")
5917 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5921 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5922 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5923 int regno = REGNO (operands[1]);
5925 if (lane >= max / 2)
5930 ops[0] = operands[0];
5931 ops[1] = gen_rtx_REG (DImode, regno);
5932 ops[2] = gen_rtx_REG (DImode, regno + 4);
5933 ops[3] = gen_rtx_REG (DImode, regno + 8);
5934 ops[4] = gen_rtx_REG (DImode, regno + 12);
5935 ops[5] = GEN_INT (lane);
5936 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5940 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Sign- or zero-extend (SE iterator) the half of VU operand 1 selected by
;; the vect_par_constant_low parallel in operand 2, writing the widened
;; result to operand 0 with vmovl.  Only enabled for little-endian.
5943 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5944 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5945 (SE:<V_unpack> (vec_select:<V_HALF>
5946 (match_operand:VU 1 "register_operand" "w")
5947 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5948 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5949 "vmovl.<US><V_sz_elem> %q0, %e1"
5950 [(set_attr "type" "neon_shift_imm_long")]
;; Sign- or zero-extend (SE iterator) the half of VU operand 1 selected by
;; the vect_par_constant_high parallel in operand 2, writing the widened
;; result to operand 0 with vmovl.  Only enabled for little-endian.
5953 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5954 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5955 (SE:<V_unpack> (vec_select:<V_HALF>
5956 (match_operand:VU 1 "register_operand" "w")
5957 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5958 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5959 "vmovl.<US><V_sz_elem> %q0, %f1"
5960 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for unpacking the high half of a VU vector.  It builds a
;; PARALLEL holding the element indices nunits/2 .. nunits-1 and hands it
;; to neon_vec_unpack<US>_hi_<mode> as the selector.  Little-endian only.
5963 (define_expand "vec_unpack<US>_hi_<mode>"
5964 [(match_operand:<V_unpack> 0 "register_operand" "")
5965 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5966 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5968 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5971 for (i = 0; i < (<V_mode_nunits>/2); i++)
5972 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5974 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5975 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for unpacking the low half of a VU vector.  It builds a
;; PARALLEL holding the element indices 0 .. nunits/2-1 and hands it to
;; neon_vec_unpack<US>_lo_<mode> as the selector.  Little-endian only.
5982 (define_expand "vec_unpack<US>_lo_<mode>"
5983 [(match_operand:<V_unpack> 0 "register_operand" "")
5984 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5985 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5987 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5990 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5991 RTVEC_ELT (v, i) = GEN_INT (i);
5992 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5993 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the selected halves of VU operands 1 and 3: each
;; half is sign- or zero-extended (SE iterator) and the product is written
;; to operand 0 with vmull.  Operand 2 is a vect_par_constant_low
;; selector.  Only enabled for little-endian.
6000 (define_insn "neon_vec_<US>mult_lo_<mode>"
6001 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6002 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6003 (match_operand:VU 1 "register_operand" "w")
6004 (match_operand:VU 2 "vect_par_constant_low" "")))
6005 (SE:<V_unpack> (vec_select:<V_HALF>
6006 (match_operand:VU 3 "register_operand" "w")
6008 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6009 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6010 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the low halves of two VU vectors.
;; It builds a PARALLEL holding the element indices 0 .. nunits/2-1 and
;; emits neon_vec_<US>mult_lo_<mode>.  Little-endian only.
6013 (define_expand "vec_widen_<US>mult_lo_<mode>"
6014 [(match_operand:<V_unpack> 0 "register_operand" "")
6015 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6016 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6017 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6019 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6022 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6023 RTVEC_ELT (v, i) = GEN_INT (i);
6024 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6026 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of the selected halves of VU operands 1 and 3: each
;; half is sign- or zero-extended (SE iterator) and the product is written
;; to operand 0 with vmull.  Operand 2 is a vect_par_constant_high
;; selector.  Only enabled for little-endian.
6034 (define_insn "neon_vec_<US>mult_hi_<mode>"
6035 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6036 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6037 (match_operand:VU 1 "register_operand" "w")
6038 (match_operand:VU 2 "vect_par_constant_high" "")))
6039 (SE:<V_unpack> (vec_select:<V_HALF>
6040 (match_operand:VU 3 "register_operand" "w")
6042 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6043 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6044 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the high halves of two VU vectors.
;; It builds a PARALLEL holding the element indices nunits/2 .. nunits-1
;; and emits neon_vec_<US>mult_hi_<mode>.  Little-endian only.
6047 (define_expand "vec_widen_<US>mult_hi_<mode>"
6048 [(match_operand:<V_unpack> 0 "register_operand" "")
6049 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6050 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6051 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6053 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6056 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6057 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6058 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6060 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left: VW operand 1 is shifted left by the constant
;; scalar amount in operand 2, and the sign- or zero-extended (SE
;; iterator) result is written to operand 0 with vshll.
6069 (define_insn "neon_vec_<US>shiftl_<mode>"
6070 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6071 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6072 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6075 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6077 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for a widening shift left of the low half of a VU vector.
;; The half is taken as a <V_HALF> subreg of operand 1 at byte offset 0
;; and passed to neon_vec_<US>shiftl_<V_half>.  Little-endian only.
6080 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6081 [(match_operand:<V_unpack> 0 "register_operand" "")
6082 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6083 (match_operand:SI 2 "immediate_operand" "i")]
6084 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6086 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6087 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for a widening shift left of the high half of a VU vector.
;; The half is taken as a <V_HALF> subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and passed to
;; neon_vec_<US>shiftl_<V_half>.  Little-endian only.
6093 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6094 [(match_operand:<V_unpack> 0 "register_operand" "")
6095 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6096 (match_operand:SI 2 "immediate_operand" "i")]
6097 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6099 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6100 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6101 GET_MODE_SIZE (<V_HALF>mode)),
6107 ;; Vectorize for non-neon-quad case
;; Sign- or zero-extend (SE iterator) the whole VDI vector operand 1 into
;; the <V_widen> register operand 0 with a single vmovl.
6108 (define_insn "neon_unpack<US>_<mode>"
6109 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6110 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6112 "vmovl.<US><V_sz_elem> %q0, %P1"
6113 [(set_attr "type" "neon_move")]
;; VDI form of vec_unpack_lo: widen all of operand 1 into a fresh
;; <V_widen> temporary with neon_unpack<US>_<mode>, then copy the low half
;; of that temporary to operand 0 with neon_vget_low.
6116 (define_expand "vec_unpack<US>_lo_<mode>"
6117 [(match_operand:<V_double_width> 0 "register_operand" "")
6118 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6121 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6122 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6123 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; VDI form of vec_unpack_hi: widen all of operand 1 into a fresh
;; <V_widen> temporary with neon_unpack<US>_<mode>, then copy the high
;; half of that temporary to operand 0 with neon_vget_high.
6129 (define_expand "vec_unpack<US>_hi_<mode>"
6130 [(match_operand:<V_double_width> 0 "register_operand" "")
6131 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6134 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6135 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6136 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI vectors (operands 1 and 2) into the
;; <V_widen> register operand 0 with a single vmull.
6142 (define_insn "neon_vec_<US>mult_<mode>"
6143 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6144 (mult:<V_widen> (SE:<V_widen>
6145 (match_operand:VDI 1 "register_operand" "w"))
6147 (match_operand:VDI 2 "register_operand" "w"))))]
6149 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6150 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; VDI form of vec_widen_mult_hi: compute the full widening product of
;; operands 1 and 2 into a fresh <V_widen> temporary, then copy its high
;; half to operand 0 with neon_vget_high.
6153 (define_expand "vec_widen_<US>mult_hi_<mode>"
6154 [(match_operand:<V_double_width> 0 "register_operand" "")
6155 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6156 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6159 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6160 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6161 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; VDI form of vec_widen_mult_lo: compute the full widening product of
;; operands 1 and 2 into a fresh <V_widen> temporary, then copy its low
;; half to operand 0 with neon_vget_low.
6168 (define_expand "vec_widen_<US>mult_lo_<mode>"
6169 [(match_operand:<V_double_width> 0 "register_operand" "")
6170 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6171 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6174 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6175 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6176 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; VDI form of vec_widen_shiftl_hi: perform the widening shift of
;; operand 1 by immediate operand 2 into a fresh <V_widen> temporary, then
;; copy its high half to operand 0 with neon_vget_high.
6183 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6184 [(match_operand:<V_double_width> 0 "register_operand" "")
6185 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6186 (match_operand:SI 2 "immediate_operand" "i")]
6189 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6190 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6191 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; VDI form of vec_widen_shiftl_lo: perform the widening shift of
;; operand 1 by immediate operand 2 into a fresh <V_widen> temporary, then
;; copy its low half to operand 0 with neon_vget_low.
6197 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6198 [(match_operand:<V_double_width> 0 "register_operand" "")
6199 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6200 (match_operand:SI 2 "immediate_operand" "i")]
6203 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6204 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6205 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6211 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6212 ; because the ordering of vector elements in Q registers is different from what
6213 ; the semantics of the instructions require.
;; Truncate VN operands 1 and 2 and concatenate the narrowed halves into
;; operand 0.  Two vmovn instructions are emitted (hence length 8), one
;; writing %e0 and one writing %f0.  The output is earlyclobber ("=&w"),
;; so it is not allocated on top of either input.  Little-endian only.
6215 (define_insn "vec_pack_trunc_<mode>"
6216 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6217 (vec_concat:<V_narrow_pack>
6218 (truncate:<V_narrow>
6219 (match_operand:VN 1 "register_operand" "w"))
6220 (truncate:<V_narrow>
6221 (match_operand:VN 2 "register_operand" "w"))))]
6222 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6223 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6224 [(set_attr "type" "multiple")
6225 (set_attr "length" "8")]
6228 ;; For the non-quad case.
;; Truncate the VN vector operand 1 to its <V_narrow> mode in operand 0
;; with a single vmovn.  Little-endian only.
6229 (define_insn "neon_vec_pack_trunc_<mode>"
6230 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6231 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6232 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6233 "vmovn.i<V_sz_elem>\t%P0, %q1"
6234 [(set_attr "type" "neon_move_narrow_q")]
;; VSHFT form of vec_pack_trunc: place operand 1 in the low half and
;; operand 2 in the high half of a fresh <V_DOUBLE> temporary
;; (move_lo_quad / move_hi_quad), then narrow that temporary into
;; operand 0 with neon_vec_pack_trunc.  Little-endian only.
6237 (define_expand "vec_pack_trunc_<mode>"
6238 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6239 (match_operand:VSHFT 1 "register_operand" "")
6240 (match_operand:VSHFT 2 "register_operand")]
6241 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6243 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6245 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6246 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6247 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Match abs (operand 1 - operand 2) on VDQ vectors and emit one vabd.
;; For floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The "type" attribute distinguishes the
;; floating-point and integer cases.
6251 (define_insn "neon_vabd<mode>_2"
6252 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6253 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
6254 (match_operand:VDQ 2 "s_register_operand" "w"))))]
6255 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6256 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6258 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6259 (const_string "neon_fp_abd_s<q>")
6260 (const_string "neon_abd<q>")))]
;; Match abs applied to an unspec of VDQ operands 1 and 2 and emit one
;; vabd.  For floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The "type" attribute distinguishes the
;; floating-point and integer cases.
6263 (define_insn "neon_vabd<mode>_3"
6264 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6265 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
6266 (match_operand:VDQ 2 "s_register_operand" "w")]
6268 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6269 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6271 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6272 (const_string "neon_fp_abd_s<q>")
6273 (const_string "neon_abd<q>")))]
6276 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; After reload, when the DImode destination is a VFP register, rewrite
;; sign_extend SI->DI as: duplicate operand 1 into both lanes of the V2SI
;; view of the destination (operand 2), then arithmetic-shift the DImode
;; register right by 32.
6279 [(set (match_operand:DI 0 "s_register_operand" "")
6280 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6281 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6282 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6283 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6285 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; After reload, when the DImode destination is a VFP register, rewrite
;; sign_extend HI->DI as: duplicate operand 1 into every lane of the V4HI
;; view of the destination (operand 2), then arithmetic-shift the DImode
;; register right by 48.
6289 [(set (match_operand:DI 0 "s_register_operand" "")
6290 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6291 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6292 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6293 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6295 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; After reload, when the DImode destination is a VFP register, rewrite
;; sign_extend QI->DI as: duplicate operand 1 into every lane of the V8QI
;; view of the destination (operand 2), then arithmetic-shift the DImode
;; register right by 56.
6299 [(set (match_operand:DI 0 "s_register_operand" "")
6300 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6301 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6302 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6303 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6305 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; After reload, when the DImode destination is a VFP register, rewrite
;; zero_extend SI->DI as: duplicate operand 1 into both lanes of the V2SI
;; view of the destination (operand 2), then logical-shift the DImode
;; register right by 32.
6309 [(set (match_operand:DI 0 "s_register_operand" "")
6310 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6311 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6312 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6313 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6315 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; After reload, when the DImode destination is a VFP register, rewrite
;; zero_extend HI->DI as: duplicate operand 1 into every lane of the V4HI
;; view of the destination (operand 2), then logical-shift the DImode
;; register right by 48.
6319 [(set (match_operand:DI 0 "s_register_operand" "")
6320 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6321 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6322 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6323 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6325 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6329 [(set (match_operand:DI 0 "s_register_operand" "")
6330 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6331 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6332 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6333 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6335 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));