1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Its permitted values are vadd, vmin and vmax; the default is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern over the VDX mode iterator with nine alternatives.
;; At least one of the two operands must be a register.
;; Alternative 2 (constraint Dn) is an immediate move: the constant is
;; validated by neon_immediate_valid_for_move, which also rewrites
;; operands[1] and reports the element width, and either a vmov.f32 or a
;; vmov.i<width> template is produced.
;; Alternative 0 is a plain vmov; 1 and 3 go through output_move_neon;
;; 4 and 5 are vmov forms naming %Q/%R register halves; the remaining
;; alternatives use output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern over the VQXMOV mode iterator; same shape as the VDX
;; variant but using %q register operands.
;; At least one of the two operands must be a register.
;; Alternative 2 (constraint Dn) is an immediate move validated by
;; neon_immediate_valid_for_move and emitted as vmov.f32 or vmov.i<width>.
;; Alternative 0 is a plain vmov; 1 and 3 go through output_move_neon;
;; 4 and 5 each emit two vmov instructions (one per %e/%f half); the
;; remaining alternatives use output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Expander for TImode moves.  While new pseudos may still be created, the
;; source is forced into a register whenever the destination is not a REG.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves over the VSTRUCT mode iterator.  While new pseudos may
;; still be created, the source is forced into a register whenever the
;; destination is not a REG.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Expander for V4HF moves, available when TARGET_NEON && TARGET_FP16.
;; While new pseudos may still be created, the source is forced into a
;; register whenever the destination is not a REG.
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
148 if (can_create_pseudo_p ())
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
;; Expander for V8HF moves, available when TARGET_NEON && TARGET_FP16.
;; Both operands use an empty predicate.  While new pseudos may still be
;; created, the source is forced into a register whenever the destination
;; is not a REG.
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
163 if (can_create_pseudo_p ())
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
;; Move pattern over the VSTRUCT mode iterator with three alternatives
;; (register-register, store to Ut, load from Ut).  At least one operand
;; must be a register.  Alternatives 1 and 2 are emitted by
;; output_move_neon; the length attribute is computed at run time by
;; arm_attr_length_move_neon.
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload, an EI-mode register-to-register copy is split into two
;; sets: a TImode piece at the base register number and a DImode piece at
;; base + 4.  The dest/src pairs are handed to neon_disambiguate_copy along
;; with operands (presumably to order the copies safely when the register
;; ranges overlap -- confirm against that helper's definition).
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
203 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, an OI-mode register-to-register copy is split into two
;; TImode sets, at the base register number and at base + 4.  The dest/src
;; pairs are handed to neon_disambiguate_copy along with operands
;; (presumably to order the copies safely on overlap -- confirm).
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
222 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, a CI-mode register-to-register copy is split into three
;; TImode sets, at register offsets 0, 4 and 8 from the base.  The dest/src
;; pairs are handed to neon_disambiguate_copy along with operands
;; (presumably to order the copies safely on overlap -- confirm).
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
244 neon_disambiguate_copy (operands, dest, src, 3);
;; After reload, an XI-mode register-to-register copy is split into four
;; TImode sets, at register offsets 0, 4, 8 and 12 from the base.  The
;; dest/src pairs are handed to neon_disambiguate_copy along with operands
;; (presumably to order the copies safely on overlap -- confirm).
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
269 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned vector moves over the VDQX mode iterator; the
;; source is wrapped in UNSPEC_MISALIGNED_ACCESS.  Enabled only when
;; TARGET_NEON, !BYTES_BIG_ENDIAN and unaligned_access all hold.
;; The non-register operand is taken to be the memory reference, and its
;; address is forced into a register if neon_vector_mem_operand rejects it.
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
289 adjust_mem = operands[0];
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store over the VDX mode iterator: writes register operand 1
;; to memory operand 0 with a single vst1.<V_sz_elem>, naming the register
;; via %P.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load over the VDX mode iterator: reads memory operand 1 into
;; register operand 0 with a single vld1.<V_sz_elem>, naming the register
;; via %P.
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store over the VQX mode iterator: writes register operand 1
;; to memory operand 0 with a single vst1.<V_sz_elem>, naming the register
;; via %q.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load over the VQX mode iterator: reads memory operand 1 into
;; register operand 0 with a single vld1.<V_sz_elem>, naming the register
;; via %q.
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of operand 3 (tied to operand 0)
;; for the VD_LANE mode iterator.  Operand 2 is a bit mask; the lane index
;; is recovered as ffs (mask) - 1 and mirrored on big-endian.
;; Alternative 0 loads the lane from memory with vld1; alternative 1 moves
;; it from operand 1 with vmov.
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Lane insert for the VQ2 mode iterator.  Operand 2 is a bit mask whose
;; set bit gives the element index (ffs (mask) - 1).  The index is split
;; into a lane within one half (elem % half_elts, mirrored on big-endian)
;; and a register offset hi = (elem / half_elts) * 2; operand 0 is then
;; rewritten as a <V_HALF>mode register at regno + hi so that the same
;; vld1/vmov lane templates as the VD_LANE variant can be used.
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Lane insert for V2DI.  Operand 2 is a bit mask whose set bit gives the
;; element index (ffs (mask) - 1).  Operand 0 is rewritten as the DImode
;; register at regno + 2 * elem, which is then loaded whole with vld1.64
;; (alternative 0) or filled from the %Q/%R halves of operand 1 with vmov
;; (alternative 1).
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
393 operands[0] = gen_rtx_REG (DImode, regno);
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
398 return "vmov\t%P0, %Q1, %R1";
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set over the VDQ mode iterator.  The lane index in
;; operand 2 is converted to a single-bit mask (1 << index) and passed to
;; vec_set<mode>_internal, with operand 0 supplied both as the destination
;; and as the vector being updated (the internal pattern's operand 3).
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
;; Extract lane operand 2 of vector operand 1 for the VD_LANE mode
;; iterator.  The lane index is mirrored on big-endian.  Alternative 0
;; stores the lane to memory with vst1; alternative 1 moves it to operand 0
;; with vmov.<V_uf_sclr>.
415 (define_insn "vec_extract<mode>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
422 if (BYTES_BIG_ENDIAN)
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Lane extract for the VQ2 mode iterator.  The index in operand 2 is split
;; into a lane within one half (index % half_elts, mirrored on big-endian)
;; and a register offset hi = (index / half_elts) * 2; operand 1 is
;; rewritten as a <V_HALF>mode register at regno + hi so that the same
;; vst1/vmov lane templates as the VD_LANE variant can be used.
437 (define_insn "vec_extract<mode>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Lane extract for V2DI.  Operand 1 is rewritten as the DImode register at
;; regno + 2 * index, which is then stored whole with vst1.64
;; (alternative 0) or moved into the %Q/%R halves of operand 0 with vmov
;; (alternative 1).
463 (define_insn "vec_extractv2di"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
472 operands[1] = gen_rtx_REG (DImode, regno);
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vector initialisation over the VDQ mode iterator; all the
;; work is delegated to neon_expand_vector_init (target, values).
482 (define_expand "vec_init<mode>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
487 neon_expand_vector_init (operands[0], operands[1]);
491 ;; Doubleword and quadword arithmetic.
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
;; Vector add over the VDQ mode iterator, emitted as vadd.<V_if_elem>.
;; Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
508 ;; As with SFmode, full support for HFmode vector arithmetic is only available
509 ;; when flag_unsafe_math_optimizations is enabled.
;; This named add pattern over the VH mode iterator therefore requires
;; TARGET_NEON_FP16INST && flag_unsafe_math_optimizations.
511 (define_insn "add<mode>3"
513 (match_operand:VH 0 "s_register_operand" "=w")
515 (match_operand:VH 1 "s_register_operand" "w")
516 (match_operand:VH 2 "s_register_operand" "w")))]
517 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
518 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
520 (if_then_else (match_test "<Is_float_mode>")
521 (const_string "neon_fp_addsub_s<q>")
522 (const_string "neon_add<q>")))]
;; Add over the VH mode iterator that needs only TARGET_NEON_FP16INST, i.e.
;; it is not gated on flag_unsafe_math_optimizations as add<mode>3 is.
;; Emits the same vadd.<V_if_elem> template.
525 (define_insn "add<mode>3_fp16"
527 (match_operand:VH 0 "s_register_operand" "=w")
529 (match_operand:VH 1 "s_register_operand" "w")
530 (match_operand:VH 2 "s_register_operand" "w")))]
531 "TARGET_NEON_FP16INST"
532 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
534 (if_then_else (match_test "<Is_float_mode>")
535 (const_string "neon_fp_addsub_s<q>")
536 (const_string "neon_add<q>")))]
;; DImode add with seven alternatives; the pattern clobbers CC_REGNUM.
;; Alternatives 0 and 3 emit vadd.i64; they are selected by the "arch"
;; attribute values neon_for_64bits and avoid_neon_for_64bits respectively.
;; The remaining alternatives are typed "multiple", have length 8 and
;; conds "clob".
539 (define_insn "adddi3_neon"
540 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
541 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
542 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
543 (clobber (reg:CC CC_REGNUM))]
546 switch (which_alternative)
548 case 0: /* fall through */
549 case 3: return "vadd.i64\t%P0, %P1, %P2";
555 default: gcc_unreachable ();
558 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
559 multiple,multiple,multiple")
560 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
561 (set_attr "length" "*,8,8,*,8,8,8")
562 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtract over the VDQ mode iterator, emitted as
;; vsub.<V_if_elem>.  Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
565 (define_insn "*sub<mode>3_neon"
566 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
567 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
568 (match_operand:VDQ 2 "s_register_operand" "w")))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_addsub_s<q>")
574 (const_string "neon_sub<q>")))]
;; Named subtract pattern over the VH mode iterator; requires
;; TARGET_NEON_FP16INST && flag_unsafe_math_optimizations and emits
;; vsub.<V_if_elem>.
;; NOTE(review): the type attribute is the integer "neon_sub<q>", whereas
;; add<mode>3 for VH selects "neon_fp_addsub_s<q>" for float modes --
;; confirm whether this scheduling type is intentional.
577 (define_insn "sub<mode>3"
579 (match_operand:VH 0 "s_register_operand" "=w")
581 (match_operand:VH 1 "s_register_operand" "w")
582 (match_operand:VH 2 "s_register_operand" "w")))]
583 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
584 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585 [(set_attr "type" "neon_sub<q>")]
;; Subtract over the VH mode iterator that needs only TARGET_NEON_FP16INST,
;; i.e. it is not gated on flag_unsafe_math_optimizations as sub<mode>3 is.
;; NOTE(review): typed "neon_sub<q>" while add<mode>3_fp16 selects
;; "neon_fp_addsub_s<q>" for float modes -- confirm this is intentional.
588 (define_insn "sub<mode>3_fp16"
590 (match_operand:VH 0 "s_register_operand" "=w")
592 (match_operand:VH 1 "s_register_operand" "w")
593 (match_operand:VH 2 "s_register_operand" "w")))]
594 "TARGET_NEON_FP16INST"
595 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
596 [(set_attr "type" "neon_sub<q>")]
;; DImode subtract with five alternatives; the pattern clobbers CC_REGNUM.
;; Alternatives 0 and 4 emit vsub.i64 (selected by the "arch" attribute
;; values neon_for_64bits and avoid_neon_for_64bits respectively).
;; Alternatives 1-3 emit a subs/sbc pair on the %Q and %R halves, with
;; length 8 and conds "clob".
599 (define_insn "subdi3_neon"
600 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
601 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
602 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
603 (clobber (reg:CC CC_REGNUM))]
606 switch (which_alternative)
608 case 0: /* fall through */
609 case 4: return "vsub.i64\t%P0, %P1, %P2";
610 case 1: /* fall through */
611 case 2: /* fall through */
612 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
613 default: gcc_unreachable ();
616 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
617 (set_attr "conds" "*,clob,clob,clob,*")
618 (set_attr "length" "*,8,8,8,*")
619 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiply over the VDQW mode iterator, emitted as
;; vmul.<V_if_elem>.  Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.  The type attribute is neon_fp_mul_s<q>
;; for float modes and neon_mul_<V_elem_ch><q> otherwise.
622 (define_insn "*mul<mode>3_neon"
623 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
624 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
625 (match_operand:VDQW 2 "s_register_operand" "w")))]
626 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
627 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
629 (if_then_else (match_test "<Is_float_mode>")
630 (const_string "neon_fp_mul_s<q>")
631 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate over the VDQW mode iterator: operand 0 =
;; operand 2 * operand 3 + operand 1, with operand 1 tied to operand 0
;; (constraint "0").  Emitted as vmla.<V_if_elem>; float modes are only
;; matched under flag_unsafe_math_optimizations.
634 (define_insn "mul<mode>3add<mode>_neon"
635 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
636 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
637 (match_operand:VDQW 3 "s_register_operand" "w"))
638 (match_operand:VDQW 1 "s_register_operand" "0")))]
639 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
640 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
642 (if_then_else (match_test "<Is_float_mode>")
643 (const_string "neon_fp_mla_s<q>")
644 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate over the VH mode iterator: operand 0 =
;; operand 2 * operand 3 + operand 1, with operand 1 tied to operand 0.
;; Requires TARGET_NEON_FP16INST and emits vmla.f16.
647 (define_insn "mul<mode>3add<mode>_neon"
648 [(set (match_operand:VH 0 "s_register_operand" "=w")
649 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
650 (match_operand:VH 3 "s_register_operand" "w"))
651 (match_operand:VH 1 "s_register_operand" "0")))]
652 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
653 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
654 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract over the VDQW mode iterator: operand 0 =
;; operand 1 - operand 2 * operand 3, with operand 1 tied to operand 0.
;; Emitted as vmls.<V_if_elem>; float modes are only matched under
;; flag_unsafe_math_optimizations.
657 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
658 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
659 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
660 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
661 (match_operand:VDQW 3 "s_register_operand" "w"))))]
662 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
663 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
665 (if_then_else (match_test "<Is_float_mode>")
666 (const_string "neon_fp_mla_s<q>")
667 (const_string "neon_mla_<V_elem_ch><q>")))]
670 ;; Fused multiply-accumulate
671 ;; We define each insn twice here:
672 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
673 ;; to be able to use when converting to FMA.
674 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add over the VCVTF mode iterator: operand 0 =
;; operand 1 * operand 2 + operand 3, with operand 3 tied to operand 0.
;; This variant requires flag_unsafe_math_optimizations in addition to
;; TARGET_NEON && TARGET_FMA.
675 (define_insn "fma<VCVTF:mode>4"
676 [(set (match_operand:VCVTF 0 "register_operand" "=w")
677 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
678 (match_operand:VCVTF 2 "register_operand" "w")
679 (match_operand:VCVTF 3 "register_operand" "0")))]
680 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
681 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
682 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and vfma template as fma<VCVTF:mode>4, but conditioned only on
;; TARGET_NEON && TARGET_FMA (no flag_unsafe_math_optimizations
;; requirement).
685 (define_insn "fma<VCVTF:mode>4_intrinsic"
686 [(set (match_operand:VCVTF 0 "register_operand" "=w")
687 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
688 (match_operand:VCVTF 2 "register_operand" "w")
689 (match_operand:VCVTF 3 "register_operand" "0")))]
690 "TARGET_NEON && TARGET_FMA"
691 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
692 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add over the VH mode iterator, with operand 3 tied to
;; operand 0.  Requires TARGET_NEON_FP16INST &&
;; flag_unsafe_math_optimizations; emits vfma.<V_if_elem>.
695 (define_insn "fma<VH:mode>4"
696 [(set (match_operand:VH 0 "register_operand" "=w")
698 (match_operand:VH 1 "register_operand" "w")
699 (match_operand:VH 2 "register_operand" "w")
700 (match_operand:VH 3 "register_operand" "0")))]
701 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
702 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same operands and vfma template as fma<VH:mode>4, but conditioned only
;; on TARGET_NEON_FP16INST (no flag_unsafe_math_optimizations requirement).
706 (define_insn "fma<VH:mode>4_intrinsic"
707 [(set (match_operand:VH 0 "register_operand" "=w")
709 (match_operand:VH 1 "register_operand" "w")
710 (match_operand:VH 2 "register_operand" "w")
711 (match_operand:VH 3 "register_operand" "0")))]
712 "TARGET_NEON_FP16INST"
713 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
714 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract over the VCVTF mode iterator: operand 0 =
;; (-operand 1) * operand 2 + operand 3, with operand 3 tied to operand 0.
;; Requires TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations;
;; emits vfms.<V_if_elem>.
717 (define_insn "*fmsub<VCVTF:mode>4"
718 [(set (match_operand:VCVTF 0 "register_operand" "=w")
719 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
720 (match_operand:VCVTF 2 "register_operand" "w")
721 (match_operand:VCVTF 3 "register_operand" "0")))]
722 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
723 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same negated-multiplicand operands and vfms template as
;; *fmsub<VCVTF:mode>4, but conditioned only on TARGET_NEON && TARGET_FMA.
727 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
728 [(set (match_operand:VCVTF 0 "register_operand" "=w")
730 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
731 (match_operand:VCVTF 2 "register_operand" "w")
732 (match_operand:VCVTF 3 "register_operand" "0")))]
733 "TARGET_NEON && TARGET_FMA"
734 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract over the VH mode iterator with operand 1
;; negated and operand 3 tied to operand 0.  Requires TARGET_NEON_FP16INST;
;; emits vfms.<V_if_elem>.
738 (define_insn "fmsub<VH:mode>4_intrinsic"
739 [(set (match_operand:VH 0 "register_operand" "=w")
741 (neg:VH (match_operand:VH 1 "register_operand" "w"))
742 (match_operand:VH 2 "register_operand" "w")
743 (match_operand:VH 3 "register_operand" "0")))]
744 "TARGET_NEON_FP16INST"
745 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
746 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Rounding pattern over the VCVTF modes, one instance per NEON_VRINT
;; variant.  Requires TARGET_NEON && TARGET_FPU_ARMV8 and emits
;; vrint<nvrint_variant>.f32 (the element size is hard-coded as f32 in the
;; template).
749 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
750 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
751 (unspec:VCVTF [(match_operand:VCVTF 1
752 "s_register_operand" "w")]
754 "TARGET_NEON && TARGET_FPU_ARMV8"
755 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
756 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Float-to-integer conversion with an explicit rounding variant: the
;; VCVTF source is wrapped in an unspec and converted via FIXUORS to
;; <V_cmp_result>.  Requires TARGET_NEON && TARGET_FPU_ARMV8, emits
;; vcvt<nvrint_variant>.<su>32.f32 and is marked not predicable.
759 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
760 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
761 (FIXUORS:<V_cmp_result> (unspec:VCVTF
762 [(match_operand:VCVTF 1 "register_operand" "w")]
764 "TARGET_NEON && TARGET_FPU_ARMV8"
765 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
766 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
767 (set_attr "predicable" "no")]
;; Bitwise OR over the VDQ mode iterator.  Alternative 0 is the
;; three-register vorr; alternative 1 (constraint Dl, operand 1 tied to
;; operand 0) is an immediate form whose template is built by
;; neon_output_logic_immediate ("vorr", ...) with its fourth argument 0.
770 (define_insn "ior<mode>3"
771 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
772 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
773 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
776 switch (which_alternative)
778 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
779 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
780 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
781 default: gcc_unreachable ();
784 [(set_attr "type" "neon_logic<q>")]
787 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
788 ;; vorr. We support the pseudo-instruction vand instead, because that
789 ;; corresponds to the canonical form the middle-end expects to use for
790 ;; immediate bitwise-ANDs.
;; Bitwise AND over the VDQ mode iterator.  Alternative 0 is the
;; three-register vand; alternative 1 (constraint DL, operand 1 tied to
;; operand 0) is an immediate form whose template is built by
;; neon_output_logic_immediate ("vand", ...) with its fourth argument 1.
792 (define_insn "and<mode>3"
793 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
794 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
795 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
798 switch (which_alternative)
800 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
801 case 1: return neon_output_logic_immediate ("vand", &operands[2],
802 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
803 default: gcc_unreachable ();
806 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT over the VDQ mode iterator: operand 0 = (~operand 2) | operand 1,
;; emitted as vorn.
809 (define_insn "orn<mode>3_neon"
810 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
811 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
812 (match_operand:VDQ 1 "s_register_operand" "w")))]
814 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
815 [(set_attr "type" "neon_logic<q>")]
818 ;; TODO: investigate whether we should disable
819 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT: operand 0 = (~operand 2) | operand 1, with four
;; alternatives.  The split applies when the destination is not a VFP
;; register.  The split template performs the operation separately on the
;; SImode low and high parts of the operands; the preparation code also
;; contains a path that emits one_cmpldi2 followed by iordi3 (the condition
;; choosing between the two paths is not visible in this extract).
821 (define_insn_and_split "orndi3_neon"
822 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
823 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
824 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
832 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
833 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
834 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
839 operands[3] = gen_highpart (SImode, operands[0]);
840 operands[0] = gen_lowpart (SImode, operands[0]);
841 operands[4] = gen_highpart (SImode, operands[2]);
842 operands[2] = gen_lowpart (SImode, operands[2]);
843 operands[5] = gen_highpart (SImode, operands[1]);
844 operands[1] = gen_lowpart (SImode, operands[1]);
848 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
849 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
853 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
854 (set_attr "length" "*,16,8,8")
855 (set_attr "arch" "any,a,t2,t2")]
;; Bit-clear over the VDQ mode iterator: operand 0 = (~operand 2) &
;; operand 1, emitted as vbic.
858 (define_insn "bic<mode>3_neon"
859 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
860 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
861 (match_operand:VDQ 1 "s_register_operand" "w")))]
863 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
864 [(set_attr "type" "neon_logic<q>")]
867 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = (~operand 2) & operand 1, with three
;; alternatives.  The first is typed neon_logic; the other two are typed
;; "multiple" with length 8.  The output template is not visible in this
;; extract.
868 (define_insn "bicdi3_neon"
869 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
870 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
871 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
877 [(set_attr "type" "neon_logic,multiple,multiple")
878 (set_attr "length" "*,8,8")]
;; Bitwise exclusive-OR over the VDQ mode iterator, emitted as veor.
881 (define_insn "xor<mode>3"
882 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
883 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
884 (match_operand:VDQ 2 "s_register_operand" "w")))]
886 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement over the VDQ mode iterator, emitted as vmvn.
890 (define_insn "one_cmpl<mode>2"
891 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
892 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
894 "vmvn\t%<V_reg>0, %<V_reg>1"
895 [(set_attr "type" "neon_move<q>")]
;; Absolute value over the VDQW mode iterator, emitted as
;; vabs.<V_s_elem>.  The type attribute is neon_fp_abs_s<q> for float
;; modes and neon_abs<q> otherwise.
898 (define_insn "abs<mode>2"
899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
900 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
902 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
904 (if_then_else (match_test "<Is_float_mode>")
905 (const_string "neon_fp_abs_s<q>")
906 (const_string "neon_abs<q>")))]
;; Negation over the VDQW mode iterator, emitted as vneg.<V_s_elem>.  The
;; type attribute is neon_fp_neg_s<q> for float modes and neon_neg<q>
;; otherwise.
909 (define_insn "neg<mode>2"
910 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
911 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
913 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
915 (if_then_else (match_test "<Is_float_mode>")
916 (const_string "neon_fp_neg_s<q>")
917 (const_string "neon_neg<q>")))]
;; DImode negation with four alternatives.  The pattern carries a DImode
;; scratch (operand 2, a real register only in the second alternative) and
;; clobbers CC_REGNUM.  Every alternative has length 8 and type
;; "multiple"; the output template is not visible in this extract.
920 (define_insn "negdi2_neon"
921 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
922 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
923 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
924 (clobber (reg:CC CC_REGNUM))]
927 [(set_attr "length" "8")
928 (set_attr "type" "multiple")]
931 ; Split negdi2_neon for vfp registers
; After reload, when the destination is a VFP register, the negation is
; rewritten as: scratch := 0, then destination := scratch - source (with
; the CC clobber kept).  If no scratch register was allocated, the
; destination itself is used to hold the zero.
933 [(set (match_operand:DI 0 "s_register_operand" "")
934 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
935 (clobber (match_scratch:DI 2 ""))
936 (clobber (reg:CC CC_REGNUM))]
937 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
938 [(set (match_dup 2) (const_int 0))
939 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
940 (clobber (reg:CC CC_REGNUM))])]
942 if (!REG_P (operands[2]))
943 operands[2] = operands[0];
947 ; Split negdi2_neon for core registers
; After reload on TARGET_32BIT, when the destination satisfies
; arm_general_register_operand, the pattern is re-emitted as a plain
; neg:DI with only the CC clobber (the scratch is dropped).
949 [(set (match_operand:DI 0 "s_register_operand" "")
950 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
951 (clobber (match_scratch:DI 2 ""))
952 (clobber (reg:CC CC_REGNUM))]
953 "TARGET_32BIT && reload_completed
954 && arm_general_register_operand (operands[0], DImode)"
955 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
956 (clobber (reg:CC CC_REGNUM))])]
;; abs/neg over the VH mode iterator, one instance per ABSNEG code.
;; Requires TARGET_NEON_FP16INST and emits v<absneg_str>.<V_s_elem>.
;; NOTE(review): typed "neon_abs<q>" for every ABSNEG code, whereas the
;; VDQW abs/neg patterns use neon_fp_abs_s<q>/neon_fp_neg_s<q> for float
;; modes -- confirm this scheduling type is intentional.
960 (define_insn "<absneg_str><mode>2"
961 [(set (match_operand:VH 0 "s_register_operand" "=w")
962 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
963 "TARGET_NEON_FP16INST"
964 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
965 [(set_attr "type" "neon_abs<q>")]
;; Expander named neon_v<absneg_str><mode> for the VH modes; it simply
;; emits the corresponding <absneg_str><mode>2 insn on the same operands.
968 (define_expand "neon_v<absneg_str><mode>"
970 (match_operand:VH 0 "s_register_operand")
971 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
972 "TARGET_NEON_FP16INST"
974 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
;; Unary rounding-family pattern over the VH mode iterator; the mnemonic
;; comes from <fp16_rnd_insn> with element suffix <V_s_elem>.  Requires
;; TARGET_NEON_FP16INST.  (The unspec code list is not visible in this
;; extract.)
978 (define_insn "neon_v<fp16_rnd_str><mode>"
979 [(set (match_operand:VH 0 "s_register_operand" "=w")
981 [(match_operand:VH 1 "s_register_operand" "w")]
983 "TARGET_NEON_FP16INST"
984 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
985 [(set_attr "type" "neon_fp_round_s<q>")]
;; vrsqrte over the VH mode iterator; requires TARGET_NEON_FP16INST and
;; emits vrsqrte.f16.  (The unspec code is not visible in this extract.)
988 (define_insn "neon_vrsqrte<mode>"
989 [(set (match_operand:VH 0 "s_register_operand" "=w")
991 [(match_operand:VH 1 "s_register_operand" "w")]
993 "TARGET_NEON_FP16INST"
994 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
995 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned minimum over the VDQIW mode iterator, emitted as
;; vmin.<V_u_elem>.
998 (define_insn "*umin<mode>3_neon"
999 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1000 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1001 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1003 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1004 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned maximum over the VDQIW mode iterator, emitted as
;; vmax.<V_u_elem>.
1007 (define_insn "*umax<mode>3_neon"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1009 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1010 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1012 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1013 [(set_attr "type" "neon_minmax<q>")]
1016 (define_insn "*smin<mode>3_neon"
1017 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1018 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1019 (match_operand:VDQW 2 "s_register_operand" "w")))]
1021 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1023 (if_then_else (match_test "<Is_float_mode>")
1024 (const_string "neon_fp_minmax_s<q>")
1025 (const_string "neon_minmax<q>")))]
1028 (define_insn "*smax<mode>3_neon"
1029 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1030 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1031 (match_operand:VDQW 2 "s_register_operand" "w")))]
1033 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1035 (if_then_else (match_test "<Is_float_mode>")
1036 (const_string "neon_fp_minmax_s<q>")
1037 (const_string "neon_minmax<q>")))]
1040 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1041 ; generic vectorizer code. It ends up creating a V2DI constructor with
1044 (define_insn "vashl<mode>3"
1045 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1046 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1047 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1050 switch (which_alternative)
1052 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1053 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1055 VALID_NEON_QREG_MODE (<MODE>mode),
1057 default: gcc_unreachable ();
1060 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1063 (define_insn "vashr<mode>3_imm"
1064 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1065 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1066 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1069 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1070 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1073 [(set_attr "type" "neon_shift_imm<q>")]
1076 (define_insn "vlshr<mode>3_imm"
1077 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1078 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1079 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1082 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1083 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1086 [(set_attr "type" "neon_shift_imm<q>")]
1089 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
1090 ; amount, with signed operands. This is essentially the same as vashl<mode>3
1091 ; above, but using an unspec in case GCC tries anything tricky with negative
1094 (define_insn "ashl<mode>3_signed"
1095 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1096 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1097 (match_operand:VDQI 2 "s_register_operand" "w")]
1098 UNSPEC_ASHIFT_SIGNED))]
1100 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1101 [(set_attr "type" "neon_shift_reg<q>")]
1104 ; Used for implementing logical shift-right, which is a left-shift by a negative
1105 ; amount, with unsigned operands.
1107 (define_insn "ashl<mode>3_unsigned"
1108 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1109 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1110 (match_operand:VDQI 2 "s_register_operand" "w")]
1111 UNSPEC_ASHIFT_UNSIGNED))]
1113 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1114 [(set_attr "type" "neon_shift_reg<q>")]
;; Expand a vector arithmetic right shift.  When the shift count (operand 2)
;; is a register, it is negated into a fresh pseudo and the shift is emitted
;; as a left shift through the signed unspec pattern ashl<mode>3_signed.
;; The immediate-count form is emitted through vashr<mode>3_imm.
1117 (define_expand "vashr<mode>3"
1118 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1119 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1120 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1123 if (s_register_operand (operands[2], <MODE>mode))
1125 rtx neg = gen_reg_rtx (<MODE>mode);
1126 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1127 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1130 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expand a vector logical right shift.  When the shift count (operand 2)
;; is a register, it is negated into a fresh pseudo and the shift is emitted
;; as a left shift through the unsigned unspec pattern ashl<mode>3_unsigned.
;; The immediate-count form is emitted through vlshr<mode>3_imm.
1134 (define_expand "vlshr<mode>3"
1135 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1136 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1137 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1140 if (s_register_operand (operands[2], <MODE>mode))
1142 rtx neg = gen_reg_rtx (<MODE>mode);
1143 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1144 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1147 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1153 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1154 ;; leaving the upper half uninitialized. This is OK since the shift
1155 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1156 ;; data flow analysis however, we pretend the full register is set
1158 (define_insn "neon_load_count"
1159 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1160 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1161 UNSPEC_LOAD_COUNT))]
1164 vld1.32\t{%P0[0]}, %A1
1165 vmov.32\t%P0[0], %1"
1166 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1169 (define_insn "ashldi3_neon_noclobber"
1170 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1171 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1172 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1173 "TARGET_NEON && reload_completed
1174 && (!CONST_INT_P (operands[2])
1175 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1177 vshl.u64\t%P0, %P1, %2
1178 vshl.u64\t%P0, %P1, %P2"
1179 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1182 (define_insn_and_split "ashldi3_neon"
1183 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w")
1184 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w")
1185 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i")))
1186 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X"))
1187 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X"))
1188 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X"))
1189 (clobber (reg:CC_C CC_REGNUM))]
1192 "TARGET_NEON && reload_completed"
1196 if (IS_VFP_REGNUM (REGNO (operands[0])))
1198 if (CONST_INT_P (operands[2]))
1200 if (INTVAL (operands[2]) < 1)
1202 emit_insn (gen_movdi (operands[0], operands[1]));
1205 else if (INTVAL (operands[2]) > 63)
1206 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1210 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1211 operands[2] = operands[5];
1214 /* Ditch the unnecessary clobbers. */
1215 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1220 /* The shift expanders support either full overlap or no overlap. */
1221 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1222 || REGNO (operands[0]) == REGNO (operands[1]));
1224 if (operands[2] == CONST1_RTX (SImode))
1225 /* This clobbers CC. */
1226 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1228 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1229 operands[2], operands[3], operands[4]);
1233 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1234 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1235 (set_attr "type" "multiple")]
1238 ; The shift amount needs to be negated for right-shifts
1239 (define_insn "signed_shift_di3_neon"
1240 [(set (match_operand:DI 0 "s_register_operand" "=w")
1241 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1242 (match_operand:DI 2 "s_register_operand" " w")]
1243 UNSPEC_ASHIFT_SIGNED))]
1244 "TARGET_NEON && reload_completed"
1245 "vshl.s64\t%P0, %P1, %P2"
1246 [(set_attr "type" "neon_shift_reg")]
1249 ; The shift amount needs to be negated for right-shifts
1250 (define_insn "unsigned_shift_di3_neon"
1251 [(set (match_operand:DI 0 "s_register_operand" "=w")
1252 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1253 (match_operand:DI 2 "s_register_operand" " w")]
1254 UNSPEC_ASHIFT_UNSIGNED))]
1255 "TARGET_NEON && reload_completed"
1256 "vshl.u64\t%P0, %P1, %P2"
1257 [(set_attr "type" "neon_shift_reg")]
1260 (define_insn "ashrdi3_neon_imm_noclobber"
1261 [(set (match_operand:DI 0 "s_register_operand" "=w")
1262 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1263 (match_operand:DI 2 "const_int_operand" " i")))]
1264 "TARGET_NEON && reload_completed
1265 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1266 "vshr.s64\t%P0, %P1, %2"
1267 [(set_attr "type" "neon_shift_imm")]
1270 (define_insn "lshrdi3_neon_imm_noclobber"
1271 [(set (match_operand:DI 0 "s_register_operand" "=w")
1272 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1273 (match_operand:DI 2 "const_int_operand" " i")))]
1274 "TARGET_NEON && reload_completed
1275 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1276 "vshr.u64\t%P0, %P1, %2"
1277 [(set_attr "type" "neon_shift_imm")]
1282 (define_insn_and_split "<shift>di3_neon"
1283 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w")
1284 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1285 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1286 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1287 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1288 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1289 (clobber (reg:CC CC_REGNUM))]
1292 "TARGET_NEON && reload_completed"
1296 if (IS_VFP_REGNUM (REGNO (operands[0])))
1298 if (CONST_INT_P (operands[2]))
1300 if (INTVAL (operands[2]) < 1)
1302 emit_insn (gen_movdi (operands[0], operands[1]));
1305 else if (INTVAL (operands[2]) > 64)
1306 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1308 /* Ditch the unnecessary clobbers. */
1309 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1315 /* We must use a negative left-shift. */
1316 emit_insn (gen_negsi2 (operands[3], operands[2]));
1317 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1318 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1324 /* The shift expanders support either full overlap or no overlap. */
1325 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1326 || REGNO (operands[0]) == REGNO (operands[1]));
1328 if (operands[2] == CONST1_RTX (SImode))
1329 /* This clobbers CC. */
1330 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1332 /* This clobbers CC (ASHIFTRT by register only). */
1333 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1334 operands[2], operands[3], operands[4]);
1339 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1340 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1341 (set_attr "type" "multiple")]
1344 ;; Widening operations
1346 (define_expand "widen_ssum<mode>3"
1347 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1348 (plus:<V_double_width>
1349 (sign_extend:<V_double_width>
1350 (match_operand:VQI 1 "s_register_operand" ""))
1351 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1354 machine_mode mode = GET_MODE (operands[1]);
1357 p1 = arm_simd_vect_par_cnst_half (mode, false);
1358 p2 = arm_simd_vect_par_cnst_half (mode, true);
1360 if (operands[0] != operands[2])
1361 emit_move_insn (operands[0], operands[2]);
1363 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1367 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1375 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1376 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1377 (plus:<V_double_width>
1378 (sign_extend:<V_double_width>
1379 (vec_select:<V_HALF>
1380 (match_operand:VQI 1 "s_register_operand" "%w")
1381 (match_operand:VQI 2 "vect_par_constant_low" "")))
1382 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1385 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1386 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1388 [(set_attr "type" "neon_add_widen")])
1390 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1391 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1392 (plus:<V_double_width>
1393 (sign_extend:<V_double_width>
1394 (vec_select:<V_HALF>
1395 (match_operand:VQI 1 "s_register_operand" "%w")
1396 (match_operand:VQI 2 "vect_par_constant_high" "")))
1397 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1400 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1401 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1403 [(set_attr "type" "neon_add_widen")])
1405 (define_insn "widen_ssum<mode>3"
1406 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1408 (sign_extend:<V_widen>
1409 (match_operand:VW 1 "s_register_operand" "%w"))
1410 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1412 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1413 [(set_attr "type" "neon_add_widen")]
1416 (define_expand "widen_usum<mode>3"
1417 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1418 (plus:<V_double_width>
1419 (zero_extend:<V_double_width>
1420 (match_operand:VQI 1 "s_register_operand" ""))
1421 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1424 machine_mode mode = GET_MODE (operands[1]);
1427 p1 = arm_simd_vect_par_cnst_half (mode, false);
1428 p2 = arm_simd_vect_par_cnst_half (mode, true);
1430 if (operands[0] != operands[2])
1431 emit_move_insn (operands[0], operands[2]);
1433 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1437 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1445 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1446 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1447 (plus:<V_double_width>
1448 (zero_extend:<V_double_width>
1449 (vec_select:<V_HALF>
1450 (match_operand:VQI 1 "s_register_operand" "%w")
1451 (match_operand:VQI 2 "vect_par_constant_low" "")))
1452 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1455 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1456 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1458 [(set_attr "type" "neon_add_widen")])
1460 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1461 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1462 (plus:<V_double_width>
1463 (zero_extend:<V_double_width>
1464 (vec_select:<V_HALF>
1465 (match_operand:VQI 1 "s_register_operand" "%w")
1466 (match_operand:VQI 2 "vect_par_constant_high" "")))
1467 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1470 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1471 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1473 [(set_attr "type" "neon_add_widen")])
1475 (define_insn "widen_usum<mode>3"
1476 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1477 (plus:<V_widen> (zero_extend:<V_widen>
1478 (match_operand:VW 1 "s_register_operand" "%w"))
1479 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1481 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1482 [(set_attr "type" "neon_add_widen")]
1485 ;; Helpers for quad-word reduction operations
1487 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1488 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1489 ; N/2-element vector.
1491 (define_insn "quad_halves_<code>v4si"
1492 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1494 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1495 (parallel [(const_int 0) (const_int 1)]))
1496 (vec_select:V2SI (match_dup 1)
1497 (parallel [(const_int 2) (const_int 3)]))))]
1499 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1500 [(set_attr "vqh_mnem" "<VQH_mnem>")
1501 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1504 (define_insn "quad_halves_<code>v4sf"
1505 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1507 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1508 (parallel [(const_int 0) (const_int 1)]))
1509 (vec_select:V2SF (match_dup 1)
1510 (parallel [(const_int 2) (const_int 3)]))))]
1511 "TARGET_NEON && flag_unsafe_math_optimizations"
1512 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1513 [(set_attr "vqh_mnem" "<VQH_mnem>")
1514 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; Combine the low four lanes (0-3) of the V8HI operand 1 with its high four
;; lanes (4-7) using the <VQH_mnem> operation, writing the V4HI result to
;; operand 0.  Operand 0 appears only as the SET destination, so it is a pure
;; output and uses "=w" (not the read-write "+w"), matching the v4si and v4sf
;; variants of this pattern.
1517 (define_insn "quad_halves_<code>v8hi"
1518 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1520 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1521 (parallel [(const_int 0) (const_int 1)
1522 (const_int 2) (const_int 3)]))
1523 (vec_select:V4HI (match_dup 1)
1524 (parallel [(const_int 4) (const_int 5)
1525 (const_int 6) (const_int 7)]))))]
1527 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1528 [(set_attr "vqh_mnem" "<VQH_mnem>")
1529 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Combine the low eight lanes (0-7) of the V16QI operand 1 with its high
;; eight lanes (8-15) using the <VQH_mnem> operation, writing the V8QI result
;; to operand 0.  Operand 0 appears only as the SET destination, so it is a
;; pure output and uses "=w" (not the read-write "+w"), matching the v4si and
;; v4sf variants of this pattern.
1532 (define_insn "quad_halves_<code>v16qi"
1533 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1535 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1536 (parallel [(const_int 0) (const_int 1)
1537 (const_int 2) (const_int 3)
1538 (const_int 4) (const_int 5)
1539 (const_int 6) (const_int 7)]))
1540 (vec_select:V8QI (match_dup 1)
1541 (parallel [(const_int 8) (const_int 9)
1542 (const_int 10) (const_int 11)
1543 (const_int 12) (const_int 13)
1544 (const_int 14) (const_int 15)]))))]
1546 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1547 [(set_attr "vqh_mnem" "<VQH_mnem>")
1548 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1551 (define_expand "move_hi_quad_<mode>"
1552 [(match_operand:ANY128 0 "s_register_operand" "")
1553 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1556 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1557 GET_MODE_SIZE (<V_HALF>mode)),
1562 (define_expand "move_lo_quad_<mode>"
1563 [(match_operand:ANY128 0 "s_register_operand" "")
1564 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1567 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1573 ;; Reduction operations
1575 (define_expand "reduc_plus_scal_<mode>"
1576 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1577 (match_operand:VD 1 "s_register_operand" "")]
1578 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1580 rtx vec = gen_reg_rtx (<MODE>mode);
1581 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1582 &gen_neon_vpadd_internal<mode>);
1583 /* The same result is actually computed into every element. */
1584 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1588 (define_expand "reduc_plus_scal_<mode>"
1589 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1590 (match_operand:VQ 1 "s_register_operand" "")]
1591 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1592 && !BYTES_BIG_ENDIAN"
1594 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1596 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1597 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1602 (define_expand "reduc_plus_scal_v2di"
1603 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1604 (match_operand:V2DI 1 "s_register_operand" "")]
1605 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1607 rtx vec = gen_reg_rtx (V2DImode);
1609 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1610 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1615 (define_insn "arm_reduc_plus_internal_v2di"
1616 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1617 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1619 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1620 "vadd.i64\t%e0, %e1, %f1"
1621 [(set_attr "type" "neon_add_q")]
1624 (define_expand "reduc_smin_scal_<mode>"
1625 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1626 (match_operand:VD 1 "s_register_operand" "")]
1627 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1629 rtx vec = gen_reg_rtx (<MODE>mode);
1631 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1632 &gen_neon_vpsmin<mode>);
1633 /* The result is computed into every element of the vector. */
1634 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1638 (define_expand "reduc_smin_scal_<mode>"
1639 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1640 (match_operand:VQ 1 "s_register_operand" "")]
1641 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1642 && !BYTES_BIG_ENDIAN"
1644 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1646 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1647 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1652 (define_expand "reduc_smax_scal_<mode>"
1653 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1654 (match_operand:VD 1 "s_register_operand" "")]
1655 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1657 rtx vec = gen_reg_rtx (<MODE>mode);
1658 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1659 &gen_neon_vpsmax<mode>);
1660 /* The result is computed into every element of the vector. */
1661 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1665 (define_expand "reduc_smax_scal_<mode>"
1666 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1667 (match_operand:VQ 1 "s_register_operand" "")]
1668 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1669 && !BYTES_BIG_ENDIAN"
1671 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1673 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1674 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1679 (define_expand "reduc_umin_scal_<mode>"
1680 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1681 (match_operand:VDI 1 "s_register_operand" "")]
1684 rtx vec = gen_reg_rtx (<MODE>mode);
1685 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1686 &gen_neon_vpumin<mode>);
1687 /* The result is computed into every element of the vector. */
1688 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1692 (define_expand "reduc_umin_scal_<mode>"
1693 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1694 (match_operand:VQI 1 "s_register_operand" "")]
1695 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1697 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1699 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1700 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1705 (define_expand "reduc_umax_scal_<mode>"
1706 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1707 (match_operand:VDI 1 "s_register_operand" "")]
1710 rtx vec = gen_reg_rtx (<MODE>mode);
1711 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1712 &gen_neon_vpumax<mode>);
1713 /* The result is computed into every element of the vector. */
1714 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1718 (define_expand "reduc_umax_scal_<mode>"
1719 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1720 (match_operand:VQI 1 "s_register_operand" "")]
1721 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1723 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1725 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1726 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1731 (define_insn "neon_vpadd_internal<mode>"
1732 [(set (match_operand:VD 0 "s_register_operand" "=w")
1733 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1734 (match_operand:VD 2 "s_register_operand" "w")]
1737 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1738 ;; Assume this schedules like vadd.
1740 (if_then_else (match_test "<Is_float_mode>")
1741 (const_string "neon_fp_reduc_add_s<q>")
1742 (const_string "neon_reduc_add<q>")))]
1745 (define_insn "neon_vpaddv4hf"
1747 (match_operand:V4HF 0 "s_register_operand" "=w")
1748 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1749 (match_operand:V4HF 2 "s_register_operand" "w")]
1751 "TARGET_NEON_FP16INST"
1752 "vpadd.f16\t%P0, %P1, %P2"
1753 [(set_attr "type" "neon_reduc_add")]
1756 (define_insn "neon_vpsmin<mode>"
1757 [(set (match_operand:VD 0 "s_register_operand" "=w")
1758 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1759 (match_operand:VD 2 "s_register_operand" "w")]
1762 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1764 (if_then_else (match_test "<Is_float_mode>")
1765 (const_string "neon_fp_reduc_minmax_s<q>")
1766 (const_string "neon_reduc_minmax<q>")))]
1769 (define_insn "neon_vpsmax<mode>"
1770 [(set (match_operand:VD 0 "s_register_operand" "=w")
1771 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1772 (match_operand:VD 2 "s_register_operand" "w")]
1775 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1777 (if_then_else (match_test "<Is_float_mode>")
1778 (const_string "neon_fp_reduc_minmax_s<q>")
1779 (const_string "neon_reduc_minmax<q>")))]
1782 (define_insn "neon_vpumin<mode>"
1783 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1784 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1785 (match_operand:VDI 2 "s_register_operand" "w")]
1788 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1789 [(set_attr "type" "neon_reduc_minmax<q>")]
1792 (define_insn "neon_vpumax<mode>"
1793 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1794 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1795 (match_operand:VDI 2 "s_register_operand" "w")]
1798 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1799 [(set_attr "type" "neon_reduc_minmax<q>")]
1802 ;; Saturating arithmetic
1804 ; NOTE: Neon supports many more saturating variants of instructions than the
1805 ; following, but these are all GCC currently understands.
1806 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1807 ; yet either, although these patterns may be used by intrinsics when they're
1810 (define_insn "*ss_add<mode>_neon"
1811 [(set (match_operand:VD 0 "s_register_operand" "=w")
1812 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1813 (match_operand:VD 2 "s_register_operand" "w")))]
1815 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1816 [(set_attr "type" "neon_qadd<q>")]
1819 (define_insn "*us_add<mode>_neon"
1820 [(set (match_operand:VD 0 "s_register_operand" "=w")
1821 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1822 (match_operand:VD 2 "s_register_operand" "w")))]
1824 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1825 [(set_attr "type" "neon_qadd<q>")]
1828 (define_insn "*ss_sub<mode>_neon"
1829 [(set (match_operand:VD 0 "s_register_operand" "=w")
1830 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1831 (match_operand:VD 2 "s_register_operand" "w")))]
1833 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1834 [(set_attr "type" "neon_qsub<q>")]
1837 (define_insn "*us_sub<mode>_neon"
1838 [(set (match_operand:VD 0 "s_register_operand" "=w")
1839 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1840 (match_operand:VD 2 "s_register_operand" "w")))]
1842 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1843 [(set_attr "type" "neon_qsub<q>")]
1846 ;; Conditional instructions. These are comparisons with conditional moves for
1847 ;; vectors. They perform the assignment:
1849 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1851 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Expand a vector conditional select.  A lane mask is built in a
;; <V_cmp_result> register from comparison operator 3 applied to operands 4
;; and 5, and neon_vbsl then selects between operands 1 and 2 under that mask.
;; When swap_bsl_operands is set, the mask holds the inverse condition and
;; operand 2 is passed to neon_vbsl ahead of operand 1.
1854 (define_expand "vcond<mode><mode>"
1855 [(set (match_operand:VDQW 0 "s_register_operand" "")
1857 (match_operator 3 "comparison_operator"
1858 [(match_operand:VDQW 4 "s_register_operand" "")
1859 (match_operand:VDQW 5 "nonmemory_operand" "")])
1860 (match_operand:VDQW 1 "s_register_operand" "")
1861 (match_operand:VDQW 2 "s_register_operand" "")))]
1862 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1865 int use_zero_form = 0;
1866 int swap_bsl_operands = 0;
1867 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1868 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1870 rtx (*base_comparison) (rtx, rtx, rtx);
1871 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1873 switch (GET_CODE (operands[3]))
1880 if (operands[5] == CONST0_RTX (<MODE>mode))
1887 if (!REG_P (operands[5]))
1888 operands[5] = force_reg (<MODE>mode, operands[5]);
1891 switch (GET_CODE (operands[3]))
1901 base_comparison = gen_neon_vcge<mode>;
1902 complimentary_comparison = gen_neon_vcgt<mode>;
1910 base_comparison = gen_neon_vcgt<mode>;
1911 complimentary_comparison = gen_neon_vcge<mode>;
1916 base_comparison = gen_neon_vceq<mode>;
1917 complimentary_comparison = gen_neon_vceq<mode>;
1923 switch (GET_CODE (operands[3]))
1930 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1931 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1937 Note that there also exist direct comparison against 0 forms,
1938 so catch those as a special case. */
1942 switch (GET_CODE (operands[3]))
1945 base_comparison = gen_neon_vclt<mode>;
1948 base_comparison = gen_neon_vcle<mode>;
1951 /* Do nothing, other zero form cases already have the correct
1958 emit_insn (base_comparison (mask, operands[4], operands[5]));
1960 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1967 /* Vector compare returns false for lanes which are unordered, so if we use
1968 the inverse of the comparison we actually want to emit, then
1969 swap the operands to BSL, we will end up with the correct result.
1970 Note that a NE NaN and NaN NE b are true for all a, b.
1972 Our transformations are:
1977 a NE b -> !(a EQ b) */
1980 emit_insn (base_comparison (mask, operands[4], operands[5]));
1982 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1984 swap_bsl_operands = 1;
1987 /* We check (a > b || b > a). Combining these comparisons gives us
1988 true iff (a != b && a ORDERED b); swapping the operands to BSL
1989 will then give us (a == b || a UNORDERED b) as intended. */
1991 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1992 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1993 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1994 swap_bsl_operands = 1;
1997 /* Operands are ORDERED iff (a > b || b >= a).
1998 Swapping the operands to BSL will give the UNORDERED case. */
1999 swap_bsl_operands = 1;
2002 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2003 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2004 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2010 if (swap_bsl_operands)
2011 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2014 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2019 (define_expand "vcondu<mode><mode>"
2020 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2022 (match_operator 3 "arm_comparison_operator"
2023 [(match_operand:VDQIW 4 "s_register_operand" "")
2024 (match_operand:VDQIW 5 "s_register_operand" "")])
2025 (match_operand:VDQIW 1 "s_register_operand" "")
2026 (match_operand:VDQIW 2 "s_register_operand" "")))]
2030 int inverse = 0, immediate_zero = 0;
2032 mask = gen_reg_rtx (<V_cmp_result>mode);
2034 if (operands[5] == CONST0_RTX (<MODE>mode))
2036 else if (!REG_P (operands[5]))
2037 operands[5] = force_reg (<MODE>mode, operands[5]);
2039 switch (GET_CODE (operands[3]))
2042 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2046 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2050 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2055 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2057 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2062 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2064 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2068 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2077 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2080 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2086 ;; Patterns for builtins.
2088 ; good for plain vadd, vaddq.
2090 (define_expand "neon_vadd<mode>"
2091 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2092 (match_operand:VCVTF 1 "s_register_operand" "w")
2093 (match_operand:VCVTF 2 "s_register_operand" "w")]
2096 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2097 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2099 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; Builtin expander for vadd on the VH modes, guarded by
;; TARGET_NEON_FP16INST.  Forwards its three operands to add<mode>3_fp16.
2104 (define_expand "neon_vadd<mode>"
2105 [(match_operand:VH 0 "s_register_operand")
2106 (match_operand:VH 1 "s_register_operand")
2107 (match_operand:VH 2 "s_register_operand")]
2108 "TARGET_NEON_FP16INST"
2110 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; Builtin expander for vsub on the VH modes, guarded by
;; TARGET_NEON_FP16INST.  Forwards its three operands to sub<mode>3_fp16.
2114 (define_expand "neon_vsub<mode>"
2115 [(match_operand:VH 0 "s_register_operand")
2116 (match_operand:VH 1 "s_register_operand")
2117 (match_operand:VH 2 "s_register_operand")]
2118 "TARGET_NEON_FP16INST"
2120 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2124 ; Note that NEON operations don't support the full IEEE 754 standard: in
2125 ; particular, denormal values are flushed to zero. This means that GCC cannot
2126 ; use those instructions for autovectorization, etc. unless
2127 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2128 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2129 ; header) must work in either case: if -funsafe-math-optimizations is given,
2130 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2131 ; expand to unspecs (which may potentially limit the extent to which they might
2132 ; be optimized by generic code).
2134 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd for the VCVTF modes.  Prints vadd.<V_if_elem> with
;; three <V_reg> operands.  The type attribute is neon_fp_addsub_s<q> when
;; <Is_float_mode> tests true and neon_add<q> otherwise.
2136 (define_insn "neon_vadd<mode>_unspec"
2137 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2138 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2139 (match_operand:VCVTF 2 "s_register_operand" "w")]
2142 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2144 (if_then_else (match_test "<Is_float_mode>")
2145 (const_string "neon_fp_addsub_s<q>")
2146 (const_string "neon_add<q>")))]
;; Long add: two VDI sources produce a <V_widen> result.  Prints
;; vaddl.<sup><size> with operand 0 under %q and the sources under %P.
2149 (define_insn "neon_vaddl<sup><mode>"
2150 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2151 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2152 (match_operand:VDI 2 "s_register_operand" "w")]
2155 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2156 [(set_attr "type" "neon_add_long")]
;; Wide add: a <V_widen> operand 1 and a VDI operand 2 produce a <V_widen>
;; result.  Prints vaddw.<sup><size> with %q0, %q1 and %P2.
2159 (define_insn "neon_vaddw<sup><mode>"
2160 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2161 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2162 (match_operand:VDI 2 "s_register_operand" "w")]
2165 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2166 [(set_attr "type" "neon_add_widen")]
;; Halving add on the VDQIW modes; the <r> iterator attribute selects the
;; mnemonic variant.  Prints v<r>hadd.<sup><size> on three <V_reg> operands.
2171 (define_insn "neon_v<r>hadd<sup><mode>"
2172 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2173 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2174 (match_operand:VDQIW 2 "s_register_operand" "w")]
2177 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2178 [(set_attr "type" "neon_add_halve_q")]
;; vqadd on the VDQIX modes.  Prints vqadd.<sup><size> on three <V_reg>
;; operands; type neon_qadd<q>.
2181 (define_insn "neon_vqadd<sup><mode>"
2182 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2183 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2184 (match_operand:VDQIX 2 "s_register_operand" "w")]
2187 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2188 [(set_attr "type" "neon_qadd<q>")]
;; v<r>addhn: two VN sources produce a <V_narrow> result.  Prints
;; v<r>addhn.<V_if_elem> with %P0 as destination and %q1, %q2 as sources.
2191 (define_insn "neon_v<r>addhn<mode>"
2192 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2193 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2194 (match_operand:VN 2 "s_register_operand" "w")]
2197 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2198 [(set_attr "type" "neon_add_halve_narrow_q")]
2201 ;; Polynomial and Float multiplication.
;; Unspec vmul over the VPF modes.  Prints vmul.<pf><size> on three <V_reg>
;; operands.  The type attribute is neon_fp_mul_s<q> when <Is_float_mode>
;; tests true and neon_mul_<V_elem_ch><q> otherwise.
2202 (define_insn "neon_vmul<pf><mode>"
2203 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2204 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2205 (match_operand:VPF 2 "s_register_operand" "w")]
2208 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2210 (if_then_else (match_test "<Is_float_mode>")
2211 (const_string "neon_fp_mul_s<q>")
2212 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Standard-named multiply for the VH modes.  Only available when both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations hold.  Prints
;; vmul.f16 on three <V_reg> operands.
2215 (define_insn "mul<mode>3"
2217 (match_operand:VH 0 "s_register_operand" "=w")
2219 (match_operand:VH 1 "s_register_operand" "w")
2220 (match_operand:VH 2 "s_register_operand" "w")))]
2221 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2222 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2223 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; vmul.f16 for the VH modes.  Unlike mul<mode>3 this is guarded by
;; TARGET_NEON_FP16INST alone, with no flag_unsafe_math_optimizations test.
2226 (define_insn "neon_vmulf<mode>"
2228 (match_operand:VH 0 "s_register_operand" "=w")
2230 (match_operand:VH 1 "s_register_operand" "w")
2231 (match_operand:VH 2 "s_register_operand" "w")))]
2232 "TARGET_NEON_FP16INST"
2233 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2234 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Builtin expander for vmla on the VDQW modes.  Emits
;; mul<mode>3add<mode>_neon when the mode is not a float mode or
;; flag_unsafe_math_optimizations is set; the other visible call emits
;; neon_vmla<mode>_unspec.  All four operands are passed through unchanged.
2237 (define_expand "neon_vmla<mode>"
2238 [(match_operand:VDQW 0 "s_register_operand" "=w")
2239 (match_operand:VDQW 1 "s_register_operand" "0")
2240 (match_operand:VDQW 2 "s_register_operand" "w")
2241 (match_operand:VDQW 3 "s_register_operand" "w")]
2244 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2245 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2246 operands[2], operands[3]));
2248 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2249 operands[2], operands[3]));
;; Builtin expander for vfma on the VCVTF modes, guarded by
;; TARGET_NEON && TARGET_FMA.  Calls fma<mode>4_intrinsic with operands 0, 2
;; and 3 first; the remaining argument is on a line not shown here.
2253 (define_expand "neon_vfma<VCVTF:mode>"
2254 [(match_operand:VCVTF 0 "s_register_operand")
2255 (match_operand:VCVTF 1 "s_register_operand")
2256 (match_operand:VCVTF 2 "s_register_operand")
2257 (match_operand:VCVTF 3 "s_register_operand")]
2258 "TARGET_NEON && TARGET_FMA"
2260 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfma on the VH modes, guarded by
;; TARGET_NEON_FP16INST.  Calls fma<mode>4_intrinsic with operands 0, 2 and
;; 3 first; the remaining argument is on a line not shown here.
2265 (define_expand "neon_vfma<VH:mode>"
2266 [(match_operand:VH 0 "s_register_operand")
2267 (match_operand:VH 1 "s_register_operand")
2268 (match_operand:VH 2 "s_register_operand")
2269 (match_operand:VH 3 "s_register_operand")]
2270 "TARGET_NEON_FP16INST"
2272 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfms on the VCVTF modes, guarded by
;; TARGET_NEON && TARGET_FMA.  Calls fmsub<mode>4_intrinsic with operands 0,
;; 2 and 3 first; the remaining argument is on a line not shown here.
2277 (define_expand "neon_vfms<VCVTF:mode>"
2278 [(match_operand:VCVTF 0 "s_register_operand")
2279 (match_operand:VCVTF 1 "s_register_operand")
2280 (match_operand:VCVTF 2 "s_register_operand")
2281 (match_operand:VCVTF 3 "s_register_operand")]
2282 "TARGET_NEON && TARGET_FMA"
2284 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfms on the VH modes, guarded by
;; TARGET_NEON_FP16INST.  Calls fmsub<mode>4_intrinsic with operands 0, 2
;; and 3 first; the remaining argument is on a line not shown here.
2289 (define_expand "neon_vfms<VH:mode>"
2290 [(match_operand:VH 0 "s_register_operand")
2291 (match_operand:VH 1 "s_register_operand")
2292 (match_operand:VH 2 "s_register_operand")
2293 (match_operand:VH 3 "s_register_operand")]
2294 "TARGET_NEON_FP16INST"
2296 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2301 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla for the VDQW modes.  Operand 1 is tied to the output
;; (constraint "0"), so the template only names operands 0, 2 and 3.  The
;; type attribute is neon_fp_mla_s<q> when <Is_float_mode> tests true and
;; neon_mla_<V_elem_ch><q> otherwise.
2303 (define_insn "neon_vmla<mode>_unspec"
2304 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2305 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2306 (match_operand:VDQW 2 "s_register_operand" "w")
2307 (match_operand:VDQW 3 "s_register_operand" "w")]
2310 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2312 (if_then_else (match_test "<Is_float_mode>")
2313 (const_string "neon_fp_mla_s<q>")
2314 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: a <V_widen> accumulator (operand 1, tied to the output) and two VW
;; sources.  Prints vmlal.<sup><size> with %q0, %P2 and %P3.
2317 (define_insn "neon_vmlal<sup><mode>"
2318 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2319 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2320 (match_operand:VW 2 "s_register_operand" "w")
2321 (match_operand:VW 3 "s_register_operand" "w")]
2324 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2325 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin expander for vmls on the VDQW modes.  Emits
;; mul<mode>3neg<mode>add<mode>_neon when the mode is not a float mode or
;; flag_unsafe_math_optimizations is set; the other visible call emits
;; neon_vmls<mode>_unspec.  All four operands are passed through unchanged.
2328 (define_expand "neon_vmls<mode>"
2329 [(match_operand:VDQW 0 "s_register_operand" "=w")
2330 (match_operand:VDQW 1 "s_register_operand" "0")
2331 (match_operand:VDQW 2 "s_register_operand" "w")
2332 (match_operand:VDQW 3 "s_register_operand" "w")]
2335 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2336 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2337 operands[1], operands[2], operands[3]));
2339 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2340 operands[2], operands[3]));
2344 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls for the VDQW modes.  Operand 1 is tied to the output
;; (constraint "0"), so the template only names operands 0, 2 and 3.  The
;; type attribute is neon_fp_mla_s<q> when <Is_float_mode> tests true and
;; neon_mla_<V_elem_ch><q> otherwise.
2346 (define_insn "neon_vmls<mode>_unspec"
2347 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2348 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2349 (match_operand:VDQW 2 "s_register_operand" "w")
2350 (match_operand:VDQW 3 "s_register_operand" "w")]
2353 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2355 (if_then_else (match_test "<Is_float_mode>")
2356 (const_string "neon_fp_mla_s<q>")
2357 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: a <V_widen> accumulator (operand 1, tied to the output) and two VW
;; sources.  Prints vmlsl.<sup><size> with %q0, %P2 and %P3.
2360 (define_insn "neon_vmlsl<sup><mode>"
2361 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2362 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2363 (match_operand:VW 2 "s_register_operand" "w")
2364 (match_operand:VW 3 "s_register_operand" "w")]
2367 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2368 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2371 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics, selected by <r>, over the VMDQI modes.
;; Prints vq<r>dmulh.<V_s_elem> on three <V_reg> operands.
2372 (define_insn "neon_vq<r>dmulh<mode>"
2373 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2374 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2375 (match_operand:VMDQI 2 "s_register_operand" "w")]
2378 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2379 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2382 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics, selected by the VQRDMLH_AS iterator's
;; neon_rdma_as attribute, over the VMDQI modes.  Operand 1 is tied to the
;; output, so the template names only operands 0, 2 and 3.
2383 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2384 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2385 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2386 (match_operand:VMDQI 2 "s_register_operand" "w")
2387 (match_operand:VMDQI 3 "s_register_operand" "w")]
2390 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2391 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: a <V_widen> accumulator (operand 1, tied to the output) and two
;; VMDI sources.  Prints vqdmlal.<V_s_elem> with %q0, %P2 and %P3.
2394 (define_insn "neon_vqdmlal<mode>"
2395 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2396 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2397 (match_operand:VMDI 2 "s_register_operand" "w")
2398 (match_operand:VMDI 3 "s_register_operand" "w")]
2401 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2402 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: a <V_widen> accumulator (operand 1, tied to the output) and two
;; VMDI sources.  Prints vqdmlsl.<V_s_elem> with %q0, %P2 and %P3.
2405 (define_insn "neon_vqdmlsl<mode>"
2406 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2407 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2408 (match_operand:VMDI 2 "s_register_operand" "w")
2409 (match_operand:VMDI 3 "s_register_operand" "w")]
2412 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2413 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW sources produce a <V_widen> result.  Prints
;; vmull.<sup><size> with %q0, %P1 and %P2.
2416 (define_insn "neon_vmull<sup><mode>"
2417 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2418 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2419 (match_operand:VW 2 "s_register_operand" "w")]
2422 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2423 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI sources produce a <V_widen> result.  Prints
;; vqdmull.<V_s_elem> with %q0, %P1 and %P2.
2426 (define_insn "neon_vqdmull<mode>"
2427 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2428 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2429 (match_operand:VMDI 2 "s_register_operand" "w")]
2432 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2433 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Builtin expander for vsub on the VCVTF modes.  Emits the generic
;; sub<mode>3 pattern when the mode is not a float mode or
;; flag_unsafe_math_optimizations is set; the other visible call emits
;; neon_vsub<mode>_unspec.
2436 (define_expand "neon_vsub<mode>"
2437 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2438 (match_operand:VCVTF 1 "s_register_operand" "w")
2439 (match_operand:VCVTF 2 "s_register_operand" "w")]
2442 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2443 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2445 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2450 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub for the VCVTF modes.  Prints vsub.<V_if_elem> with
;; three <V_reg> operands.  The type attribute is neon_fp_addsub_s<q> when
;; <Is_float_mode> tests true and neon_sub<q> otherwise.
2452 (define_insn "neon_vsub<mode>_unspec"
2453 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2454 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2455 (match_operand:VCVTF 2 "s_register_operand" "w")]
2458 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2460 (if_then_else (match_test "<Is_float_mode>")
2461 (const_string "neon_fp_addsub_s<q>")
2462 (const_string "neon_sub<q>")))]
;; Long subtract: two VDI sources produce a <V_widen> result.  Prints
;; vsubl.<sup><size> with %q0, %P1 and %P2.
2465 (define_insn "neon_vsubl<sup><mode>"
2466 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2467 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2468 (match_operand:VDI 2 "s_register_operand" "w")]
2471 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2472 [(set_attr "type" "neon_sub_long")]
;; Wide subtract: a <V_widen> operand 1 and a VDI operand 2 produce a
;; <V_widen> result.  Prints vsubw.<sup><size> with %q0, %q1 and %P2.
2475 (define_insn "neon_vsubw<sup><mode>"
2476 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2477 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2478 (match_operand:VDI 2 "s_register_operand" "w")]
2481 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2482 [(set_attr "type" "neon_sub_widen")]
;; vqsub on the VDQIX modes.  Prints vqsub.<sup><size> on three <V_reg>
;; operands; type neon_qsub<q>.
2485 (define_insn "neon_vqsub<sup><mode>"
2486 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2487 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2488 (match_operand:VDQIX 2 "s_register_operand" "w")]
2491 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2492 [(set_attr "type" "neon_qsub<q>")]
;; vhsub on the VDQIW modes.  Prints vhsub.<sup><size> on three <V_reg>
;; operands; type neon_sub_halve<q>.
2495 (define_insn "neon_vhsub<sup><mode>"
2496 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2497 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2498 (match_operand:VDQIW 2 "s_register_operand" "w")]
2501 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2502 [(set_attr "type" "neon_sub_halve<q>")]
;; v<r>subhn: two VN sources produce a <V_narrow> result.  Prints
;; v<r>subhn.<V_if_elem> with %P0 as destination and %q1, %q2 as sources.
2505 (define_insn "neon_v<r>subhn<mode>"
2506 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2507 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2508 (match_operand:VN 2 "s_register_operand" "w")]
2511 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2512 [(set_attr "type" "neon_sub_halve_narrow_q")]
2515 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2516 ;; without unsafe math optimizations.
;; Expander for the vc<cmp_op> comparisons over the VDQW modes (iterated
;; over COMPARISONS).  Operand 2 may be a register or zero.  When the mode
;; is a float vector mode and flag_unsafe_math_optimizations is clear, the
;; explicit v2sf / v4sf *_insn_unspec generators are used; the remaining
;; visible call emits neon_vc<cmp_op><mode>_insn.
2517 (define_expand "neon_vc<cmp_op><mode>"
2518 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2520 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2521 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2524 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2526 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2527 && !flag_unsafe_math_optimizations)
2529 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2530 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2531 whereas this expander iterates over the integer modes as well,
2532 but we will never expand to UNSPECs for the integer comparisons. */
2536 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2541 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2550 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; Canonical-RTL comparison insn for the VDQW modes.  Disabled for float
;; vector modes unless flag_unsafe_math_optimizations is set.  The assembler
;; text is built with sprintf: the element-type prefix is "f" for float
;; vector modes and <cmp_type> otherwise, and the last operand is %<V_reg>2
;; for alternative 0 or the literal "#0" for the Dz alternative.  The type
;; attribute is neon_compare_zero<q> when operand 2 matches zero_operand and
;; neon_compare<q> otherwise.
2557 (define_insn "neon_vc<cmp_op><mode>_insn"
2558 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2560 (COMPARISONS:<V_cmp_result>
2561 (match_operand:VDQW 1 "s_register_operand" "w,w")
2562 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2563 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2564 && !flag_unsafe_math_optimizations)"
2567 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2569 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2570 ? "f" : "<cmp_type>",
2571 which_alternative == 0
2572 ? "%<V_reg>2" : "#0");
2573 output_asm_insn (pattern, operands);
2577 (if_then_else (match_operand 2 "zero_operand")
2578 (const_string "neon_compare_zero<q>")
2579 (const_string "neon_compare<q>")))]
;; Unspec comparison insn for the VCVTF modes.  The assembler text is built
;; with sprintf and always uses the ".f" element prefix; the last operand is
;; %<V_reg>2 for alternative 0 or the literal "#0" for the Dz alternative.
2582 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2583 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2584 (unspec:<V_cmp_result>
2585 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2586 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2591 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2593 which_alternative == 0
2594 ? "%<V_reg>2" : "#0");
2595 output_asm_insn (pattern, operands);
2598 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for the vc<cmp_op> comparisons over the VH modes, guarded by
;; TARGET_NEON_FP16INST.  When the mode is a float vector mode and
;; flag_unsafe_math_optimizations is clear it uses the *_fp16insn_unspec
;; generator; the other visible call uses the *_fp16insn generator.
2601 (define_expand "neon_vc<cmp_op><mode>"
2602 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2605 (match_operand:VH 1 "s_register_operand")
2606 (match_operand:VH 2 "reg_or_zero_operand")))]
2607 "TARGET_NEON_FP16INST"
2609 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2611 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2612 && !flag_unsafe_math_optimizations)
2614 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2615 (operands[0], operands[1], operands[2]));
2618 (gen_neon_vc<cmp_op><mode>_fp16insn
2619 (operands[0], operands[1], operands[2]));
;; Canonical-RTL comparison insn for the VH modes.  Requires
;; TARGET_NEON_FP16INST and is disabled for float vector modes unless
;; flag_unsafe_math_optimizations is set.  The assembler text is built with
;; sprintf in the same way as the VDQW pattern: "f" or <cmp_type> prefix,
;; then %<V_reg>2 for alternative 0 or the literal "#0" for Dz.
2623 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2624 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2626 (COMPARISONS:<V_cmp_result>
2627 (match_operand:VH 1 "s_register_operand" "w,w")
2628 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2629 "TARGET_NEON_FP16INST
2630 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2631 && !flag_unsafe_math_optimizations)"
2634 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2636 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2637 ? "f" : "<cmp_type>",
2638 which_alternative == 0
2639 ? "%<V_reg>2" : "#0");
2640 output_asm_insn (pattern, operands);
2644 (if_then_else (match_operand 2 "zero_operand")
2645 (const_string "neon_compare_zero<q>")
2646 (const_string "neon_compare<q>")))])
;; Unspec comparison insn for the VH modes, guarded by TARGET_NEON_FP16INST.
;; The assembler text always uses the ".f" element prefix; the last operand
;; is %<V_reg>2 for alternative 0 or the literal "#0" for Dz.
2648 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2650 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2651 (unspec:<V_cmp_result>
2652 [(match_operand:VH 1 "s_register_operand" "w,w")
2653 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2655 "TARGET_NEON_FP16INST"
2658 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2660 which_alternative == 0
2661 ? "%<V_reg>2" : "#0");
2662 output_asm_insn (pattern, operands);
2665 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Comparison insn over the GTUGEU code iterator for the VDQIW modes.
;; Prints vc<cmp_op>.u<size> on three <V_reg> operands.
2667 (define_insn "neon_vc<cmp_op>u<mode>"
2668 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2670 (GTUGEU:<V_cmp_result>
2671 (match_operand:VDQIW 1 "s_register_operand" "w")
2672 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2674 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2675 [(set_attr "type" "neon_compare<q>")]
;; Expander for comparisons (GTGE codes) of the abs of two VCVTF operands.
;; Uses the canonical neon_vca<cmp_op><mode>_insn when
;; flag_unsafe_math_optimizations is set; the other visible call uses the
;; *_insn_unspec generator.
2678 (define_expand "neon_vca<cmp_op><mode>"
2679 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2681 (GTGE:<V_cmp_result>
2682 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2683 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2686 if (flag_unsafe_math_optimizations)
2687 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2690 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; Canonical-RTL absolute comparison for the VCVTF modes.  Only available
;; when TARGET_NEON && flag_unsafe_math_optimizations.  Prints
;; vac<cmp_op>.<V_if_elem> on three <V_reg> operands.
2697 (define_insn "neon_vca<cmp_op><mode>_insn"
2698 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2700 (GTGE:<V_cmp_result>
2701 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2702 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2703 "TARGET_NEON && flag_unsafe_math_optimizations"
2704 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2705 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec absolute comparison for the VCVTF modes.  Prints
;; vac<cmp_op_unsp>.<V_if_elem> on three <V_reg> operands.
2708 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2709 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2710 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2711 (match_operand:VCVTF 2 "s_register_operand" "w")]
2714 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2715 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for comparisons (GLTE codes) of the abs of two VH operands,
;; guarded by TARGET_NEON_FP16INST.  Uses the *_fp16insn generator when
;; flag_unsafe_math_optimizations is set; the other visible call uses the
;; *_fp16insn_unspec generator.
2718 (define_expand "neon_vca<cmp_op><mode>"
2720 (match_operand:<V_cmp_result> 0 "s_register_operand")
2722 (GLTE:<V_cmp_result>
2723 (abs:VH (match_operand:VH 1 "s_register_operand"))
2724 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2725 "TARGET_NEON_FP16INST"
2727 if (flag_unsafe_math_optimizations)
2728 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2729 (operands[0], operands[1], operands[2]));
2731 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2732 (operands[0], operands[1], operands[2]));
;; Canonical-RTL absolute comparison for the VH modes.  Only available when
;; TARGET_NEON_FP16INST && flag_unsafe_math_optimizations.  Prints
;; vac<cmp_op>.<V_if_elem> on three <V_reg> operands.
2736 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2738 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2740 (GLTE:<V_cmp_result>
2741 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2742 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2743 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2744 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2745 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec absolute comparison for the VH modes.  Prints
;; vac<cmp_op_unsp>.<V_if_elem> on three <V_reg> operands.
2748 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2749 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2750 (unspec:<V_cmp_result>
2751 [(match_operand:VH 1 "s_register_operand" "w")
2752 (match_operand:VH 2 "s_register_operand" "w")]
2755 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2756 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for the VH modes, guarded by
;; TARGET_NEON_FP16INST.  Forwards to neon_vc<cmp_op><mode>, passing
;; CONST0_RTX (<MODE>mode) as the second comparison operand.
2759 (define_expand "neon_vc<cmp_op>z<mode>"
2761 (match_operand:<V_cmp_result> 0 "s_register_operand")
2762 (COMPARISONS:<V_cmp_result>
2763 (match_operand:VH 1 "s_register_operand")
2765 "TARGET_NEON_FP16INST"
2767 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2768 CONST0_RTX (<MODE>mode)));
;; vtst on the VDQIW modes.  Prints vtst.<V_sz_elem> on three <V_reg>
;; operands; type neon_tst<q>.
2772 (define_insn "neon_vtst<mode>"
2773 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2774 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2775 (match_operand:VDQIW 2 "s_register_operand" "w")]
2778 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2779 [(set_attr "type" "neon_tst<q>")]
;; vabd on the VDQIW modes.  Prints vabd.<sup><size> on three <V_reg>
;; operands; type neon_abd<q>.
2782 (define_insn "neon_vabd<sup><mode>"
2783 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2784 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2785 (match_operand:VDQIW 2 "s_register_operand" "w")]
2788 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2789 [(set_attr "type" "neon_abd<q>")]
;; vabd on the VH modes, guarded by TARGET_NEON_FP16INST.  Prints
;; vabd.<V_s_elem> on three <V_reg> operands.
;; NOTE(review): the type here is neon_abd<q>, while the VCVTF counterpart
;; neon_vabdf<mode> uses neon_fp_abd_s<q> -- confirm this is intended.
2792 (define_insn "neon_vabd<mode>"
2793 [(set (match_operand:VH 0 "s_register_operand" "=w")
2794 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2795 (match_operand:VH 2 "s_register_operand" "w")]
2797 "TARGET_NEON_FP16INST"
2798 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2799 [(set_attr "type" "neon_abd<q>")]
;; vabd on the VCVTF modes.  Prints vabd.<V_s_elem> on three <V_reg>
;; operands; type neon_fp_abd_s<q>.
2802 (define_insn "neon_vabdf<mode>"
2803 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2804 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2805 (match_operand:VCVTF 2 "s_register_operand" "w")]
2808 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2809 [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: two VW sources produce a <V_widen> result.  Prints
;; vabdl.<sup><size> with %q0, %P1 and %P2.
2812 (define_insn "neon_vabdl<sup><mode>"
2813 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2814 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2815 (match_operand:VW 2 "s_register_operand" "w")]
2818 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2819 [(set_attr "type" "neon_abd_long")]
;; vaba on the VDQIW modes, modelled as a plus of an unspec over operands 2
;; and 3 and the accumulator operand 1.  Operand 1 is tied to the output, so
;; the template names only operands 0, 2 and 3.
2822 (define_insn "neon_vaba<sup><mode>"
2823 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2824 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2825 (match_operand:VDQIW 3 "s_register_operand" "w")]
2827 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2829 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2830 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal, modelled as a plus of a <V_widen> unspec over the VW operands 2
;; and 3 and the <V_widen> accumulator operand 1 (tied to the output).
;; Prints vabal.<sup><size> with %q0, %P2 and %P3.
2833 (define_insn "neon_vabal<sup><mode>"
2834 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2835 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2836 (match_operand:VW 3 "s_register_operand" "w")]
2838 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2840 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2841 [(set_attr "type" "neon_arith_acc<q>")]
;; One pattern for both mnemonics, selected by <maxmin>, over the VDQIW
;; modes.  Prints v<maxmin>.<sup><size> on three <V_reg> operands.
2844 (define_insn "neon_v<maxmin><sup><mode>"
2845 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2846 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2847 (match_operand:VDQIW 2 "s_register_operand" "w")]
2850 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2851 [(set_attr "type" "neon_minmax<q>")]
;; One pattern for both mnemonics, selected by <maxmin>, over the VCVTF
;; modes.  Prints v<maxmin>.<V_s_elem> on three <V_reg> operands.
2854 (define_insn "neon_v<maxmin>f<mode>"
2855 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2856 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2857 (match_operand:VCVTF 2 "s_register_operand" "w")]
2860 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2861 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; One pattern for both mnemonics, selected by <maxmin>, over the VH modes;
;; guarded by TARGET_NEON_FP16INST.  Prints v<maxmin>.<V_s_elem> on three
;; <V_reg> operands.
2864 (define_insn "neon_v<maxmin>f<mode>"
2865 [(set (match_operand:VH 0 "s_register_operand" "=w")
2867 [(match_operand:VH 1 "s_register_operand" "w")
2868 (match_operand:VH 2 "s_register_operand" "w")]
2870 "TARGET_NEON_FP16INST"
2871 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2872 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vp<maxmin> written for V4HF only, guarded by TARGET_NEON_FP16INST.
;; Prints vp<maxmin>.f16 with all three operands under %P.
2875 (define_insn "neon_vp<maxmin>fv4hf"
2876 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2878 [(match_operand:V4HF 1 "s_register_operand" "w")
2879 (match_operand:V4HF 2 "s_register_operand" "w")]
2881 "TARGET_NEON_FP16INST"
2882 "vp<maxmin>.f16\t%P0, %P1, %P2"
2883 [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> intrinsic pattern for the VH modes, guarded by
;; TARGET_NEON_FP16INST.  Prints <fmaxmin_op>.<V_s_elem> on three <V_reg>
;; operands.
2886 (define_insn "neon_<fmaxmin_op><mode>"
2888 (match_operand:VH 0 "s_register_operand" "=w")
2890 [(match_operand:VH 1 "s_register_operand" "w")
2891 (match_operand:VH 2 "s_register_operand" "w")]
2893 "TARGET_NEON_FP16INST"
2894 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2895 [(set_attr "type" "neon_fp_minmax_s<q>")]
2898 ;; v<maxmin>nm intrinsics.
;; <fmaxmin_op> intrinsic pattern for the VCVTF modes, guarded by
;; TARGET_NEON && TARGET_FPU_ARMV8.  Prints <fmaxmin_op>.<V_s_elem> on three
;; <V_reg> operands.
2899 (define_insn "neon_<fmaxmin_op><mode>"
2900 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2901 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2902 (match_operand:VCVTF 2 "s_register_operand" "w")]
2904 "TARGET_NEON && TARGET_FPU_ARMV8"
2905 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2906 [(set_attr "type" "neon_fp_minmax_s<q>")]
2909 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Standard-named <fmaxmin><mode>3 pattern for the VCVTF modes, guarded by
;; TARGET_NEON && TARGET_FPU_ARMV8.  It prints the same
;; <fmaxmin_op>.<V_s_elem> template as the neon_<fmaxmin_op><mode> pattern.
2910 (define_insn "<fmaxmin><mode>3"
2911 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2912 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2913 (match_operand:VCVTF 2 "s_register_operand" "w")]
2915 "TARGET_NEON && TARGET_FPU_ARMV8"
2916 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2917 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Builtin expander for vpadd on the VD modes.  Forwards to
;; neon_vpadd_internal<mode>, passing operands 0 and 1 first; the remaining
;; argument is on a line not shown here.
2920 (define_expand "neon_vpadd<mode>"
2921 [(match_operand:VD 0 "s_register_operand" "=w")
2922 (match_operand:VD 1 "s_register_operand" "w")
2923 (match_operand:VD 2 "s_register_operand" "w")]
2926 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: one VDQIW source produces a <V_double_width> result.  Prints
;; vpaddl.<sup><size> on two <V_reg> operands.
2931 (define_insn "neon_vpaddl<sup><mode>"
2932 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2933 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2936 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2937 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: a <V_double_width> accumulator (operand 1, tied to the output)
;; and a VDQIW source (operand 2).  Prints vpadal.<sup><size> naming only
;; operands 0 and 2.
2940 (define_insn "neon_vpadal<sup><mode>"
2941 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2942 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2943 (match_operand:VDQIW 2 "s_register_operand" "w")]
2946 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2947 [(set_attr "type" "neon_reduc_add_acc")]
;; One pattern for both mnemonics, selected by <maxmin>, over the VDI modes.
;; Prints vp<maxmin>.<sup><size> on three <V_reg> operands.
2950 (define_insn "neon_vp<maxmin><sup><mode>"
2951 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2952 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2953 (match_operand:VDI 2 "s_register_operand" "w")]
2956 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2957 [(set_attr "type" "neon_reduc_minmax<q>")]
;; One pattern for both mnemonics, selected by <maxmin>, over the VCVTF
;; modes.  Prints vp<maxmin>.<V_s_elem> on three <V_reg> operands.
2960 (define_insn "neon_vp<maxmin>f<mode>"
2961 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2962 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2963 (match_operand:VCVTF 2 "s_register_operand" "w")]
2966 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2967 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps on the VCVTF modes.  Prints vrecps.<V_if_elem> on three <V_reg>
;; operands; type neon_fp_recps_s<q>.
2970 (define_insn "neon_vrecps<mode>"
2971 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2972 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2973 (match_operand:VCVTF 2 "s_register_operand" "w")]
2976 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2977 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps on the VH modes, guarded by TARGET_NEON_FP16INST.  Prints
;; vrecps.<V_if_elem> on three <V_reg> operands.
2980 (define_insn "neon_vrecps<mode>"
2982 (match_operand:VH 0 "s_register_operand" "=w")
2983 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2984 (match_operand:VH 2 "s_register_operand" "w")]
2986 "TARGET_NEON_FP16INST"
2987 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2988 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts on the VCVTF modes.  Prints vrsqrts.<V_if_elem> on three <V_reg>
;; operands; type neon_fp_rsqrts_s<q>.
2991 (define_insn "neon_vrsqrts<mode>"
2992 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2993 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2994 (match_operand:VCVTF 2 "s_register_operand" "w")]
2997 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2998 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts on the VH modes, guarded by TARGET_NEON_FP16INST.  Prints
;; vrsqrts.<V_if_elem> on three <V_reg> operands.
3001 (define_insn "neon_vrsqrts<mode>"
3003 (match_operand:VH 0 "s_register_operand" "=w")
3004 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3005 (match_operand:VH 2 "s_register_operand" "w")]
3007 "TARGET_NEON_FP16INST"
3008 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3009 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Builtin expander for vabs on the VDQW modes.  Forwards to the
;; standard-named abs<mode>2 pattern.
3012 (define_expand "neon_vabs<mode>"
3013 [(match_operand:VDQW 0 "s_register_operand" "")
3014 (match_operand:VDQW 1 "s_register_operand" "")]
3017 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs on the VDQIW modes.  Prints vqabs.<V_s_elem> on two <V_reg>
;; operands; type neon_qabs<q>.
3021 (define_insn "neon_vqabs<mode>"
3022 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3023 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3026 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3027 [(set_attr "type" "neon_qabs<q>")]
;; bswap over the VDQHSD modes, emitted as vrev<V_sz_elem>.8 on two <V_reg>
;; operands; type neon_rev<q>.
3030 (define_insn "neon_bswap<mode>"
3031 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3032 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3034 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3035 [(set_attr "type" "neon_rev<q>")]
;; Builtin expander for vneg on the VDQW modes.  Forwards to the
;; standard-named neg<mode>2 pattern.
3038 (define_expand "neon_vneg<mode>"
3039 [(match_operand:VDQW 0 "s_register_operand" "")
3040 (match_operand:VDQW 1 "s_register_operand" "")]
3043 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; Expander for copysignf on the VCVTF modes.  It builds a constant vector in
;; <V_cmp_result> mode with 0x80000000 in every element, moves operand 2 into
;; operand 0, and emits a vbsl on operand 0 using that mask viewed in
;; <MODE>mode through a subreg.
;; The element constant is created with gen_int_mode rather than GEN_INT so
;; that it is sign-extended into canonical CONST_INT form for its mode when
;; HOST_WIDE_INT is wider than 32 bits.
;; NOTE(review): SImode assumes <V_cmp_result> has 32-bit integer elements
;; for every VCVTF mode -- confirm against the iterator definitions.
3047 (define_expand "neon_copysignf<mode>"
3048 [(match_operand:VCVTF 0 "register_operand")
3049 (match_operand:VCVTF 1 "register_operand")
3050 (match_operand:VCVTF 2 "register_operand")]
3054 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3055 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
3056 rtvec v = rtvec_alloc (n_elt);
3058 /* Create bitmask for vector select. */
3059 for (i = 0; i < n_elt; ++i)
3060 RTVEC_ELT (v, i) = gen_int_mode (0x80000000, SImode);
3062 emit_move_insn (v_bitmask,
3063 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
3064 emit_move_insn (operands[0], operands[2]);
3065 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3066 <VCVTF:V_cmp_result>mode, 0);
3067 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; vqneg on the VDQIW modes.  Prints vqneg.<V_s_elem> on two <V_reg>
;; operands; type neon_qneg<q>.
3074 (define_insn "neon_vqneg<mode>"
3075 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3076 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3079 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3080 [(set_attr "type" "neon_qneg<q>")]
;; vcls on the VDQIW modes.  Prints vcls.<V_s_elem> on two <V_reg>
;; operands; type neon_cls<q>.
3083 (define_insn "neon_vcls<mode>"
3084 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3085 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3088 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3089 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz pattern for the VDQIW modes, expressed with the clz
;; RTL code.  Prints vclz.<V_if_elem> on two <V_reg> operands.
3092 (define_insn "clz<mode>2"
3093 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3094 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3096 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3097 [(set_attr "type" "neon_cnt<q>")]
;; Builtin expander for vclz on the VDQIW modes.  Forwards to the
;; standard-named clz<mode>2 pattern.
3100 (define_expand "neon_vclz<mode>"
3101 [(match_operand:VDQIW 0 "s_register_operand" "")
3102 (match_operand:VDQIW 1 "s_register_operand" "")]
3105 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount pattern for the VE modes, expressed with the
;; popcount RTL code.  Prints vcnt.<V_sz_elem> on two <V_reg> operands.
3109 (define_insn "popcount<mode>2"
3110 [(set (match_operand:VE 0 "s_register_operand" "=w")
3111 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3113 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3114 [(set_attr "type" "neon_cnt<q>")]
;; Builtin expander for vcnt on the VE modes.  Forwards to the
;; standard-named popcount<mode>2 pattern.
3117 (define_expand "neon_vcnt<mode>"
3118 [(match_operand:VE 0 "s_register_operand" "=w")
3119 (match_operand:VE 1 "s_register_operand" "w")]
3122 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; VRECPE for the VH (half-precision) modes.  Only available when
;; TARGET_NEON_FP16INST holds; always printed with the .f16 suffix.
3126 (define_insn "neon_vrecpe<mode>"
3127 [(set (match_operand:VH 0 "s_register_operand" "=w")
3128 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3130 "TARGET_NEON_FP16INST"
3131 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3132 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; VRECPE for the V32 modes.  The element suffix comes from <V_u_elem>;
;; the operation itself is an unspec.
3135 (define_insn "neon_vrecpe<mode>"
3136 [(set (match_operand:V32 0 "s_register_operand" "=w")
3137 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3140 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3141 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; VRSQRTE for the V32 modes.  Same shape as the V32 neon_vrecpe<mode>
;; pattern: one unspec input, one vrsqrte instruction.
3144 (define_insn "neon_vrsqrte<mode>"
3145 [(set (match_operand:V32 0 "s_register_operand" "=w")
3146 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3149 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3150 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Builtin expander for vmvn: forwards operands 0 and 1 unchanged to the
;; one_cmpl<mode>2 pattern via gen_one_cmpl<mode>2.
3153 (define_expand "neon_vmvn<mode>"
3154 [(match_operand:VDQIW 0 "s_register_operand" "")
3155 (match_operand:VDQIW 1 "s_register_operand" "")]
3158 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Extract one lane (operand 2, an immediate) of a VD-mode vector into an
;; SImode core register, printed as vmov.s<size>.
;; When BYTES_BIG_ENDIAN, the lane number is mirrored at output time
;; (nunits - 1 - lane) and operands[2] is overwritten with the new value
;; before the template is returned.
3162 (define_insn "neon_vget_lane<mode>_sext_internal"
3163 [(set (match_operand:SI 0 "s_register_operand" "=r")
3165 (vec_select:<V_elem>
3166 (match_operand:VD 1 "s_register_operand" "w")
3167 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3170 if (BYTES_BIG_ENDIAN)
3172 int elt = INTVAL (operands[2]);
3173 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3174 operands[2] = GEN_INT (elt);
3176 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3178 [(set_attr "type" "neon_to_gp")]
;; Unsigned counterpart of the VD sext pattern: extract one lane of a
;; VD-mode vector into an SImode core register, printed as vmov.u<size>.
;; When BYTES_BIG_ENDIAN, the lane number is mirrored at output time
;; (nunits - 1 - lane) and operands[2] is overwritten with the new value.
3181 (define_insn "neon_vget_lane<mode>_zext_internal"
3182 [(set (match_operand:SI 0 "s_register_operand" "=r")
3184 (vec_select:<V_elem>
3185 (match_operand:VD 1 "s_register_operand" "w")
3186 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3189 if (BYTES_BIG_ENDIAN)
3191 int elt = INTVAL (operands[2]);
3192 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3193 operands[2] = GEN_INT (elt);
3195 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3197 [(set_attr "type" "neon_to_gp")]
;; Lane extract (vmov.s<size>) for the VQ2 modes.  The source vector is
;; addressed as two <V_HALF>mode halves:
;;   - elt / halfelts picks the half, reached as hard register
;;     regno + 2 * (elt / halfelts);
;;   - elt % halfelts is the lane inside that half, mirrored within the
;;     half (halfelts - 1 - lane) when BYTES_BIG_ENDIAN.
;; The instruction is printed directly with output_asm_insn on a private
;; ops[] array, so operands[] is left untouched.
3200 (define_insn "neon_vget_lane<mode>_sext_internal"
3201 [(set (match_operand:SI 0 "s_register_operand" "=r")
3203 (vec_select:<V_elem>
3204 (match_operand:VQ2 1 "s_register_operand" "w")
3205 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3209 int regno = REGNO (operands[1]);
3210 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3211 unsigned int elt = INTVAL (operands[2]);
3212 unsigned int elt_adj = elt % halfelts;
3214 if (BYTES_BIG_ENDIAN)
3215 elt_adj = halfelts - 1 - elt_adj;
3217 ops[0] = operands[0];
3218 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3219 ops[2] = GEN_INT (elt_adj);
3220 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3224 [(set_attr "type" "neon_to_gp_q")]
;; Unsigned lane extract (vmov.u<size>) for the VQ2 modes.  The source
;; vector is addressed as two <V_HALF>mode halves:
;;   - elt / halfelts picks the half, reached as hard register
;;     regno + 2 * (elt / halfelts);
;;   - elt % halfelts is the lane inside that half, mirrored within the
;;     half (halfelts - 1 - lane) when BYTES_BIG_ENDIAN.
;; The instruction is printed directly with output_asm_insn on a private
;; ops[] array, so operands[] is left untouched.
3227 (define_insn "neon_vget_lane<mode>_zext_internal"
3228 [(set (match_operand:SI 0 "s_register_operand" "=r")
3230 (vec_select:<V_elem>
3231 (match_operand:VQ2 1 "s_register_operand" "w")
3232 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3236 int regno = REGNO (operands[1]);
3237 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3238 unsigned int elt = INTVAL (operands[2]);
3239 unsigned int elt_adj = elt % halfelts;
3241 if (BYTES_BIG_ENDIAN)
3242 elt_adj = halfelts - 1 - elt_adj;
3244 ops[0] = operands[0];
3245 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3246 ops[2] = GEN_INT (elt_adj);
3247 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3251 [(set_attr "type" "neon_to_gp_q")]
;; Builtin expander for vget_lane over the VDQW modes.
;; On big-endian targets the lane number is first converted by XOR-ing it
;; with (lanes per 64 bits - 1).  Vectors with 32-bit elements are then
;; handled by vec_extract<mode>; the other call visible here goes to the
;; sign-extending neon_vget_lane<mode>_sext_internal pattern.
3254 (define_expand "neon_vget_lane<mode>"
3255 [(match_operand:<V_ext> 0 "s_register_operand" "")
3256 (match_operand:VDQW 1 "s_register_operand" "")
3257 (match_operand:SI 2 "immediate_operand" "")]
3260 if (BYTES_BIG_ENDIAN)
3262 /* The intrinsics are defined in terms of a model where the
3263 element ordering in memory is vldm order, whereas the generic
3264 RTL is defined in terms of a model where the element ordering
3265 in memory is array order. Convert the lane number to conform
3267 unsigned int elt = INTVAL (operands[2]);
3268 unsigned int reg_nelts
3269 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3270 elt ^= reg_nelts - 1;
3271 operands[2] = GEN_INT (elt);
3274 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3275 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
3277 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Unsigned builtin expander for vget_lane over the VDQIW modes.
;; On big-endian targets the lane number is first converted by XOR-ing it
;; with (lanes per 64 bits - 1).  Vectors with 32-bit elements are then
;; handled by vec_extract<mode>; the other call visible here goes to the
;; zero-extending neon_vget_lane<mode>_zext_internal pattern.
3283 (define_expand "neon_vget_laneu<mode>"
3284 [(match_operand:<V_ext> 0 "s_register_operand" "")
3285 (match_operand:VDQIW 1 "s_register_operand" "")
3286 (match_operand:SI 2 "immediate_operand" "")]
3289 if (BYTES_BIG_ENDIAN)
3291 /* The intrinsics are defined in terms of a model where the
3292 element ordering in memory is vldm order, whereas the generic
3293 RTL is defined in terms of a model where the element ordering
3294 in memory is array order. Convert the lane number to conform
3296 unsigned int elt = INTVAL (operands[2]);
3297 unsigned int reg_nelts
3298 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3299 elt ^= reg_nelts - 1;
3300 operands[2] = GEN_INT (elt);
3303 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3304 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
3306 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; vget_lane for a DImode "vector": a plain move of operand 1 into
;; operand 0.  The lane operand (operand 2) is accepted but not referenced
;; by the visible expansion code.
3312 (define_expand "neon_vget_lanedi"
3313 [(match_operand:DI 0 "s_register_operand" "=r")
3314 (match_operand:DI 1 "s_register_operand" "w")
3315 (match_operand:SI 2 "immediate_operand" "")]
3318 emit_move_insn (operands[0], operands[1]);
;; vget_lane for V2DI.  After the big-endian lane conversion (XOR with 1,
;; since there are two lanes) the lane must be 0 or 1, which is asserted.
;; Lane 0 moves the DImode low part of operand 1 into operand 0 and lane 1
;; moves the DImode high part.
3322 (define_expand "neon_vget_lanev2di"
3323 [(match_operand:DI 0 "s_register_operand" "")
3324 (match_operand:V2DI 1 "s_register_operand" "")
3325 (match_operand:SI 2 "immediate_operand" "")]
3330 if (BYTES_BIG_ENDIAN)
3332 /* The intrinsics are defined in terms of a model where the
3333 element ordering in memory is vldm order, whereas the generic
3334 RTL is defined in terms of a model where the element ordering
3335 in memory is array order. Convert the lane number to conform
3337 unsigned int elt = INTVAL (operands[2]);
3338 unsigned int reg_nelts = 2;
3339 elt ^= reg_nelts - 1;
3340 operands[2] = GEN_INT (elt);
3343 lane = INTVAL (operands[2]);
3344 gcc_assert ((lane ==0) || (lane == 1));
3345 emit_move_insn (operands[0], lane == 0
3346 ? gen_lowpart (DImode, operands[1])
3347 : gen_highpart (DImode, operands[1]));
;; Builtin expander for vset_lane over the VDQ modes.
;; Operand 1 is the scalar to insert, operand 2 the input vector and
;; operand 3 the lane number.  On big-endian targets the lane is XOR-ed with
;; (lanes per 64 bits - 1).  The lane is then handed to
;; vec_set<mode>_internal as a one-hot mask (1 << lane), not as an index.
3351 (define_expand "neon_vset_lane<mode>"
3352 [(match_operand:VDQ 0 "s_register_operand" "=w")
3353 (match_operand:<V_elem> 1 "s_register_operand" "r")
3354 (match_operand:VDQ 2 "s_register_operand" "0")
3355 (match_operand:SI 3 "immediate_operand" "i")]
3358 unsigned int elt = INTVAL (operands[3]);
3360 if (BYTES_BIG_ENDIAN)
3362 unsigned int reg_nelts
3363 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3364 elt ^= reg_nelts - 1;
3367 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3368 GEN_INT (1 << elt), operands[2]));
3372 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; vset_lane for DImode: a plain move of the scalar (operand 1) into
;; operand 0.  The visible expansion code does not reference operand 2
;; (the input vector) or operand 3 (the lane).
3374 (define_expand "neon_vset_lanedi"
3375 [(match_operand:DI 0 "s_register_operand" "=w")
3376 (match_operand:DI 1 "s_register_operand" "r")
3377 (match_operand:DI 2 "s_register_operand" "0")
3378 (match_operand:SI 3 "immediate_operand" "i")]
3381 emit_move_insn (operands[0], operands[1]);
;; Builtin expander for vcreate: reinterpret the DImode operand 1 in the
;; destination's vector mode with gen_lowpart, then move it into operand 0.
3385 (define_expand "neon_vcreate<mode>"
3386 [(match_operand:VD_RE 0 "s_register_operand" "")
3387 (match_operand:DI 1 "general_operand" "")]
3390 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3391 emit_move_insn (operands[0], src);
;; vdup_n for the VX modes: vec_duplicate of a scalar held in a core
;; register ("r"), emitted as a single vdup.
3395 (define_insn "neon_vdup_n<mode>"
3396 [(set (match_operand:VX 0 "s_register_operand" "=w")
3397 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3399 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3400 [(set_attr "type" "neon_from_gp<q>")]
;; vdup_n for V4HF: vec_duplicate of an HFmode scalar held in a core
;; register ("r").  The insn condition and output template are not visible
;; in this excerpt.
3403 (define_insn "neon_vdup_nv4hf"
3404 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3405 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3408 [(set_attr "type" "neon_from_gp")]
;; vdup_n for V8HF: vec_duplicate of an HFmode scalar held in a core
;; register ("r").  The insn condition and output template are not visible
;; in this excerpt.
3411 (define_insn "neon_vdup_nv8hf"
3412 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3413 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3416 [(set_attr "type" "neon_from_gp_q")]
;; vdup_n for the V32 modes, with two alternatives for the scalar source:
;;   alternative 0 ("r"): scalar printed as %1, typed neon_from_gp<q>;
;;   alternative 1 ("t"): scalar printed with the %y modifier, typed
;;                        neon_dup<q>.
;; NOTE(review): "t" is presumably a VFP single-precision register and %y
;; its D-register-lane spelling -- confirm against the ARM constraint and
;; operand-modifier documentation.
3419 (define_insn "neon_vdup_n<mode>"
3420 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3421 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3424 vdup.<V_sz_elem>\t%<V_reg>0, %1
3425 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3426 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; vdup_n for DImode: with a single element there is nothing to duplicate,
;; so this expands to a plain move of operand 1 into operand 0.
3429 (define_expand "neon_vdup_ndi"
3430 [(match_operand:DI 0 "s_register_operand" "=w")
3431 (match_operand:DI 1 "s_register_operand" "r")]
3434 emit_move_insn (operands[0], operands[1]);
;; vdup_n for V2DI: duplicates a DImode scalar with two vmov instructions,
;; one writing %e0 and one writing %f0, hence the "length" of 8.
;; Alternative 0 takes the scalar from core registers (%Q1, %R1) and
;; alternative 1 from a "w" register (%P1).
;; NOTE(review): %e0/%f0 presumably name the low/high D register of the
;; quad destination -- confirm against arm_print_operand.
3439 (define_insn "neon_vdup_nv2di"
3440 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3441 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3444 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3445 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3446 [(set_attr "length" "8")
3447 (set_attr "type" "multiple")]
;; Insn behind vdup_lane for the VDQW modes: broadcasts lane operand 2 of
;; the <V_double_vector_mode> vector operand 1 into operand 0.
;; When BYTES_BIG_ENDIAN, the lane number is mirrored at output time
;; (nunits of the source mode - 1 - lane) and operands[2] is overwritten.
;; Two templates are returned, one printing the destination with %P and
;; one with %q; the test choosing between them is not visible here.
3450 (define_insn "neon_vdup_lane<mode>_internal"
3451 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3453 (vec_select:<V_elem>
3454 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3455 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3458 if (BYTES_BIG_ENDIAN)
3460 int elt = INTVAL (operands[2]);
3461 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3462 operands[2] = GEN_INT (elt);
3465 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3467 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3469 [(set_attr "type" "neon_dup<q>")]
;; Half-precision (VH modes) version of neon_vdup_lane<mode>_internal,
;; enabled only when TARGET_NEON && TARGET_FP16.
;; When BYTES_BIG_ENDIAN, the lane number is mirrored at output time
;; (nunits of the source mode - 1 - lane) and operands[2] is overwritten.
;; Two templates are returned, one printing the destination with %P and
;; one with %q; the test choosing between them is not visible here.
3472 (define_insn "neon_vdup_lane<mode>_internal"
3473 [(set (match_operand:VH 0 "s_register_operand" "=w")
3475 (vec_select:<V_elem>
3476 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3477 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3478 "TARGET_NEON && TARGET_FP16"
3480 if (BYTES_BIG_ENDIAN)
3482 int elt = INTVAL (operands[2]);
3483 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3484 operands[2] = GEN_INT (elt);
3487 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3489 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3491 [(set_attr "type" "neon_dup<q>")]
;; Builtin expander for vdup_lane over the VDQW modes.  On big-endian
;; targets the lane number is XOR-ed with (lanes per 64 bits of the source
;; mode - 1); the result is then passed on to
;; neon_vdup_lane<mode>_internal.
3494 (define_expand "neon_vdup_lane<mode>"
3495 [(match_operand:VDQW 0 "s_register_operand" "=w")
3496 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3497 (match_operand:SI 2 "immediate_operand" "i")]
3500 if (BYTES_BIG_ENDIAN)
3502 unsigned int elt = INTVAL (operands[2]);
3503 unsigned int reg_nelts
3504 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3505 elt ^= reg_nelts - 1;
3506 operands[2] = GEN_INT (elt);
3508 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; Builtin expander for vdup_lane over the VH modes, enabled only when
;; TARGET_NEON && TARGET_FP16.  On big-endian targets the lane number is
;; XOR-ed with (lanes per 64 bits of the source mode - 1); the result is
;; then passed on to neon_vdup_lane<mode>_internal.
3513 (define_expand "neon_vdup_lane<mode>"
3514 [(match_operand:VH 0 "s_register_operand")
3515 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3516 (match_operand:SI 2 "immediate_operand")]
3517 "TARGET_NEON && TARGET_FP16"
3519 if (BYTES_BIG_ENDIAN)
3521 unsigned int elt = INTVAL (operands[2]);
3522 unsigned int reg_nelts
3523 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3524 elt ^= reg_nelts - 1;
3525 operands[2] = GEN_INT (elt);
3527 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3532 ; Scalar index is ignored, since only zero is valid here.
;; vdup_lane for DImode: expands to a plain move of operand 1 into
;; operand 0; operand 2 is not referenced by the expansion code.
3533 (define_expand "neon_vdup_lanedi"
3534 [(match_operand:DI 0 "s_register_operand" "=w")
3535 (match_operand:DI 1 "s_register_operand" "w")
3536 (match_operand:SI 2 "immediate_operand" "i")]
3539 emit_move_insn (operands[0], operands[1]);
3543 ; Likewise for v2di, as the DImode second operand has only a single element.
;; vdup_lane producing V2DI from a DImode source: delegates to
;; neon_vdup_nv2di with operands 0 and 1; operand 2 is not referenced.
3544 (define_expand "neon_vdup_lanev2di"
3545 [(match_operand:V2DI 0 "s_register_operand" "=w")
3546 (match_operand:DI 1 "s_register_operand" "w")
3547 (match_operand:SI 2 "immediate_operand" "i")]
3550 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3554 ; Disabled before reload because we don't want combine doing something silly,
3555 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap: the two sets exchange operands 0 and 1, both of which are
;; read-write ("+w"), and a single vswp is emitted.  The condition requires
;; reload_completed, so the pattern only matches after reload.
3556 (define_insn "*neon_vswp<mode>"
3557 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3558 (match_operand:VDQX 1 "s_register_operand" "+w"))
3559 (set (match_dup 1) (match_dup 0))]
3560 "TARGET_NEON && reload_completed"
3561 "vswp\t%<V_reg>0, %<V_reg>1"
3562 [(set_attr "type" "neon_permute<q>")]
3565 ;; In this insn, operand 1 should be low, and operand 2 the high part of the dest vector.
3567 ;; FIXME: A different implementation of this builtin could make it much
3568 ;; more likely that we wouldn't actually need to output anything (we could make
3569 ;; it so that the reg allocator puts things in the right places magically
3570 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vcombine: vec_concat of two VDX-mode registers (operands 1 and 2) into a
;; <V_DOUBLE>-mode register.  Declared as define_insn_and_split: once
;; reload has completed, the split hands the operands to
;; neon_split_vcombine, which generates the actual code.
3572 (define_insn_and_split "neon_vcombine<mode>"
3573 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3574 (vec_concat:<V_DOUBLE>
3575 (match_operand:VDX 1 "s_register_operand" "w")
3576 (match_operand:VDX 2 "s_register_operand" "w")))]
3579 "&& reload_completed"
3582 neon_split_vcombine (operands);
3585 [(set_attr "type" "multiple")]
;; vget_high: move into operand 0 the <V_HALF>mode subreg of operand 1 that
;; starts at byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. the second half
;; of the VQX-mode register as counted by subreg byte offset.
3588 (define_expand "neon_vget_high<mode>"
3589 [(match_operand:<V_HALF> 0 "s_register_operand")
3590 (match_operand:VQX 1 "s_register_operand")]
3593 emit_move_insn (operands[0],
3594 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3595 GET_MODE_SIZE (<V_HALF>mode)));
;; vget_low: move a <V_HALF>mode subreg of operand 1 into operand 0.  The
;; remaining simplify_gen_subreg arguments (including the byte offset) are
;; on lines not visible in this excerpt.
3599 (define_expand "neon_vget_low<mode>"
3600 [(match_operand:<V_HALF> 0 "s_register_operand")
3601 (match_operand:VQX 1 "s_register_operand")]
3604 emit_move_insn (operands[0],
3605 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Signed integer vector to floating-point vector conversion (generic
;; "float" RTL code), emitted as vcvt.f32.s32.  Disabled when
;; flag_rounding_math is set.
3610 (define_insn "float<mode><V_cvtto>2"
3611 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3612 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3613 "TARGET_NEON && !flag_rounding_math"
3614 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3615 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned integer vector to floating-point vector conversion (generic
;; "unsigned_float" RTL code), emitted as vcvt.f32.u32.  Disabled when
;; flag_rounding_math is set.
3618 (define_insn "floatuns<mode><V_cvtto>2"
3619 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3620 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3621 "TARGET_NEON && !flag_rounding_math"
3622 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3623 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Floating-point vector to signed integer vector conversion (generic
;; "fix" RTL code), emitted as vcvt.s32.f32.
3626 (define_insn "fix_trunc<mode><V_cvtto>2"
3627 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3628 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3630 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3631 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Floating-point vector to unsigned integer vector conversion (generic
;; "unsigned_fix" RTL code), emitted as vcvt.u32.f32.
3634 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3635 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3636 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3638 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt builtin, float to integer direction (VCVTF source modes).  The
;; <sup> iterator attribute supplies the signedness letter of the 32-bit
;; integer result type in the mnemonic.
3642 (define_insn "neon_vcvt<sup><mode>"
3643 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3644 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3647 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3648 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt builtin, integer to float direction (VCVTI source modes).  The
;; <sup> iterator attribute supplies the signedness letter of the 32-bit
;; integer source type in the mnemonic.
3651 (define_insn "neon_vcvt<sup><mode>"
3652 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3653 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3656 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3657 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion V4HF -> V4SF (vcvt.f32.f16): the destination is
;; printed with %q and the source with %P.  Requires
;; TARGET_NEON && TARGET_FP16.
3660 (define_insn "neon_vcvtv4sfv4hf"
3661 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3662 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3664 "TARGET_NEON && TARGET_FP16"
3665 "vcvt.f32.f16\t%q0, %P1"
3666 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion V4SF -> V4HF (vcvt.f16.f32): the destination is
;; printed with %P and the source with %q.  Requires
;; TARGET_NEON && TARGET_FP16.
3669 (define_insn "neon_vcvtv4hfv4sf"
3670 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3671 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3673 "TARGET_NEON && TARGET_FP16"
3674 "vcvt.f16.f32\t%P0, %q1"
3675 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Half-precision vcvt builtin, 16-bit integer to f16 direction (VCVTHI
;; source modes).  Requires TARGET_NEON_FP16INST; <sup> supplies the
;; signedness letter of the integer source type.
3678 (define_insn "neon_vcvt<sup><mode>"
3680 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3682 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
3684 "TARGET_NEON_FP16INST"
3685 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
3686 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision vcvt builtin, f16 to 16-bit integer direction (VH source
;; modes).  Requires TARGET_NEON_FP16INST; <sup> supplies the signedness
;; letter of the integer result type.
3689 (define_insn "neon_vcvt<sup><mode>"
3691 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3693 [(match_operand:VH 1 "s_register_operand" "w")]
3695 "TARGET_NEON_FP16INST"
3696 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3697 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; vcvt_n builtin, float to 32-bit integer with an extra immediate
;; (operand 2) that is printed as the third assembler operand.  The
;; immediate is checked at output time with arm_const_bounds (…, 1, 33).
;; NOTE(review): the bounds are presumably low-inclusive/high-exclusive,
;; i.e. 1..32 -- confirm against arm_const_bounds.
3700 (define_insn "neon_vcvt<sup>_n<mode>"
3701 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3702 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3703 (match_operand:SI 2 "immediate_operand" "i")]
3707 arm_const_bounds (operands[2], 1, 33);
3708 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3710 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Half-precision vcvt_n builtin, f16 to 16-bit integer with an extra
;; immediate (operand 2) printed as the third assembler operand.  Requires
;; TARGET_NEON_FP16INST.  The immediate is checked at output time with
;; arm_const_bounds (…, 0, 17).
;; NOTE(review): the lower bound here is 0, whereas the 32-bit variants of
;; this pattern pass 1 -- confirm that accepting 0 is intended.
3713 (define_insn "neon_vcvt<sup>_n<mode>"
3714 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3716 [(match_operand:VH 1 "s_register_operand" "w")
3717 (match_operand:SI 2 "immediate_operand" "i")]
3719 "TARGET_NEON_FP16INST"
3721 arm_const_bounds (operands[2], 0, 17);
3722 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
3724 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; vcvt_n builtin, 32-bit integer to float with an extra immediate
;; (operand 2) that is printed as the third assembler operand.  The
;; immediate is checked at output time with arm_const_bounds (…, 1, 33).
;; NOTE(review): the bounds are presumably low-inclusive/high-exclusive,
;; i.e. 1..32 -- confirm against arm_const_bounds.
3727 (define_insn "neon_vcvt<sup>_n<mode>"
3728 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3729 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3730 (match_operand:SI 2 "immediate_operand" "i")]
3734 arm_const_bounds (operands[2], 1, 33);
3735 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3737 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Half-precision vcvt_n builtin, 16-bit integer to f16 with an extra
;; immediate (operand 2) printed as the third assembler operand.  Requires
;; TARGET_NEON_FP16INST.  The immediate is checked at output time with
;; arm_const_bounds (…, 0, 17).
;; NOTE(review): the lower bound here is 0, whereas the 32-bit variants of
;; this pattern pass 1 -- confirm that accepting 0 is intended.
3740 (define_insn "neon_vcvt<sup>_n<mode>"
3741 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3743 [(match_operand:VCVTHI 1 "s_register_operand" "w")
3744 (match_operand:SI 2 "immediate_operand" "i")]
3746 "TARGET_NEON_FP16INST"
3748 arm_const_bounds (operands[2], 0, 17);
3749 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3751 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision f16 to 16-bit integer conversions whose mnemonic carries
;; an extra suffix taken from <vcvth_op> (printed directly after "vcvt").
;; Requires TARGET_NEON_FP16INST.
3754 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3756 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3758 [(match_operand:VH 1 "s_register_operand" "w")]
3760 "TARGET_NEON_FP16INST"
3761 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3762 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; vmovn builtin: narrows the VN-mode source (printed with %q) into a
;; <V_narrow>-mode destination (printed with %P).
3765 (define_insn "neon_vmovn<mode>"
3766 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3767 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3770 "vmovn.<V_if_elem>\t%P0, %q1"
3771 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; vqmovn builtin: narrowing move from a VN-mode source (%q) to a
;; <V_narrow>-mode destination (%P); <sup> supplies the signedness letter
;; of the element type in the mnemonic.
3774 (define_insn "neon_vqmovn<sup><mode>"
3775 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3776 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3779 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3780 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vqmovun builtin: narrowing move from a VN-mode source (%q) to a
;; <V_narrow>-mode destination (%P), printed with the <V_s_elem> suffix.
3783 (define_insn "neon_vqmovun<mode>"
3784 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3785 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3788 "vqmovun.<V_s_elem>\t%P0, %q1"
3789 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vmovl builtin: widens the VW-mode source (printed with %P) into a
;; <V_widen>-mode destination (printed with %q); <sup> supplies the
;; signedness letter of the element type.
3792 (define_insn "neon_vmovl<sup><mode>"
3793 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3794 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3797 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3798 [(set_attr "type" "neon_shift_imm_long")]
;; vmul_lane for the VMD modes: multiply operand 1 by lane operand 3 of the
;; scalar-holding vector operand 2.  Operand 2 uses the mode-dependent
;; <scalar_mul_constraint>.  The "type" attribute is the floating-point
;; variant when <Is_float_mode> holds and the integer variant otherwise.
3801 (define_insn "neon_vmul_lane<mode>"
3802 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3803 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3804 (match_operand:VMD 2 "s_register_operand"
3805 "<scalar_mul_constraint>")
3806 (match_operand:SI 3 "immediate_operand" "i")]
3810 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3813 (if_then_else (match_test "<Is_float_mode>")
3814 (const_string "neon_fp_mul_s_scalar<q>")
3815 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; vmul_lane for the VMQ modes: multiply operand 1 by lane operand 3 of the
;; <V_HALF>-mode scalar-holding vector operand 2.  Destination and first
;; source are printed with %q, the scalar vector with %P.  The "type"
;; attribute is selected on <Is_float_mode>.
3818 (define_insn "neon_vmul_lane<mode>"
3819 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3820 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3821 (match_operand:<V_HALF> 2 "s_register_operand"
3822 "<scalar_mul_constraint>")
3823 (match_operand:SI 3 "immediate_operand" "i")]
3827 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3830 (if_then_else (match_test "<Is_float_mode>")
3831 (const_string "neon_fp_mul_s_scalar<q>")
3832 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; vmul_lane for the VH (half-precision) modes: the scalar always comes
;; from a V4HF vector (operand 2), lane operand 3.  Requires
;; TARGET_NEON_FP16INST and is always printed as vmul.f16.
3835 (define_insn "neon_vmul_lane<mode>"
3836 [(set (match_operand:VH 0 "s_register_operand" "=w")
3837 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3838 (match_operand:V4HF 2 "s_register_operand"
3839 "<scalar_mul_constraint>")
3840 (match_operand:SI 3 "immediate_operand" "i")]
3842 "TARGET_NEON_FP16INST"
3843 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3844 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; vmull_lane: widening multiply of the VMDI-mode operand 1 by lane
;; operand 3 of operand 2, producing a <V_widen>-mode result (printed with
;; %q).  <sup> supplies the signedness letter of the element type.
3847 (define_insn "neon_vmull<sup>_lane<mode>"
3848 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3849 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3850 (match_operand:VMDI 2 "s_register_operand"
3851 "<scalar_mul_constraint>")
3852 (match_operand:SI 3 "immediate_operand" "i")]
3856 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3858 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull_lane (UNSPEC_VQDMULL_LANE): widening vqdmull of the VMDI-mode
;; operand 1 by lane operand 3 of operand 2, producing a <V_widen>-mode
;; result (printed with %q).
3861 (define_insn "neon_vqdmull_lane<mode>"
3862 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3863 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3864 (match_operand:VMDI 2 "s_register_operand"
3865 "<scalar_mul_constraint>")
3866 (match_operand:SI 3 "immediate_operand" "i")]
3867 UNSPEC_VQDMULL_LANE))]
3870 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3872 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vqdmulh_lane / vqrdmulh_lane (selected by <r>) for the VMQI modes:
;; operand 1 times lane operand 3 of the <V_HALF>-mode operand 2.
;; Destination and first source are printed with %q.
3875 (define_insn "neon_vq<r>dmulh_lane<mode>"
3876 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3877 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3878 (match_operand:<V_HALF> 2 "s_register_operand"
3879 "<scalar_mul_constraint>")
3880 (match_operand:SI 3 "immediate_operand" "i")]
3884 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3886 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vqdmulh_lane / vqrdmulh_lane (selected by <r>) for the VMDI modes:
;; operand 1 times lane operand 3 of operand 2.  All vector operands are
;; printed with %P.
;; NOTE(review): the "type" attribute ends in "_scalar_q", the same string
;; as the VMQI variant, although every operand here is printed as %P --
;; confirm whether the un-suffixed scheduling type was intended.
3889 (define_insn "neon_vq<r>dmulh_lane<mode>"
3890 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3891 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3892 (match_operand:VMDI 2 "s_register_operand"
3893 "<scalar_mul_constraint>")
3894 (match_operand:SI 3 "immediate_operand" "i")]
3898 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3900 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3903 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI-mode version.  Operand 1 is the accumulator and is tied to the
;; destination ("0"); operand 2 is multiplied by lane operand 4 of the
;; <V_HALF>-mode operand 3.  <VQRDMLH_AS:neon_rdma_as> supplies the letter
;; that distinguishes the two mnemonics.
3904 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3905 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3906 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3907 (match_operand:VMQI 2 "s_register_operand" "w")
3908 (match_operand:<V_HALF> 3 "s_register_operand"
3909 "<scalar_mul_constraint>")
3910 (match_operand:SI 4 "immediate_operand" "i")]
3915 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3917 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI-mode version of the vqrdmlah_lane / vqrdmlsh_lane pattern.
;; Operand 1 is the accumulator and is tied to the destination ("0");
;; operand 2 is multiplied by lane operand 4 of operand 3.  All vector
;; operands are printed with %P.
3920 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3921 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3922 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3923 (match_operand:VMDI 2 "s_register_operand" "w")
3924 (match_operand:VMDI 3 "s_register_operand"
3925 "<scalar_mul_constraint>")
3926 (match_operand:SI 4 "immediate_operand" "i")]
3931 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3933 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; vmla_lane for the VMD modes.  Operand 1 is the accumulator, tied to the
;; destination ("0"); operand 2 is multiplied by lane operand 4 of
;; operand 3.  The "type" attribute is selected on <Is_float_mode>.
3936 (define_insn "neon_vmla_lane<mode>"
3937 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3938 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3939 (match_operand:VMD 2 "s_register_operand" "w")
3940 (match_operand:VMD 3 "s_register_operand"
3941 "<scalar_mul_constraint>")
3942 (match_operand:SI 4 "immediate_operand" "i")]
3946 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3949 (if_then_else (match_test "<Is_float_mode>")
3950 (const_string "neon_fp_mla_s_scalar<q>")
3951 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmla_lane for the VMQ modes.  Operand 1 is the accumulator, tied to the
;; destination ("0"); operand 2 is multiplied by lane operand 4 of the
;; <V_HALF>-mode operand 3.  The "type" attribute is selected on
;; <Is_float_mode>.
3954 (define_insn "neon_vmla_lane<mode>"
3955 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3956 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3957 (match_operand:VMQ 2 "s_register_operand" "w")
3958 (match_operand:<V_HALF> 3 "s_register_operand"
3959 "<scalar_mul_constraint>")
3960 (match_operand:SI 4 "immediate_operand" "i")]
3964 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3967 (if_then_else (match_test "<Is_float_mode>")
3968 (const_string "neon_fp_mla_s_scalar<q>")
3969 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlal_lane: widening multiply-accumulate.  Operand 1 is the
;; <V_widen>-mode accumulator, tied to the destination ("0"); the VMDI-mode
;; operand 2 is multiplied by lane operand 4 of operand 3.  <sup> supplies
;; the signedness letter of the element type.
3972 (define_insn "neon_vmlal<sup>_lane<mode>"
3973 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3974 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3975 (match_operand:VMDI 2 "s_register_operand" "w")
3976 (match_operand:VMDI 3 "s_register_operand"
3977 "<scalar_mul_constraint>")
3978 (match_operand:SI 4 "immediate_operand" "i")]
3982 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3984 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal_lane (UNSPEC_VQDMLAL_LANE).  Operand 1 is the <V_widen>-mode
;; accumulator, tied to the destination ("0"); the VMDI-mode operand 2 is
;; combined with lane operand 4 of operand 3.
3987 (define_insn "neon_vqdmlal_lane<mode>"
3988 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3989 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3990 (match_operand:VMDI 2 "s_register_operand" "w")
3991 (match_operand:VMDI 3 "s_register_operand"
3992 "<scalar_mul_constraint>")
3993 (match_operand:SI 4 "immediate_operand" "i")]
3994 UNSPEC_VQDMLAL_LANE))]
3997 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3999 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; vmls_lane for the VMD modes.  Operand 1 is the accumulator, tied to the
;; destination ("0"); operand 2 is multiplied by lane operand 4 of
;; operand 3.  The "type" attribute reuses the mla scheduling types and is
;; selected on <Is_float_mode>.
4002 (define_insn "neon_vmls_lane<mode>"
4003 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4004 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4005 (match_operand:VMD 2 "s_register_operand" "w")
4006 (match_operand:VMD 3 "s_register_operand"
4007 "<scalar_mul_constraint>")
4008 (match_operand:SI 4 "immediate_operand" "i")]
4012 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4015 (if_then_else (match_test "<Is_float_mode>")
4016 (const_string "neon_fp_mla_s_scalar<q>")
4017 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmls_lane for the VMQ modes.  Operand 1 is the accumulator, tied to the
;; destination ("0"); operand 2 is multiplied by lane operand 4 of the
;; <V_HALF>-mode operand 3.  The "type" attribute reuses the mla scheduling
;; types and is selected on <Is_float_mode>.
4020 (define_insn "neon_vmls_lane<mode>"
4021 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4022 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4023 (match_operand:VMQ 2 "s_register_operand" "w")
4024 (match_operand:<V_HALF> 3 "s_register_operand"
4025 "<scalar_mul_constraint>")
4026 (match_operand:SI 4 "immediate_operand" "i")]
4030 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4033 (if_then_else (match_test "<Is_float_mode>")
4034 (const_string "neon_fp_mla_s_scalar<q>")
4035 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlsl_lane: widening multiply-subtract.  Operand 1 is the
;; <V_widen>-mode accumulator, tied to the destination ("0"); the VMDI-mode
;; operand 2 is multiplied by lane operand 4 of operand 3.  <sup> supplies
;; the signedness letter of the element type.
4038 (define_insn "neon_vmlsl<sup>_lane<mode>"
4039 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4040 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4041 (match_operand:VMDI 2 "s_register_operand" "w")
4042 (match_operand:VMDI 3 "s_register_operand"
4043 "<scalar_mul_constraint>")
4044 (match_operand:SI 4 "immediate_operand" "i")]
4048 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4050 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl_lane (UNSPEC_VQDMLSL_LANE).  Operand 1 is the <V_widen>-mode
;; accumulator, tied to the destination ("0"); the VMDI-mode operand 2 is
;; combined with lane operand 4 of operand 3.
4053 (define_insn "neon_vqdmlsl_lane<mode>"
4054 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4055 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4056 (match_operand:VMDI 2 "s_register_operand" "w")
4057 (match_operand:VMDI 3 "s_register_operand"
4058 "<scalar_mul_constraint>")
4059 (match_operand:SI 4 "immediate_operand" "i")]
4060 UNSPEC_VQDMLSL_LANE))]
4063 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4065 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4068 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4069 ; core register into a temp register, then use a scalar taken from that. This
4070 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4071 ; or extracted from another vector. In the latter case it's currently better to
4072 ; use the "_lane" variant, and the former case can probably be implemented
4073 ; using vld1_lane, but that hasn't been done yet.
;; vmul_n for the VMD modes, built on the _lane pattern: allocate a fresh
;; vector register, insert the scalar (operand 2) into lane 0 of it with
;; neon_vset_lane<mode>, then emit neon_vmul_lane<mode> with that register
;; as the scalar source.  The temporary is both source and destination of
;; the vset_lane, so its remaining lanes are never initialised.
4075 (define_expand "neon_vmul_n<mode>"
4076 [(match_operand:VMD 0 "s_register_operand" "")
4077 (match_operand:VMD 1 "s_register_operand" "")
4078 (match_operand:<V_elem> 2 "s_register_operand" "")]
4081 rtx tmp = gen_reg_rtx (<MODE>mode);
4082 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4083 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmul_n for the VMQ modes, built on the _lane pattern: the scalar
;; (operand 2) is inserted into lane 0 of a fresh <V_HALF>-mode register
;; with neon_vset_lane<V_half>, and that register becomes the scalar source
;; of neon_vmul_lane<mode>.  The temporary's other lanes are never
;; initialised.
4088 (define_expand "neon_vmul_n<mode>"
4089 [(match_operand:VMQ 0 "s_register_operand" "")
4090 (match_operand:VMQ 1 "s_register_operand" "")
4091 (match_operand:<V_elem> 2 "s_register_operand" "")]
4094 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4095 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4096 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmul_n for the VH (half-precision) modes; requires
;; TARGET_NEON_FP16INST.  The scalar (operand 2) is inserted into lane 0 of
;; a fresh V4HF register with neon_vset_lanev4hf, and that register becomes
;; the scalar source of neon_vmul_lane<mode>.  The temporary's other lanes
;; are never initialised.
4101 (define_expand "neon_vmul_n<mode>"
4102 [(match_operand:VH 0 "s_register_operand")
4103 (match_operand:VH 1 "s_register_operand")
4104 (match_operand:<V_elem> 2 "s_register_operand")]
4105 "TARGET_NEON_FP16INST"
4107 rtx tmp = gen_reg_rtx (V4HFmode);
4108 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4109 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Signed vmull_n, built on neon_vmulls_lane<mode>: the scalar (operand 2)
;; is inserted into lane 0 of a fresh <MODE>-mode register with
;; neon_vset_lane<mode>, and that register becomes the scalar source.  The
;; temporary's other lanes are never initialised.
4114 (define_expand "neon_vmulls_n<mode>"
4115 [(match_operand:<V_widen> 0 "s_register_operand" "")
4116 (match_operand:VMDI 1 "s_register_operand" "")
4117 (match_operand:<V_elem> 2 "s_register_operand" "")]
4120 rtx tmp = gen_reg_rtx (<MODE>mode);
4121 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4122 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Unsigned vmull_n, built on neon_vmullu_lane<mode>: the scalar
;; (operand 2) is inserted into lane 0 of a fresh <MODE>-mode register with
;; neon_vset_lane<mode>, and that register becomes the scalar source.  The
;; temporary's other lanes are never initialised.
4127 (define_expand "neon_vmullu_n<mode>"
4128 [(match_operand:<V_widen> 0 "s_register_operand" "")
4129 (match_operand:VMDI 1 "s_register_operand" "")
4130 (match_operand:<V_elem> 2 "s_register_operand" "")]
4133 rtx tmp = gen_reg_rtx (<MODE>mode);
4134 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4135 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; vqdmull_n, built on neon_vqdmull_lane<mode>: the scalar (operand 2) is
;; inserted into lane 0 of a fresh <MODE>-mode register with
;; neon_vset_lane<mode>, and that register becomes the scalar source.  The
;; temporary's other lanes are never initialised.
4140 (define_expand "neon_vqdmull_n<mode>"
4141 [(match_operand:<V_widen> 0 "s_register_operand" "")
4142 (match_operand:VMDI 1 "s_register_operand" "")
4143 (match_operand:<V_elem> 2 "s_register_operand" "")]
4146 rtx tmp = gen_reg_rtx (<MODE>mode);
4147 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4148 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; vqdmulh_n for the VMDI modes, built on neon_vqdmulh_lane<mode>: the
;; scalar (operand 2) is inserted into lane 0 of a fresh <MODE>-mode
;; register with neon_vset_lane<mode>, and that register becomes the scalar
;; source.  The temporary's other lanes are never initialised.
4153 (define_expand "neon_vqdmulh_n<mode>"
4154 [(match_operand:VMDI 0 "s_register_operand" "")
4155 (match_operand:VMDI 1 "s_register_operand" "")
4156 (match_operand:<V_elem> 2 "s_register_operand" "")]
4159 rtx tmp = gen_reg_rtx (<MODE>mode);
4160 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4161 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqrdmulh_n for the VMDI modes, built on neon_vqrdmulh_lane<mode>: the
;; scalar (operand 2) is inserted into lane 0 of a fresh <MODE>-mode
;; register with neon_vset_lane<mode>, and that register becomes the scalar
;; source.  The temporary's other lanes are never initialised.
4166 (define_expand "neon_vqrdmulh_n<mode>"
4167 [(match_operand:VMDI 0 "s_register_operand" "")
4168 (match_operand:VMDI 1 "s_register_operand" "")
4169 (match_operand:<V_elem> 2 "s_register_operand" "")]
4172 rtx tmp = gen_reg_rtx (<MODE>mode);
4173 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4174 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqdmulh_n for the VMQI modes, built on neon_vqdmulh_lane<mode>: the
;; scalar (operand 2) is inserted into lane 0 of a fresh <V_HALF>-mode
;; register with neon_vset_lane<V_half>, and that register becomes the
;; scalar source.  The temporary's other lanes are never initialised.
4179 (define_expand "neon_vqdmulh_n<mode>"
4180 [(match_operand:VMQI 0 "s_register_operand" "")
4181 (match_operand:VMQI 1 "s_register_operand" "")
4182 (match_operand:<V_elem> 2 "s_register_operand" "")]
4185 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4186 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4187 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; VMQI variant of neon_vqrdmulh_n<mode>.  The scalar operand 2 is set into a
;; <V_HALF>mode temporary (gen_neon_vset_lane<V_half>, lane argument
;; const0_rtx), which is then used by gen_neon_vqrdmulh_lane<mode>.
4192 (define_expand "neon_vqrdmulh_n<mode>"
4193 [(match_operand:VMQI 0 "s_register_operand" "")
4194 (match_operand:VMQI 1 "s_register_operand" "")
4195 (match_operand:<V_elem> 2 "s_register_operand" "")]
4198 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4199 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4200 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; VMD variant of neon_vmla_n<mode>.  Operands 1 and 2 are vectors and
;; operand 3 is the <V_elem> scalar; the scalar is set into a fresh
;; <MODE>mode temporary with gen_neon_vset_lane<mode> (lane argument
;; const0_rtx) and the work is delegated to gen_neon_vmla_lane<mode>.
4205 (define_expand "neon_vmla_n<mode>"
4206 [(match_operand:VMD 0 "s_register_operand" "")
4207 (match_operand:VMD 1 "s_register_operand" "")
4208 (match_operand:VMD 2 "s_register_operand" "")
4209 (match_operand:<V_elem> 3 "s_register_operand" "")]
4212 rtx tmp = gen_reg_rtx (<MODE>mode);
4213 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4214 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; VMQ variant of neon_vmla_n<mode>.  The scalar operand 3 goes into a
;; <V_HALF>mode temporary through gen_neon_vset_lane<V_half> (lane argument
;; const0_rtx); gen_neon_vmla_lane<mode> then does the actual operation.
4219 (define_expand "neon_vmla_n<mode>"
4220 [(match_operand:VMQ 0 "s_register_operand" "")
4221 (match_operand:VMQ 1 "s_register_operand" "")
4222 (match_operand:VMQ 2 "s_register_operand" "")
4223 (match_operand:<V_elem> 3 "s_register_operand" "")]
4226 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4227 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4228 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expand neon_vmlals_n<mode> via gen_neon_vmlals_lane<mode>.  Operands 0 and
;; 1 are <V_widen>, operand 2 is a VMDI vector and operand 3 the <V_elem>
;; scalar, which is set into a fresh <MODE>mode temporary first.
4233 (define_expand "neon_vmlals_n<mode>"
4234 [(match_operand:<V_widen> 0 "s_register_operand" "")
4235 (match_operand:<V_widen> 1 "s_register_operand" "")
4236 (match_operand:VMDI 2 "s_register_operand" "")
4237 (match_operand:<V_elem> 3 "s_register_operand" "")]
4240 rtx tmp = gen_reg_rtx (<MODE>mode);
4241 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4242 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Expand neon_vmlalu_n<mode> via gen_neon_vmlalu_lane<mode>.  Same operand
;; layout as neon_vmlals_n<mode>: <V_widen> operands 0 and 1, VMDI operand 2,
;; and a <V_elem> scalar in operand 3 that is set into a <MODE>mode temporary.
4247 (define_expand "neon_vmlalu_n<mode>"
4248 [(match_operand:<V_widen> 0 "s_register_operand" "")
4249 (match_operand:<V_widen> 1 "s_register_operand" "")
4250 (match_operand:VMDI 2 "s_register_operand" "")
4251 (match_operand:<V_elem> 3 "s_register_operand" "")]
4254 rtx tmp = gen_reg_rtx (<MODE>mode);
4255 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4256 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Expand neon_vqdmlal_n<mode> via gen_neon_vqdmlal_lane<mode>.  The scalar
;; operand 3 is set into a fresh <MODE>mode temporary with
;; gen_neon_vset_lane<mode> (lane argument const0_rtx).
4261 (define_expand "neon_vqdmlal_n<mode>"
4262 [(match_operand:<V_widen> 0 "s_register_operand" "")
4263 (match_operand:<V_widen> 1 "s_register_operand" "")
4264 (match_operand:VMDI 2 "s_register_operand" "")
4265 (match_operand:<V_elem> 3 "s_register_operand" "")]
4268 rtx tmp = gen_reg_rtx (<MODE>mode);
4269 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4270 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; VMD variant of neon_vmls_n<mode>.  The <V_elem> scalar in operand 3 is set
;; into a fresh <MODE>mode temporary (gen_neon_vset_lane<mode>, lane argument
;; const0_rtx) and gen_neon_vmls_lane<mode> is emitted with it.
4275 (define_expand "neon_vmls_n<mode>"
4276 [(match_operand:VMD 0 "s_register_operand" "")
4277 (match_operand:VMD 1 "s_register_operand" "")
4278 (match_operand:VMD 2 "s_register_operand" "")
4279 (match_operand:<V_elem> 3 "s_register_operand" "")]
4282 rtx tmp = gen_reg_rtx (<MODE>mode);
4283 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4284 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; VMQ variant of neon_vmls_n<mode>.  The scalar operand 3 is held in a
;; <V_HALF>mode temporary, filled by gen_neon_vset_lane<V_half> (lane
;; argument const0_rtx), and then used by gen_neon_vmls_lane<mode>.
4289 (define_expand "neon_vmls_n<mode>"
4290 [(match_operand:VMQ 0 "s_register_operand" "")
4291 (match_operand:VMQ 1 "s_register_operand" "")
4292 (match_operand:VMQ 2 "s_register_operand" "")
4293 (match_operand:<V_elem> 3 "s_register_operand" "")]
4296 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4297 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4298 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expand neon_vmlsls_n<mode> via gen_neon_vmlsls_lane<mode>.  Operands 0 and
;; 1 are <V_widen>; the <V_elem> scalar in operand 3 is set into a fresh
;; <MODE>mode temporary first.
4303 (define_expand "neon_vmlsls_n<mode>"
4304 [(match_operand:<V_widen> 0 "s_register_operand" "")
4305 (match_operand:<V_widen> 1 "s_register_operand" "")
4306 (match_operand:VMDI 2 "s_register_operand" "")
4307 (match_operand:<V_elem> 3 "s_register_operand" "")]
4310 rtx tmp = gen_reg_rtx (<MODE>mode);
4311 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4312 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Expand neon_vmlslu_n<mode> via gen_neon_vmlslu_lane<mode>.  The scalar
;; operand 3 is set into a <MODE>mode temporary with gen_neon_vset_lane<mode>
;; (lane argument const0_rtx) before the _lane pattern is emitted.
4317 (define_expand "neon_vmlslu_n<mode>"
4318 [(match_operand:<V_widen> 0 "s_register_operand" "")
4319 (match_operand:<V_widen> 1 "s_register_operand" "")
4320 (match_operand:VMDI 2 "s_register_operand" "")
4321 (match_operand:<V_elem> 3 "s_register_operand" "")]
4324 rtx tmp = gen_reg_rtx (<MODE>mode);
4325 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4326 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Expand neon_vqdmlsl_n<mode> via gen_neon_vqdmlsl_lane<mode>, using a fresh
;; <MODE>mode temporary that receives the <V_elem> scalar from operand 3
;; through gen_neon_vset_lane<mode> (lane argument const0_rtx).
4331 (define_expand "neon_vqdmlsl_n<mode>"
4332 [(match_operand:<V_widen> 0 "s_register_operand" "")
4333 (match_operand:<V_widen> 1 "s_register_operand" "")
4334 (match_operand:VMDI 2 "s_register_operand" "")
4335 (match_operand:<V_elem> 3 "s_register_operand" "")]
4338 rtx tmp = gen_reg_rtx (<MODE>mode);
4339 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4340 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on two vector registers with an SImode immediate in operand 3.  The
;; immediate is checked with arm_const_bounds (bound arguments 0 and
;; GET_MODE_NUNITS (<MODE>mode)) before the assembly template is returned.
4345 (define_insn "neon_vext<mode>"
4346 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4347 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4348 (match_operand:VDQX 2 "s_register_operand" "w")
4349 (match_operand:SI 3 "immediate_operand" "i")]
4353 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4354 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4356 [(set_attr "type" "neon_ext<q>")]
;; vrev64 over the VDQ modes: one register input, one register output,
;; represented as an unspec and typed neon_rev<q>.
4359 (define_insn "neon_vrev64<mode>"
4360 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4361 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4364 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4365 [(set_attr "type" "neon_rev<q>")]
;; vrev32 over the VX modes: one register input, one register output,
;; represented as an unspec and typed neon_rev<q>.
4368 (define_insn "neon_vrev32<mode>"
4369 [(set (match_operand:VX 0 "s_register_operand" "=w")
4370 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4373 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4374 [(set_attr "type" "neon_rev<q>")]
;; vrev16 over the VE modes: one register input, one register output,
;; represented as an unspec and typed neon_rev<q>.
4377 (define_insn "neon_vrev16<mode>"
4378 [(set (match_operand:VE 0 "s_register_operand" "=w")
4379 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4382 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4383 [(set_attr "type" "neon_rev<q>")]
4386 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4387 ; allocation. For an intrinsic of form:
4388 ; rD = vbsl_* (rS, rN, rM)
4389 ; We can use any of:
4390 ; vbsl rS, rN, rM (if D = S)
4391 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4392 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; The three constraint alternatives below tie the destination to operand 1,
;; operand 3 and operand 2 respectively (the "0" entries), and the three
;; output templates (vbsl, vbit, vbif) are listed in that same order.
4394 (define_insn "neon_vbsl<mode>_internal"
4395 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4396 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4397 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4398 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4402 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4403 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4404 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4405 [(set_attr "type" "neon_bsl<q>")]
;; Expander for neon_vbsl<mode>.  Operand 1 is declared in <V_cmp_result>
;; mode while operands 0, 2 and 3 are VDQX; the preparation code replaces
;; operand 1 with its gen_lowpart in <MODE>mode so that all operands of the
;; unspec share one mode.
4408 (define_expand "neon_vbsl<mode>"
4409 [(set (match_operand:VDQX 0 "s_register_operand" "")
4410 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4411 (match_operand:VDQX 2 "s_register_operand" "")
4412 (match_operand:VDQX 3 "s_register_operand" "")]
4416 /* We can't alias operands together if they have different modes. */
4417 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Three-register form of v<shift_op>.<sup><size>: operand 2 is a vector
;; register rather than an immediate.  Typed neon_shift_imm<q>.
4421 (define_insn "neon_v<shift_op><sup><mode>"
4422 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4423 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4424 (match_operand:VDQIX 2 "s_register_operand" "w")]
4427 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4428 [(set_attr "type" "neon_shift_imm<q>")]
;; Three-register form of v<shift_op>.<sup><size> with the same operand shape
;; and output template as the neon_shift_imm<q>-typed pattern of the same
;; name template; this one is typed neon_sat_shift_imm<q>.
4432 (define_insn "neon_v<shift_op><sup><mode>"
4433 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4434 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4435 (match_operand:VDQIX 2 "s_register_operand" "w")]
4438 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4439 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Immediate (_n) form of v<shift_op>.<sup><size>.  Operand 2 is an SImode
;; immediate checked by arm_const_bounds with bound arguments 1 and
;; neon_element_bits (<MODE>mode) + 1.
4443 (define_insn "neon_v<shift_op><sup>_n<mode>"
4444 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4445 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4446 (match_operand:SI 2 "immediate_operand" "i")]
4450 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4451 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4453 [(set_attr "type" "neon_shift_imm<q>")]
4456 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: the VN-mode source (printed with %q1) yields
;; a <V_narrow> result (printed with %P0).  The immediate is checked with
;; bound arguments 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4457 (define_insn "neon_v<shift_op>_n<mode>"
4458 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4459 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4460 (match_operand:SI 2 "immediate_operand" "i")]
4464 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4465 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4467 [(set_attr "type" "neon_shift_imm_narrow_q")]
4470 ;; vqshrn_n, vqrshrn_n
;; Narrowing shift by immediate with a <sup> type prefix in the mnemonic.
;; Same operand shape and immediate bounds (1 and element bits / 2 + 1) as
;; the vshrn_n/vrshrn_n pattern; typed neon_sat_shift_imm_narrow_q.
4471 (define_insn "neon_v<shift_op><sup>_n<mode>"
4472 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4473 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4474 (match_operand:SI 2 "immediate_operand" "i")]
4478 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4479 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4481 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4484 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate whose mnemonic carries the <V_s_elem> type
;; suffix.  Immediate bounds arguments are 1 and element bits / 2 + 1; typed
;; neon_sat_shift_imm_narrow_q.
4485 (define_insn "neon_v<shift_op>_n<mode>"
4486 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4487 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4488 (match_operand:SI 2 "immediate_operand" "i")]
4492 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4493 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4495 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate.  Operand 2 is checked by arm_const_bounds with bound
;; arguments 0 and neon_element_bits (<MODE>mode).
4498 (define_insn "neon_vshl_n<mode>"
4499 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4500 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4501 (match_operand:SI 2 "immediate_operand" "i")]
4505 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4506 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4508 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate, with the <sup> type prefix in the mnemonic.  Operand 2
;; is checked with bound arguments 0 and neon_element_bits (<MODE>mode).
4511 (define_insn "neon_vqshl_<sup>_n<mode>"
4512 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4513 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4514 (match_operand:SI 2 "immediate_operand" "i")]
4518 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4519 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4521 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate; the mnemonic uses the <V_s_elem> type suffix.
;; Operand 2 is checked with bound arguments 0 and
;; neon_element_bits (<MODE>mode).
4524 (define_insn "neon_vqshlu_n<mode>"
4525 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4526 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4527 (match_operand:SI 2 "immediate_operand" "i")]
4531 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4532 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4534 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vshll by immediate: VW-mode source (printed %P1), <V_widen> destination
;; (printed %q0).
;; NOTE(review): the in-body comment states the range as 0 < imm <= size, but
;; the lower bound handed to arm_const_bounds is 0, unlike the shift patterns
;; in this file that pass 1.  Confirm whether an immediate of 0 is meant to
;; be accepted here.
4537 (define_insn "neon_vshll<sup>_n<mode>"
4538 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4539 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4540 (match_operand:SI 2 "immediate_operand" "i")]
4544 /* The boundaries are: 0 < imm <= size. */
4545 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4546 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4548 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate form with an extra input tied to the destination:
;; operand 1 uses the matching constraint "0", so only operands 0, 2 and the
;; immediate 3 appear in the assembly.  The immediate is checked with bound
;; arguments 1 and neon_element_bits (<MODE>mode) + 1.  Typed
;; neon_shift_acc<q>.
4552 (define_insn "neon_v<shift_op><sup>_n<mode>"
4553 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4554 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4555 (match_operand:VDQIX 2 "s_register_operand" "w")
4556 (match_operand:SI 3 "immediate_operand" "i")]
4560 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4561 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4563 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate.  Operand 1 is tied to the destination ("0"); the
;; immediate operand 3 is checked with bound arguments 1 and
;; neon_element_bits (<MODE>mode) + 1.
4566 (define_insn "neon_vsri_n<mode>"
4567 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4568 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4569 (match_operand:VDQIX 2 "s_register_operand" "w")
4570 (match_operand:SI 3 "immediate_operand" "i")]
4574 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4575 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4577 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate.  Operand 1 is tied to the destination ("0"); the
;; immediate operand 3 is checked with bound arguments 0 and
;; neon_element_bits (<MODE>mode).
4580 (define_insn "neon_vsli_n<mode>"
4581 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4582 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4583 (match_operand:VDQIX 2 "s_register_operand" "w")
4584 (match_operand:SI 3 "immediate_operand" "i")]
4588 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4589 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4591 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table: operand 1 is the V8QI table (printed
;; inside the braces) and operand 2 is the V8QI index vector.
4594 (define_insn "neon_vtbl1v8qi"
4595 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4596 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4597 (match_operand:V8QI 2 "s_register_operand" "w")]
4600 "vtbl.8\t%P0, {%P1}, %P2"
4601 [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in the TImode operand 1.  The output
;; code names the table's components as V8QImode hard registers at
;; REGNO (operands[1]) and REGNO + 2 so that they can be printed as a
;; register list, and emits the instruction through output_asm_insn.
4604 (define_insn "neon_vtbl2v8qi"
4605 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4606 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4607 (match_operand:V8QI 2 "s_register_operand" "w")]
4612 int tabbase = REGNO (operands[1]);
4614 ops[0] = operands[0];
4615 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4616 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4617 ops[3] = operands[2];
4618 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4622 [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in the EImode operand 1.  The table
;; components are named as V8QImode hard registers at REGNO (operands[1]),
;; + 2 and + 4, and the instruction is emitted through output_asm_insn.
4625 (define_insn "neon_vtbl3v8qi"
4626 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4627 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4628 (match_operand:V8QI 2 "s_register_operand" "w")]
4633 int tabbase = REGNO (operands[1]);
4635 ops[0] = operands[0];
4636 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4637 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4638 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4639 ops[4] = operands[2];
4640 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4644 [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in the OImode operand 1.  The table
;; components are named as V8QImode hard registers at REGNO (operands[1]),
;; + 2, + 4 and + 6, and the instruction is emitted through output_asm_insn.
4647 (define_insn "neon_vtbl4v8qi"
4648 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4649 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4650 (match_operand:V8QI 2 "s_register_operand" "w")]
4655 int tabbase = REGNO (operands[1]);
4657 ops[0] = operands[0];
4658 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4659 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4660 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4661 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4662 ops[5] = operands[2];
4663 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4667 [(set_attr "type" "neon_tbl4")]
4670 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; neon_vtbl1v16qi is split once reload has completed into two
;; neon_vtbl2v8qi insns: one for the low and one for the high V8QImode half
;; (taken with simplify_subreg) of op0 and op2.  Both use operand 1 viewed as
;; TImode as the table.  The destination is earlyclobber ("=&w").
4671 (define_insn_and_split "neon_vtbl1v16qi"
4672 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4673 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4674 (match_operand:V16QI 2 "s_register_operand" "w")]
4678 "&& reload_completed"
4681 rtx op0, op1, op2, part0, part2;
4685 op1 = gen_lowpart (TImode, operands[1]);
4688 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4689 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4690 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4691 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4693 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4694 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4695 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4696 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4699 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table in operand 1.  Split once reload
;; has completed into two insns built with gen_neon_vtbl2v8qi, one per
;; V8QImode half (low, then high) of op0 and op2.
;; NOTE(review): assuming op1 is operand 1 (OImode), the generator called
;; here belongs to a pattern whose table operand is declared TImode; the
;; resulting RTL would presumably be recognised by the OImode
;; neon_vtbl4v8qi pattern instead.  Confirm, and consider calling
;; gen_neon_vtbl4v8qi for clarity.
4702 (define_insn_and_split "neon_vtbl2v16qi"
4703 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4704 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4705 (match_operand:V16QI 2 "s_register_operand" "w")]
4709 "&& reload_completed"
4712 rtx op0, op1, op2, part0, part2;
4719 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4720 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4721 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4722 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4724 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4725 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4726 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4727 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4730 [(set_attr "type" "multiple")]
4733 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4734 ;; handle quad-word input modes, producing octa-word output modes. But
4735 ;; that requires us to add support for octa-word vector modes in moves.
4736 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers into one OImode register.  The insn is split
;; once reload has completed, and the split body hands the whole operand
;; array to neon_split_vcombine.
4737 (define_insn_and_split "neon_vcombinev16qi"
4738 [(set (match_operand:OI 0 "s_register_operand" "=w")
4739 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4740 (match_operand:V16QI 2 "s_register_operand" "w")]
4744 "&& reload_completed"
4747 neon_split_vcombine (operands);
4750 [(set_attr "type" "multiple")]
;; vtbx with a one-register table.  Operand 1 is tied to the destination
;; ("0"), operand 2 is the V8QI table and operand 3 the index vector.
4753 (define_insn "neon_vtbx1v8qi"
4754 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4755 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4756 (match_operand:V8QI 2 "s_register_operand" "w")
4757 (match_operand:V8QI 3 "s_register_operand" "w")]
4760 "vtbx.8\t%P0, {%P2}, %P3"
4761 [(set_attr "type" "neon_tbl1")]
;; vtbx with a two-register table in the TImode operand 2; operand 1 is tied
;; to the destination.  The table components are named as V8QImode hard
;; registers at REGNO (operands[2]) and + 2 for the register-list syntax.
4764 (define_insn "neon_vtbx2v8qi"
4765 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4766 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4767 (match_operand:TI 2 "s_register_operand" "w")
4768 (match_operand:V8QI 3 "s_register_operand" "w")]
4773 int tabbase = REGNO (operands[2]);
4775 ops[0] = operands[0];
4776 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4777 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4778 ops[3] = operands[3];
4779 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4783 [(set_attr "type" "neon_tbl2")]
;; vtbx with a three-register table in the EImode operand 2; operand 1 is
;; tied to the destination.  The table components are named as V8QImode hard
;; registers at REGNO (operands[2]), + 2 and + 4.
4786 (define_insn "neon_vtbx3v8qi"
4787 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4788 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4789 (match_operand:EI 2 "s_register_operand" "w")
4790 (match_operand:V8QI 3 "s_register_operand" "w")]
4795 int tabbase = REGNO (operands[2]);
4797 ops[0] = operands[0];
4798 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4799 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4800 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4801 ops[4] = operands[3];
4802 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4806 [(set_attr "type" "neon_tbl3")]
;; vtbx with a four-register table in the OImode operand 2; operand 1 is tied
;; to the destination.  The table components are named as V8QImode hard
;; registers at REGNO (operands[2]), + 2, + 4 and + 6.
4809 (define_insn "neon_vtbx4v8qi"
4810 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4811 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4812 (match_operand:OI 2 "s_register_operand" "w")
4813 (match_operand:V8QI 3 "s_register_operand" "w")]
4818 int tabbase = REGNO (operands[2]);
4820 ops[0] = operands[0];
4821 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4822 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4823 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4824 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4825 ops[5] = operands[3];
4826 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4830 [(set_attr "type" "neon_tbl4")]
;; Expander for vtrn with two results: operands 0 and 3 are both set from
;; the same pair of inputs (operands 1 and 2) through two different unspecs,
;; the second being UNSPEC_VTRN2.
4833 (define_expand "neon_vtrn<mode>_internal"
4835 [(set (match_operand:VDQWH 0 "s_register_operand")
4836 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4837 (match_operand:VDQWH 2 "s_register_operand")]
4839 (set (match_operand:VDQWH 3 "s_register_operand")
4840 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4845 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2, both earlyclobber ("=&w"), and
;; each input is tied to one of them: operand 1 to operand 0 and operand 3 to
;; operand 2.  Only the two output registers appear in the assembly.
4846 (define_insn "*neon_vtrn<mode>_insn"
4847 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4848 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4849 (match_operand:VDQWH 3 "s_register_operand" "2")]
4851 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4852 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4855 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4856 [(set_attr "type" "neon_permute<q>")]
;; Expander for vzip with two results: operands 0 and 3 are both set from
;; the same pair of inputs (operands 1 and 2) through two different unspecs,
;; the second being UNSPEC_VZIP2.
4859 (define_expand "neon_vzip<mode>_internal"
4861 [(set (match_operand:VDQWH 0 "s_register_operand")
4862 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4863 (match_operand:VDQWH 2 "s_register_operand")]
4865 (set (match_operand:VDQWH 3 "s_register_operand")
4866 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4871 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2 (both "=&w"); input operand 1 is tied to
;; operand 0 and input operand 3 to operand 2, so the assembly names only the
;; two output registers.
4872 (define_insn "*neon_vzip<mode>_insn"
4873 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4874 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4875 (match_operand:VDQWH 3 "s_register_operand" "2")]
4877 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4878 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4881 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4882 [(set_attr "type" "neon_zip<q>")]
;; Expander for vuzp with two results: operands 0 and 3 are both set from
;; the same pair of inputs (operands 1 and 2) through two different unspecs,
;; the second being UNSPEC_VUZP2.
4885 (define_expand "neon_vuzp<mode>_internal"
4887 [(set (match_operand:VDQWH 0 "s_register_operand")
4888 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4889 (match_operand:VDQWH 2 "s_register_operand")]
4891 (set (match_operand:VDQWH 3 "s_register_operand" "")
4892 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4897 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2 (both "=&w"); input operand 1 is tied to
;; operand 0 and input operand 3 to operand 2, so the assembly names only the
;; two output registers.
4898 (define_insn "*neon_vuzp<mode>_insn"
4899 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4900 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4901 (match_operand:VDQWH 3 "s_register_operand" "2")]
4903 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4904 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4907 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4908 [(set_attr "type" "neon_zip<q>")]
;; vec_load_lanes for a single vector: a VDQX register destination loaded
;; from a neon_struct_operand memory reference of the same mode, expressed
;; as an unspec.
4911 (define_expand "vec_load_lanes<mode><mode>"
4912 [(set (match_operand:VDQX 0 "s_register_operand")
4913 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; vld1 of a whole vector: register destination ("=w"), memory source with
;; the "Um" constraint, emitted as vld1.<V_sz_elem>.
4917 (define_insn "neon_vld1<mode>"
4918 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4919 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4922 "vld1.<V_sz_elem>\t%h0, %A1"
4923 [(set_attr "type" "neon_load1_1reg<q>")]
4926 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4927 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order here, by NEON_ENDIAN_LANE_N, before the instruction is printed.
;;
;; VDX variant: operand 2 is the existing vector (tied to the destination)
;; and operand 3 the lane number.  Operand 3 is replaced by the converted
;; lane before one of two templates is returned: a plain register load or a
;; single-lane load indexed by %c3.
4929 (define_insn "neon_vld1_lane<mode>"
4930 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4931 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4932 (match_operand:VDX 2 "s_register_operand" "0")
4933 (match_operand:SI 3 "immediate_operand" "i")]
4937 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4938 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4939 operands[3] = GEN_INT (lane);
4941 return "vld1.<V_sz_elem>\t%P0, %A1";
4943 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4945 [(set_attr "type" "neon_load1_one_lane<q>")]
4948 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4949 ;; here on big endian targets.
;; VQX variant: the lane is converted with NEON_ENDIAN_LANE_N, lanes at or
;; above max / 2 get separate treatment, and operand 0 is rewritten as a
;; <V_HALF>mode hard register (regno) so that the load is printed against
;; one half of the destination.
4950 (define_insn "neon_vld1_lane<mode>"
4951 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4952 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4953 (match_operand:VQX 2 "s_register_operand" "0")
4954 (match_operand:SI 3 "immediate_operand" "i")]
4958 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4959 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4960 operands[3] = GEN_INT (lane);
4961 int regno = REGNO (operands[0]);
4962 if (lane >= max / 2)
4966 operands[3] = GEN_INT (lane);
4968 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4970 return "vld1.<V_sz_elem>\t%P0, %A1";
4972 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4974 [(set_attr "type" "neon_load1_one_lane<q>")]
;; vld1 to all lanes for the VD_LANE modes: the RTL is a vec_duplicate of the
;; <V_elem> memory operand, printed with the empty-index "[]" syntax.
4977 (define_insn "neon_vld1_dup<mode>"
4978 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4979 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4981 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4982 [(set_attr "type" "neon_load1_all_lanes<q>")]
4985 ;; Special case for DImode. Treat it exactly like a simple load.
;; Both the register destination and the memory source are DImode, and the
;; source is wrapped in an unspec rather than a vec_duplicate.
4986 (define_expand "neon_vld1_dupdi"
4987 [(set (match_operand:DI 0 "s_register_operand" "")
4988 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; vld1 to all lanes for the VQ2 modes: a vec_duplicate of the <V_elem>
;; memory operand.  The register list names the destination twice, as %e0
;; and %f0 (presumably its two halves -- confirm against the operand-printing
;; code), each with the empty-index "[]" syntax.
4994 (define_insn "neon_vld1_dup<mode>"
4995 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4996 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4999 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5001 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate-load.  Split once reload has completed: the DImode low
;; part of the destination is loaded with gen_neon_vld1_dupdi and then copied
;; into the high part with a move.  The "length" attribute is 8.
5004 (define_insn_and_split "neon_vld1_dupv2di"
5005 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5006 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5009 "&& reload_completed"
5012 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5013 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5014 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5017 [(set_attr "length" "8")
5018 (set_attr "type" "neon_load1_all_lanes_q")]
;; vec_store_lanes for a single vector: a neon_struct_operand memory
;; destination written from a VDQX register of the same mode, expressed as
;; an unspec.
5021 (define_expand "vec_store_lanes<mode><mode>"
5022 [(set (match_operand:VDQX 0 "neon_struct_operand")
5023 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; vst1 of a whole vector: memory destination with the "Um" constraint,
;; register source ("w"), emitted as vst1.<V_sz_elem>.
5027 (define_insn "neon_vst1<mode>"
5028 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5029 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5032 "vst1.<V_sz_elem>\t%h1, %A0"
5033 [(set_attr "type" "neon_store1_1reg<q>")])
5035 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5036 ;; here on big endian targets.
;; VDX variant: stores one <V_elem> from the vector in operand 1.  Operand 2
;; is replaced by the lane converted with NEON_ENDIAN_LANE_N before one of
;; two templates is returned: a whole-register store or a single-lane store
;; indexed by %c2.
5037 (define_insn "neon_vst1_lane<mode>"
5038 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5040 [(match_operand:VDX 1 "s_register_operand" "w")
5041 (match_operand:SI 2 "immediate_operand" "i")]
5045 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5046 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5047 operands[2] = GEN_INT (lane);
5049 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5051 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5053 [(set_attr "type" "neon_store1_one_lane<q>")]
5056 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5057 ;; here on big endian targets.
;; VQX variant: lanes at or above max / 2 get separate treatment, after
;; which operand 2 becomes the (possibly adjusted) lane and operand 1 is
;; rewritten as a <V_HALF>mode hard register (regno), so the store is printed
;; against one half of the source.
5058 (define_insn "neon_vst1_lane<mode>"
5059 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5061 [(match_operand:VQX 1 "s_register_operand" "w")
5062 (match_operand:SI 2 "immediate_operand" "i")]
5066 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5067 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5068 int regno = REGNO (operands[1]);
5069 if (lane >= max / 2)
5074 operands[2] = GEN_INT (lane);
5075 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5077 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5079 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5081 [(set_attr "type" "neon_store1_one_lane<q>")]
;; vec_load_lanes into a TImode register from a TImode structure in memory.
;; The element mode (VDX) is carried only by the dummy
;; UNSPEC_VSTRUCTDUMMY operand.
5084 (define_expand "vec_load_lanesti<mode>"
5085 [(set (match_operand:TI 0 "s_register_operand")
5086 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5087 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; TImode structure load for the VDX element modes.  When <V_sz_elem> is 64
;; the output is vld1.64, otherwise vld2.<V_sz_elem>; the "type" attribute
;; makes the same 64-bit distinction (neon_load1_2reg<q> versus
;; neon_load2_2reg<q>).
5091 (define_insn "neon_vld2<mode>"
5092 [(set (match_operand:TI 0 "s_register_operand" "=w")
5093 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5094 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5098 if (<V_sz_elem> == 64)
5099 return "vld1.64\t%h0, %A1";
5101 return "vld2.<V_sz_elem>\t%h0, %A1";
5104 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5105 (const_string "neon_load1_2reg<q>")
5106 (const_string "neon_load2_2reg<q>")))]
;; vec_load_lanes into an OImode register from an OImode structure in
;; memory.  The element mode (VQ2) is carried only by the dummy
;; UNSPEC_VSTRUCTDUMMY operand.
5109 (define_expand "vec_load_lanesoi<mode>"
5110 [(set (match_operand:OI 0 "s_register_operand")
5111 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5112 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; OImode structure load for the VQ2 element modes; always printed as
;; vld2.<V_sz_elem> and typed neon_load2_2reg_q.
5116 (define_insn "neon_vld2<mode>"
5117 [(set (match_operand:OI 0 "s_register_operand" "=w")
5118 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5119 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5122 "vld2.<V_sz_elem>\t%h0, %A1"
5123 [(set_attr "type" "neon_load2_2reg_q")])
5125 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5126 ;; here on big endian targets.
;; TImode variant (VD_LANE element modes).  Operand 2 is the existing
;; structure, tied to the destination.  The output code names two DImode
;; hard registers at REGNO (operands[0]) and REGNO + 2 and prints both with
;; the converted lane index.
5127 (define_insn "neon_vld2_lane<mode>"
5128 [(set (match_operand:TI 0 "s_register_operand" "=w")
5129 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5130 (match_operand:TI 2 "s_register_operand" "0")
5131 (match_operand:SI 3 "immediate_operand" "i")
5132 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5136 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5137 int regno = REGNO (operands[0]);
5139 ops[0] = gen_rtx_REG (DImode, regno);
5140 ops[1] = gen_rtx_REG (DImode, regno + 2);
5141 ops[2] = operands[1];
5142 ops[3] = GEN_INT (lane);
5143 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5146 [(set_attr "type" "neon_load2_one_lane<q>")]
5149 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5150 ;; here on big endian targets.
;; OImode variant (VQ_HS element modes).  Lanes at or above max / 2 get
;; separate treatment; the two registers printed are DImode hard registers
;; at regno and regno + 4, both indexed by the resulting lane.
5151 (define_insn "neon_vld2_lane<mode>"
5152 [(set (match_operand:OI 0 "s_register_operand" "=w")
5153 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5154 (match_operand:OI 2 "s_register_operand" "0")
5155 (match_operand:SI 3 "immediate_operand" "i")
5156 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5160 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5161 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5162 int regno = REGNO (operands[0]);
5164 if (lane >= max / 2)
5169 ops[0] = gen_rtx_REG (DImode, regno);
5170 ops[1] = gen_rtx_REG (DImode, regno + 4);
5171 ops[2] = operands[1];
5172 ops[3] = GEN_INT (lane);
5173 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5176 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Duplicating two-element structure load into a TImode register.  When the
;; element mode has more than one unit the all-lanes vld2 form is printed,
;; otherwise a plain vld1; the "type" attribute follows the same split on
;; <V_mode_nunits>.
5179 (define_insn "neon_vld2_dup<mode>"
5180 [(set (match_operand:TI 0 "s_register_operand" "=w")
5181 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5182 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5186 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5187 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5189 return "vld1.<V_sz_elem>\t%h0, %A1";
5192 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5193 (const_string "neon_load2_all_lanes<q>")
5194 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes from a TImode register to a TImode structure in memory.
;; The element mode (VDX) is carried only by the dummy
;; UNSPEC_VSTRUCTDUMMY operand.
5197 (define_expand "vec_store_lanesti<mode>"
5198 [(set (match_operand:TI 0 "neon_struct_operand")
5199 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5200 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; TImode structure store for the VDX element modes.  When <V_sz_elem> is 64
;; the output is vst1.64, otherwise vst2.<V_sz_elem>.
;; NOTE(review): for the non-64-bit case the "type" attribute is
;; neon_store2_one_lane<q>, although this pattern stores whole registers and
;; the OImode neon_vst2<mode> uses neon_store2_4reg<q>.  Confirm that this
;; scheduling type is intended.
5204 (define_insn "neon_vst2<mode>"
5205 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5206 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5207 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5211 if (<V_sz_elem> == 64)
5212 return "vst1.64\t%h1, %A0";
5214 return "vst2.<V_sz_elem>\t%h1, %A0";
5217 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5218 (const_string "neon_store1_2reg<q>")
5219 (const_string "neon_store2_one_lane<q>")))]
;; vec_store_lanes from an OImode register to an OImode structure in
;; memory.  The element mode (VQ2) is carried only by the dummy
;; UNSPEC_VSTRUCTDUMMY operand.
5222 (define_expand "vec_store_lanesoi<mode>"
5223 [(set (match_operand:OI 0 "neon_struct_operand")
5224 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5225 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; OImode structure store for the VQ2 element modes; always printed as
;; vst2.<V_sz_elem> and typed neon_store2_4reg<q>.
5229 (define_insn "neon_vst2<mode>"
5230 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5231 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5232 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5235 "vst2.<V_sz_elem>\t%h1, %A0"
5236 [(set_attr "type" "neon_store2_4reg<q>")]
5239 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5240 ;; here on big endian targets.
;; TImode variant (VD_LANE element modes): stores one lane from each of two
;; DImode hard registers, at REGNO (operands[1]) and REGNO + 2, to the
;; <V_two_elem> memory operand, using the converted lane index.
5241 (define_insn "neon_vst2_lane<mode>"
5242 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5243 (unspec:<V_two_elem>
5244 [(match_operand:TI 1 "s_register_operand" "w")
5245 (match_operand:SI 2 "immediate_operand" "i")
5246 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5250 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5251 int regno = REGNO (operands[1]);
5253 ops[0] = operands[0];
5254 ops[1] = gen_rtx_REG (DImode, regno);
5255 ops[2] = gen_rtx_REG (DImode, regno + 2);
5256 ops[3] = GEN_INT (lane);
5257 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5260 [(set_attr "type" "neon_store2_one_lane<q>")]
5263 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5264 ;; here on big endian targets.
;; neon_vst2_lane<mode> (VQ_HS modes): store one lane from each of two
;; DImode registers of the OI operand to a <V_two_elem> memory operand.
;; The registers are formed from regno and regno + 4; the lane index is
;; immediate operand 2 mapped through NEON_ENDIAN_LANE_N and is compared
;; against half the mode's element count before the registers are built.
5265 (define_insn "neon_vst2_lane<mode>"
5266 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5267 (unspec:<V_two_elem>
5268 [(match_operand:OI 1 "s_register_operand" "w")
5269 (match_operand:SI 2 "immediate_operand" "i")
5270 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5274 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5275 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5276 int regno = REGNO (operands[1]);
5278 if (lane >= max / 2)
5283 ops[0] = operands[0];
5284 ops[1] = gen_rtx_REG (DImode, regno);
5285 ops[2] = gen_rtx_REG (DImode, regno + 4);
5286 ops[3] = GEN_INT (lane);
5287 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5290 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander vec_load_lanesei<mode> (VDX modes): an EI register operand is set
;; from an unspec wrapping an EI neon_struct_operand.
5293 (define_expand "vec_load_lanesei<mode>"
5294 [(set (match_operand:EI 0 "s_register_operand")
5295 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5296 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; neon_vld3<mode> (VDX modes): load an EI register operand from memory.
;; 64-bit elements are emitted as vld1.64, all other element sizes as vld3;
;; the "type" attribute follows the same element-size test.
5300 (define_insn "neon_vld3<mode>"
5301 [(set (match_operand:EI 0 "s_register_operand" "=w")
5302 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5303 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5307 if (<V_sz_elem> == 64)
5308 return "vld1.64\t%h0, %A1";
5310 return "vld3.<V_sz_elem>\t%h0, %A1";
5313 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5314 (const_string "neon_load1_3reg<q>")
5315 (const_string "neon_load3_3reg<q>")))]
;; Expander vec_load_lanesci<mode> (VQ2 modes): forwards its CI register and
;; CI memory operands to gen_neon_vld3<mode>.
5318 (define_expand "vec_load_lanesci<mode>"
5319 [(match_operand:CI 0 "s_register_operand")
5320 (match_operand:CI 1 "neon_struct_operand")
5321 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Expander neon_vld3<mode> (VQ2 modes): the CI memory operand is split into
;; two consecutive EI-sized pieces.  The first piece is loaded with
;; neon_vld3qa, the second (at offset GET_MODE_SIZE (EImode)) with
;; neon_vld3qb, which also receives operand 0 as an input.
5328 (define_expand "neon_vld3<mode>"
5329 [(match_operand:CI 0 "s_register_operand")
5330 (match_operand:CI 1 "neon_struct_operand")
5331 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5336 mem = adjust_address (operands[1], EImode, 0);
5337 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5338 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5339 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; neon_vld3qa<mode> (VQ2 modes): vld3 from an EI memory operand into the
;; DImode registers at regno, regno + 4 and regno + 8 of the CI destination.
5343 (define_insn "neon_vld3qa<mode>"
5344 [(set (match_operand:CI 0 "s_register_operand" "=w")
5345 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5346 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5350 int regno = REGNO (operands[0]);
5352 ops[0] = gen_rtx_REG (DImode, regno);
5353 ops[1] = gen_rtx_REG (DImode, regno + 4);
5354 ops[2] = gen_rtx_REG (DImode, regno + 8);
5355 ops[3] = operands[1];
5356 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5359 [(set_attr "type" "neon_load3_3reg<q>")]
;; neon_vld3qb<mode> (VQ2 modes): vld3 from an EI memory operand into the
;; DImode registers at regno + 2, regno + 6 and regno + 10 of the CI
;; destination.  Operand 2 is tied to the destination ("0" constraint).
5362 (define_insn "neon_vld3qb<mode>"
5363 [(set (match_operand:CI 0 "s_register_operand" "=w")
5364 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5365 (match_operand:CI 2 "s_register_operand" "0")
5366 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5370 int regno = REGNO (operands[0]);
5372 ops[0] = gen_rtx_REG (DImode, regno + 2);
5373 ops[1] = gen_rtx_REG (DImode, regno + 6);
5374 ops[2] = gen_rtx_REG (DImode, regno + 10);
5375 ops[3] = operands[1];
5376 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5379 [(set_attr "type" "neon_load3_3reg<q>")]
5382 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5383 ;; here on big endian targets.
;; neon_vld3_lane<mode> (VD_LANE modes): load one lane into each of the
;; DImode registers at regno, regno + 2 and regno + 4 of the EI destination
;; from a <V_three_elem> memory operand.  Operand 2 is tied to the
;; destination; the lane index is immediate operand 3 mapped through
;; NEON_ENDIAN_LANE_N.  The address is printed with plain %3 here, whereas
;; the vld2/vld4 lane patterns use the %A modifier.
5384 (define_insn "neon_vld3_lane<mode>"
5385 [(set (match_operand:EI 0 "s_register_operand" "=w")
5386 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5387 (match_operand:EI 2 "s_register_operand" "0")
5388 (match_operand:SI 3 "immediate_operand" "i")
5389 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5393 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5394 int regno = REGNO (operands[0]);
5396 ops[0] = gen_rtx_REG (DImode, regno);
5397 ops[1] = gen_rtx_REG (DImode, regno + 2);
5398 ops[2] = gen_rtx_REG (DImode, regno + 4);
5399 ops[3] = operands[1];
5400 ops[4] = GEN_INT (lane);
5401 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5405 [(set_attr "type" "neon_load3_one_lane<q>")]
5408 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5409 ;; here on big endian targets.
;; neon_vld3_lane<mode> (VQ_HS modes): load one lane into each of three
;; DImode registers of the CI destination from a <V_three_elem> memory
;; operand.  The registers are formed from regno, regno + 4 and regno + 8;
;; the lane index is immediate operand 3 mapped through NEON_ENDIAN_LANE_N
;; and is compared against half the mode's element count first.
5410 (define_insn "neon_vld3_lane<mode>"
5411 [(set (match_operand:CI 0 "s_register_operand" "=w")
5412 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5413 (match_operand:CI 2 "s_register_operand" "0")
5414 (match_operand:SI 3 "immediate_operand" "i")
5415 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5419 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5420 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5421 int regno = REGNO (operands[0]);
5423 if (lane >= max / 2)
5428 ops[0] = gen_rtx_REG (DImode, regno);
5429 ops[1] = gen_rtx_REG (DImode, regno + 4);
5430 ops[2] = gen_rtx_REG (DImode, regno + 8);
5431 ops[3] = operands[1];
5432 ops[4] = GEN_INT (lane);
5433 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5437 [(set_attr "type" "neon_load3_one_lane<q>")]
;; neon_vld3_dup<mode> (VDX modes): load a <V_three_elem> memory operand into
;; an EI register operand.  For modes with more than one element a vld3 with
;; the "[]" register syntax is emitted on the DImode registers at regno,
;; regno + 2 and regno + 4; otherwise a vld1 of the %h0 register list is
;; returned.  The "type" attribute follows the same element-count test.
5440 (define_insn "neon_vld3_dup<mode>"
5441 [(set (match_operand:EI 0 "s_register_operand" "=w")
5442 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5443 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5447 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5449 int regno = REGNO (operands[0]);
5451 ops[0] = gen_rtx_REG (DImode, regno);
5452 ops[1] = gen_rtx_REG (DImode, regno + 2);
5453 ops[2] = gen_rtx_REG (DImode, regno + 4);
5454 ops[3] = operands[1];
5455 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5459 return "vld1.<V_sz_elem>\t%h0, %A1";
5462 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5463 (const_string "neon_load3_all_lanes<q>")
5464 (const_string "neon_load1_1reg<q>")))])
;; Expander vec_store_lanesei<mode> (VDX modes): an EI neon_struct_operand is
;; set from an unspec wrapping an EI register operand.
5466 (define_expand "vec_store_lanesei<mode>"
5467 [(set (match_operand:EI 0 "neon_struct_operand")
5468 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5469 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; neon_vst3<mode> (VDX modes): store an EI register operand to a structure
;; in memory.  64-bit elements are emitted as vst1.64; all other element
;; sizes as vst3.  Both forms store the whole %h1 register list, so the vst3
;; form is typed neon_store3_3reg<q> (not as a single-lane store), mirroring
;; the neon_load3_3reg<q> type used by the matching neon_vld3<mode> pattern.
5473 (define_insn "neon_vst3<mode>"
5474 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5475 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5476 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5480 if (<V_sz_elem> == 64)
5481 return "vst1.64\t%h1, %A0";
5483 return "vst3.<V_sz_elem>\t%h1, %A0";
5486 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5487 (const_string "neon_store1_3reg<q>")
5488 (const_string "neon_store3_3reg<q>")))])
;; Expander vec_store_lanesci<mode> (VQ2 modes): forwards its CI memory and
;; CI register operands to gen_neon_vst3<mode>.
5490 (define_expand "vec_store_lanesci<mode>"
5491 [(match_operand:CI 0 "neon_struct_operand")
5492 (match_operand:CI 1 "s_register_operand")
5493 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5496 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Expander neon_vst3<mode> (VQ2 modes): the CI memory operand is split into
;; two consecutive EI-sized pieces.  The first piece is stored with
;; neon_vst3qa and the second (at offset GET_MODE_SIZE (EImode)) with
;; neon_vst3qb, both reading the same CI register operand.
5500 (define_expand "neon_vst3<mode>"
5501 [(match_operand:CI 0 "neon_struct_operand")
5502 (match_operand:CI 1 "s_register_operand")
5503 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5508 mem = adjust_address (operands[0], EImode, 0);
5509 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5510 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5511 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; neon_vst3qa<mode> (VQ2 modes): vst3 to an EI memory operand from the
;; DImode registers at regno, regno + 4 and regno + 8 of the CI source.
5515 (define_insn "neon_vst3qa<mode>"
5516 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5517 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5518 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5522 int regno = REGNO (operands[1]);
5524 ops[0] = operands[0];
5525 ops[1] = gen_rtx_REG (DImode, regno);
5526 ops[2] = gen_rtx_REG (DImode, regno + 4);
5527 ops[3] = gen_rtx_REG (DImode, regno + 8);
5528 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5531 [(set_attr "type" "neon_store3_3reg<q>")]
;; neon_vst3qb<mode> (VQ2 modes): vst3 to an EI memory operand from the
;; DImode registers at regno + 2, regno + 6 and regno + 10 of the CI source.
5534 (define_insn "neon_vst3qb<mode>"
5535 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5536 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5537 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5541 int regno = REGNO (operands[1]);
5543 ops[0] = operands[0];
5544 ops[1] = gen_rtx_REG (DImode, regno + 2);
5545 ops[2] = gen_rtx_REG (DImode, regno + 6);
5546 ops[3] = gen_rtx_REG (DImode, regno + 10);
5547 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5550 [(set_attr "type" "neon_store3_3reg<q>")]
5553 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5554 ;; here on big endian targets.
;; neon_vst3_lane<mode> (VD_LANE modes): store one lane from each of the
;; DImode registers at regno, regno + 2 and regno + 4 of the EI source to a
;; <V_three_elem> memory operand.  The lane index is immediate operand 2
;; mapped through NEON_ENDIAN_LANE_N.  The address is printed with plain %0.
5555 (define_insn "neon_vst3_lane<mode>"
5556 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5557 (unspec:<V_three_elem>
5558 [(match_operand:EI 1 "s_register_operand" "w")
5559 (match_operand:SI 2 "immediate_operand" "i")
5560 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5564 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5565 int regno = REGNO (operands[1]);
5567 ops[0] = operands[0];
5568 ops[1] = gen_rtx_REG (DImode, regno);
5569 ops[2] = gen_rtx_REG (DImode, regno + 2);
5570 ops[3] = gen_rtx_REG (DImode, regno + 4);
5571 ops[4] = GEN_INT (lane);
5572 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5576 [(set_attr "type" "neon_store3_one_lane<q>")]
5579 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5580 ;; here on big endian targets.
;; neon_vst3_lane<mode> (VQ_HS modes): store one lane from each of three
;; DImode registers of the CI source to a <V_three_elem> memory operand.
;; The registers are formed from regno, regno + 4 and regno + 8; the lane
;; index is immediate operand 2 mapped through NEON_ENDIAN_LANE_N and is
;; compared against half the mode's element count first.
5581 (define_insn "neon_vst3_lane<mode>"
5582 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5583 (unspec:<V_three_elem>
5584 [(match_operand:CI 1 "s_register_operand" "w")
5585 (match_operand:SI 2 "immediate_operand" "i")
5586 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5590 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5591 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5592 int regno = REGNO (operands[1]);
5594 if (lane >= max / 2)
5599 ops[0] = operands[0];
5600 ops[1] = gen_rtx_REG (DImode, regno);
5601 ops[2] = gen_rtx_REG (DImode, regno + 4);
5602 ops[3] = gen_rtx_REG (DImode, regno + 8);
5603 ops[4] = GEN_INT (lane);
5604 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5608 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander vec_load_lanesoi<mode> (VDX modes): an OI register operand is set
;; from an unspec wrapping an OI neon_struct_operand.
5611 (define_expand "vec_load_lanesoi<mode>"
5612 [(set (match_operand:OI 0 "s_register_operand")
5613 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5614 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; neon_vld4<mode> (VDX modes): load an OI register operand from memory.
;; 64-bit elements are emitted as vld1.64, all other element sizes as vld4;
;; the "type" attribute follows the same element-size test.
5618 (define_insn "neon_vld4<mode>"
5619 [(set (match_operand:OI 0 "s_register_operand" "=w")
5620 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5621 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5625 if (<V_sz_elem> == 64)
5626 return "vld1.64\t%h0, %A1";
5628 return "vld4.<V_sz_elem>\t%h0, %A1";
5631 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5632 (const_string "neon_load1_4reg<q>")
5633 (const_string "neon_load4_4reg<q>")))]
;; Expander vec_load_lanesxi<mode> (VQ2 modes): forwards its XI register and
;; XI memory operands to gen_neon_vld4<mode>.
5636 (define_expand "vec_load_lanesxi<mode>"
5637 [(match_operand:XI 0 "s_register_operand")
5638 (match_operand:XI 1 "neon_struct_operand")
5639 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5642 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Expander neon_vld4<mode> (VQ2 modes): the XI memory operand is split into
;; two consecutive OI-sized pieces.  The first piece is loaded with
;; neon_vld4qa, the second (at offset GET_MODE_SIZE (OImode)) with
;; neon_vld4qb, which also receives operand 0 as an input.
5646 (define_expand "neon_vld4<mode>"
5647 [(match_operand:XI 0 "s_register_operand")
5648 (match_operand:XI 1 "neon_struct_operand")
5649 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5654 mem = adjust_address (operands[1], OImode, 0);
5655 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5656 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5657 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; neon_vld4qa<mode> (VQ2 modes): vld4 from an OI memory operand into the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 of the XI
;; destination.
5661 (define_insn "neon_vld4qa<mode>"
5662 [(set (match_operand:XI 0 "s_register_operand" "=w")
5663 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5664 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5668 int regno = REGNO (operands[0]);
5670 ops[0] = gen_rtx_REG (DImode, regno);
5671 ops[1] = gen_rtx_REG (DImode, regno + 4);
5672 ops[2] = gen_rtx_REG (DImode, regno + 8);
5673 ops[3] = gen_rtx_REG (DImode, regno + 12);
5674 ops[4] = operands[1];
5675 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5678 [(set_attr "type" "neon_load4_4reg<q>")]
;; neon_vld4qb<mode> (VQ2 modes): vld4 from an OI memory operand into the
;; DImode registers at regno + 2, regno + 6, regno + 10 and regno + 14 of the
;; XI destination.  Operand 2 is tied to the destination ("0" constraint).
5681 (define_insn "neon_vld4qb<mode>"
5682 [(set (match_operand:XI 0 "s_register_operand" "=w")
5683 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5684 (match_operand:XI 2 "s_register_operand" "0")
5685 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5689 int regno = REGNO (operands[0]);
5691 ops[0] = gen_rtx_REG (DImode, regno + 2);
5692 ops[1] = gen_rtx_REG (DImode, regno + 6);
5693 ops[2] = gen_rtx_REG (DImode, regno + 10);
5694 ops[3] = gen_rtx_REG (DImode, regno + 14);
5695 ops[4] = operands[1];
5696 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5699 [(set_attr "type" "neon_load4_4reg<q>")]
5702 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5703 ;; here on big endian targets.
;; neon_vld4_lane<mode> (VD_LANE modes): load one lane into each of the
;; DImode registers at regno, regno + 2, regno + 4 and regno + 6 of the OI
;; destination from a <V_four_elem> memory operand.  Operand 2 is tied to
;; the destination; the lane index is immediate operand 3 mapped through
;; NEON_ENDIAN_LANE_N.
5704 (define_insn "neon_vld4_lane<mode>"
5705 [(set (match_operand:OI 0 "s_register_operand" "=w")
5706 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5707 (match_operand:OI 2 "s_register_operand" "0")
5708 (match_operand:SI 3 "immediate_operand" "i")
5709 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5713 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5714 int regno = REGNO (operands[0]);
5716 ops[0] = gen_rtx_REG (DImode, regno);
5717 ops[1] = gen_rtx_REG (DImode, regno + 2);
5718 ops[2] = gen_rtx_REG (DImode, regno + 4);
5719 ops[3] = gen_rtx_REG (DImode, regno + 6);
5720 ops[4] = operands[1];
5721 ops[5] = GEN_INT (lane);
5722 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5726 [(set_attr "type" "neon_load4_one_lane<q>")]
5729 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5730 ;; here on big endian targets.
;; neon_vld4_lane<mode> (VQ_HS modes): load one lane into each of four
;; DImode registers of the XI destination from a <V_four_elem> memory
;; operand.  The registers are formed from regno, regno + 4, regno + 8 and
;; regno + 12; the lane index is immediate operand 3 mapped through
;; NEON_ENDIAN_LANE_N and is compared against half the mode's element count
;; first.
5731 (define_insn "neon_vld4_lane<mode>"
5732 [(set (match_operand:XI 0 "s_register_operand" "=w")
5733 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5734 (match_operand:XI 2 "s_register_operand" "0")
5735 (match_operand:SI 3 "immediate_operand" "i")
5736 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5740 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5741 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5742 int regno = REGNO (operands[0]);
5744 if (lane >= max / 2)
5749 ops[0] = gen_rtx_REG (DImode, regno);
5750 ops[1] = gen_rtx_REG (DImode, regno + 4);
5751 ops[2] = gen_rtx_REG (DImode, regno + 8);
5752 ops[3] = gen_rtx_REG (DImode, regno + 12);
5753 ops[4] = operands[1];
5754 ops[5] = GEN_INT (lane);
5755 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5759 [(set_attr "type" "neon_load4_one_lane<q>")]
;; neon_vld4_dup<mode> (VDX modes): load a <V_four_elem> memory operand into
;; an OI register operand.  For modes with more than one element a vld4 with
;; the "[]" register syntax is emitted on the DImode registers at regno,
;; regno + 2, regno + 4 and regno + 6; otherwise a vld1 of the %h0 register
;; list is returned.  The "type" attribute follows the same test.
5762 (define_insn "neon_vld4_dup<mode>"
5763 [(set (match_operand:OI 0 "s_register_operand" "=w")
5764 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5765 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5769 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5771 int regno = REGNO (operands[0]);
5773 ops[0] = gen_rtx_REG (DImode, regno);
5774 ops[1] = gen_rtx_REG (DImode, regno + 2);
5775 ops[2] = gen_rtx_REG (DImode, regno + 4);
5776 ops[3] = gen_rtx_REG (DImode, regno + 6);
5777 ops[4] = operands[1];
5778 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5783 return "vld1.<V_sz_elem>\t%h0, %A1";
5786 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5787 (const_string "neon_load4_all_lanes<q>")
5788 (const_string "neon_load1_1reg<q>")))]
;; Expander vec_store_lanesoi<mode> (VDX modes): an OI neon_struct_operand is
;; set from an unspec wrapping an OI register operand.
5791 (define_expand "vec_store_lanesoi<mode>"
5792 [(set (match_operand:OI 0 "neon_struct_operand")
5793 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5794 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; neon_vst4<mode> (VDX modes): store an OI register operand to memory.
;; 64-bit elements are emitted as vst1.64, all other element sizes as vst4;
;; the "type" attribute follows the same element-size test.
5798 (define_insn "neon_vst4<mode>"
5799 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5800 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5801 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5805 if (<V_sz_elem> == 64)
5806 return "vst1.64\t%h1, %A0";
5808 return "vst4.<V_sz_elem>\t%h1, %A0";
5811 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5812 (const_string "neon_store1_4reg<q>")
5813 (const_string "neon_store4_4reg<q>")))]
;; Expander vec_store_lanesxi<mode> (VQ2 modes): forwards its XI memory and
;; XI register operands to gen_neon_vst4<mode>.
5816 (define_expand "vec_store_lanesxi<mode>"
5817 [(match_operand:XI 0 "neon_struct_operand")
5818 (match_operand:XI 1 "s_register_operand")
5819 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5822 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; Expander neon_vst4<mode> (VQ2 modes): the XI memory operand is split into
;; two consecutive OI-sized pieces.  The first piece is stored with
;; neon_vst4qa and the second (at offset GET_MODE_SIZE (OImode)) with
;; neon_vst4qb, both reading the same XI register operand.
5826 (define_expand "neon_vst4<mode>"
5827 [(match_operand:XI 0 "neon_struct_operand")
5828 (match_operand:XI 1 "s_register_operand")
5829 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5834 mem = adjust_address (operands[0], OImode, 0);
5835 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5836 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5837 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; neon_vst4qa<mode> (VQ2 modes): vst4 to an OI memory operand from the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 of the XI
;; source.
5841 (define_insn "neon_vst4qa<mode>"
5842 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5843 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5844 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5848 int regno = REGNO (operands[1]);
5850 ops[0] = operands[0];
5851 ops[1] = gen_rtx_REG (DImode, regno);
5852 ops[2] = gen_rtx_REG (DImode, regno + 4);
5853 ops[3] = gen_rtx_REG (DImode, regno + 8);
5854 ops[4] = gen_rtx_REG (DImode, regno + 12);
5855 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5858 [(set_attr "type" "neon_store4_4reg<q>")]
;; neon_vst4qb<mode> (VQ2 modes): vst4 to an OI memory operand from the
;; DImode registers at regno + 2, regno + 6, regno + 10 and regno + 14 of the
;; XI source.
5861 (define_insn "neon_vst4qb<mode>"
5862 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5863 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5864 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5868 int regno = REGNO (operands[1]);
5870 ops[0] = operands[0];
5871 ops[1] = gen_rtx_REG (DImode, regno + 2);
5872 ops[2] = gen_rtx_REG (DImode, regno + 6);
5873 ops[3] = gen_rtx_REG (DImode, regno + 10);
5874 ops[4] = gen_rtx_REG (DImode, regno + 14);
5875 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5878 [(set_attr "type" "neon_store4_4reg<q>")]
5881 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5882 ;; here on big endian targets.
;; neon_vst4_lane<mode> (VD_LANE modes): store one lane from each of the
;; DImode registers at regno, regno + 2, regno + 4 and regno + 6 of the OI
;; source to a <V_four_elem> memory operand.  The lane index is immediate
;; operand 2 mapped through NEON_ENDIAN_LANE_N.
5883 (define_insn "neon_vst4_lane<mode>"
5884 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5885 (unspec:<V_four_elem>
5886 [(match_operand:OI 1 "s_register_operand" "w")
5887 (match_operand:SI 2 "immediate_operand" "i")
5888 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5892 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5893 int regno = REGNO (operands[1]);
5895 ops[0] = operands[0];
5896 ops[1] = gen_rtx_REG (DImode, regno);
5897 ops[2] = gen_rtx_REG (DImode, regno + 2);
5898 ops[3] = gen_rtx_REG (DImode, regno + 4);
5899 ops[4] = gen_rtx_REG (DImode, regno + 6);
5900 ops[5] = GEN_INT (lane);
5901 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5905 [(set_attr "type" "neon_store4_one_lane<q>")]
5908 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5909 ;; here on big endian targets.
;; neon_vst4_lane<mode> (VQ_HS modes): store one lane from each of four
;; DImode registers of the XI source to a <V_four_elem> memory operand.
;; The registers are formed from regno, regno + 4, regno + 8 and regno + 12;
;; the lane index is immediate operand 2 mapped through NEON_ENDIAN_LANE_N
;; and is compared against half the mode's element count first.
;; This is a single-lane store, so it is typed neon_store4_one_lane<q>, in
;; line with the VD_LANE variant and the other *_lane patterns.
5910 (define_insn "neon_vst4_lane<mode>"
5911 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5912 (unspec:<V_four_elem>
5913 [(match_operand:XI 1 "s_register_operand" "w")
5914 (match_operand:SI 2 "immediate_operand" "i")
5915 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5919 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5920 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5921 int regno = REGNO (operands[1]);
5923 if (lane >= max / 2)
5928 ops[0] = operands[0];
5929 ops[1] = gen_rtx_REG (DImode, regno);
5930 ops[2] = gen_rtx_REG (DImode, regno + 4);
5931 ops[3] = gen_rtx_REG (DImode, regno + 8);
5932 ops[4] = gen_rtx_REG (DImode, regno + 12);
5933 ops[5] = GEN_INT (lane);
5934 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5938 [(set_attr "type" "neon_store4_one_lane<q>")]
;; neon_vec_unpack<US>_lo_<mode> (VU modes): extend (SE) the elements of
;; operand 1 selected by a vect_par_constant_low parallel into a <V_unpack>
;; register, emitted as vmovl on %e1.  Not available when BYTES_BIG_ENDIAN.
5941 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5942 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5943 (SE:<V_unpack> (vec_select:<V_HALF>
5944 (match_operand:VU 1 "register_operand" "w")
5945 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5946 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5947 "vmovl.<US><V_sz_elem> %q0, %e1"
5948 [(set_attr "type" "neon_shift_imm_long")]
;; neon_vec_unpack<US>_hi_<mode> (VU modes): extend (SE) the elements of
;; operand 1 selected by a vect_par_constant_high parallel into a <V_unpack>
;; register, emitted as vmovl on %f1.  Not available when BYTES_BIG_ENDIAN.
5951 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5952 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5953 (SE:<V_unpack> (vec_select:<V_HALF>
5954 (match_operand:VU 1 "register_operand" "w")
5955 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5956 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5957 "vmovl.<US><V_sz_elem> %q0, %f1"
5958 [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_unpack<US>_hi_<mode> (VU modes): builds a PARALLEL holding
;; the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_unpack<US>_hi_<mode> with it.  Not available when
;; BYTES_BIG_ENDIAN.
5961 (define_expand "vec_unpack<US>_hi_<mode>"
5962 [(match_operand:<V_unpack> 0 "register_operand" "")
5963 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5964 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5966 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5969 for (i = 0; i < (<V_mode_nunits>/2); i++)
5970 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5972 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5973 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander vec_unpack<US>_lo_<mode> (VU modes): builds a PARALLEL holding
;; the indices 0 .. <V_mode_nunits>/2-1 and emits
;; neon_vec_unpack<US>_lo_<mode> with it.  Not available when
;; BYTES_BIG_ENDIAN.
5980 (define_expand "vec_unpack<US>_lo_<mode>"
5981 [(match_operand:<V_unpack> 0 "register_operand" "")
5982 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5983 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5985 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5988 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5989 RTVEC_ELT (v, i) = GEN_INT (i);
5990 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5991 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; neon_vec_<US>mult_lo_<mode> (VU modes): widening multiply of the selected
;; (vect_par_constant_low) elements of operands 1 and 3, emitted as vmull on
;; %e1 and %e3.  Not available when BYTES_BIG_ENDIAN.
5998 (define_insn "neon_vec_<US>mult_lo_<mode>"
5999 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6000 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6001 (match_operand:VU 1 "register_operand" "w")
6002 (match_operand:VU 2 "vect_par_constant_low" "")))
6003 (SE:<V_unpack> (vec_select:<V_HALF>
6004 (match_operand:VU 3 "register_operand" "w")
6006 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6007 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6008 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_lo_<mode> (VU modes): builds a PARALLEL
;; holding the indices 0 .. <V_mode_nunits>/2-1 and emits
;; neon_vec_<US>mult_lo_<mode>.  Not available when BYTES_BIG_ENDIAN.
6011 (define_expand "vec_widen_<US>mult_lo_<mode>"
6012 [(match_operand:<V_unpack> 0 "register_operand" "")
6013 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6014 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6015 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6017 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6020 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6021 RTVEC_ELT (v, i) = GEN_INT (i);
6022 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6024 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; neon_vec_<US>mult_hi_<mode> (VU modes): widening multiply of the selected
;; (vect_par_constant_high) elements of operands 1 and 3, emitted as vmull on
;; %f1 and %f3.  Not available when BYTES_BIG_ENDIAN.
6032 (define_insn "neon_vec_<US>mult_hi_<mode>"
6033 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6034 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6035 (match_operand:VU 1 "register_operand" "w")
6036 (match_operand:VU 2 "vect_par_constant_high" "")))
6037 (SE:<V_unpack> (vec_select:<V_HALF>
6038 (match_operand:VU 3 "register_operand" "w")
6040 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6041 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6042 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_hi_<mode> (VU modes): builds a PARALLEL
;; holding the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_<US>mult_hi_<mode>.  Not available when BYTES_BIG_ENDIAN.
6045 (define_expand "vec_widen_<US>mult_hi_<mode>"
6046 [(match_operand:<V_unpack> 0 "register_operand" "")
6047 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6048 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6049 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6051 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6054 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6055 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6056 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6058 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; neon_vec_<US>shiftl_<mode> (VW modes): extend (SE) the result of shifting
;; operand 1 left by the constant shift amount in operand 2 into a <V_widen>
;; register, emitted as vshll.
6067 (define_insn "neon_vec_<US>shiftl_<mode>"
6068 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6069 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6070 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6073 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6075 [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_widen_<US>shiftl_lo_<mode> (VU modes): emits
;; neon_vec_<US>shiftl_<V_half> on the <V_HALF>mode subreg of operand 1 at
;; byte offset 0.  Not available when BYTES_BIG_ENDIAN.
6078 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6079 [(match_operand:<V_unpack> 0 "register_operand" "")
6080 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6081 (match_operand:SI 2 "immediate_operand" "i")]
6082 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6084 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6085 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander vec_widen_<US>shiftl_hi_<mode> (VU modes): emits
;; neon_vec_<US>shiftl_<V_half> on the <V_HALF>mode subreg of operand 1 at
;; byte offset GET_MODE_SIZE (<V_HALF>mode).  Not available when
;; BYTES_BIG_ENDIAN.
6091 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6092 [(match_operand:<V_unpack> 0 "register_operand" "")
6093 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6094 (match_operand:SI 2 "immediate_operand" "i")]
6095 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6097 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6098 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6099 GET_MODE_SIZE (<V_HALF>mode)),
6105 ;; Vectorize for non-neon-quad case
;; neon_unpack<US>_<mode> (VDI modes): extend (SE) the whole of operand 1
;; into a <V_widen> register, emitted as vmovl from %P1 to %q0.
6106 (define_insn "neon_unpack<US>_<mode>"
6107 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6108 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6110 "vmovl.<US><V_sz_elem> %q0, %P1"
6111 [(set_attr "type" "neon_move")]
;; Expander vec_unpack<US>_lo_<mode> (VDI modes): widens operand 1 into a
;; fresh <V_widen>mode temporary with neon_unpack<US>_<mode>, then extracts
;; the result into operand 0 with neon_vget_low<V_widen_l>.
6114 (define_expand "vec_unpack<US>_lo_<mode>"
6115 [(match_operand:<V_double_width> 0 "register_operand" "")
6116 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6119 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6120 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6121 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander vec_unpack<US>_hi_<mode> (VDI modes): widens operand 1 into a
;; fresh <V_widen>mode temporary with neon_unpack<US>_<mode>, then extracts
;; the result into operand 0 with neon_vget_high<V_widen_l>.
6127 (define_expand "vec_unpack<US>_hi_<mode>"
6128 [(match_operand:<V_double_width> 0 "register_operand" "")
6129 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6132 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6133 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6134 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; neon_vec_<US>mult_<mode> (VDI modes): widening multiply of operands 1 and
;; 2 into a <V_widen> register, emitted as vmull on %P1 and %P2.
6140 (define_insn "neon_vec_<US>mult_<mode>"
6141 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6142 (mult:<V_widen> (SE:<V_widen>
6143 (match_operand:VDI 1 "register_operand" "w"))
6145 (match_operand:VDI 2 "register_operand" "w"))))]
6147 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6148 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_hi_<mode> (VDI modes): multiplies operands 1
;; and 2 into a fresh <V_widen>mode temporary with neon_vec_<US>mult_<mode>,
;; then extracts the result into operand 0 with neon_vget_high<V_widen_l>.
6151 (define_expand "vec_widen_<US>mult_hi_<mode>"
6152 [(match_operand:<V_double_width> 0 "register_operand" "")
6153 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6154 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6157 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6158 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6159 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>mult_lo_<mode> (VDI modes): multiplies operands 1
;; and 2 into a fresh <V_widen>mode temporary with neon_vec_<US>mult_<mode>,
;; then extracts the result into operand 0 with neon_vget_low<V_widen_l>.
6166 (define_expand "vec_widen_<US>mult_lo_<mode>"
6167 [(match_operand:<V_double_width> 0 "register_operand" "")
6168 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6169 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6172 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6173 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6174 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>shiftl_hi_<mode> (VDI modes): shifts operand 1 by
;; immediate operand 2 into a fresh <V_widen>mode temporary with
;; neon_vec_<US>shiftl_<mode>, then extracts the result into operand 0 with
;; neon_vget_high<V_widen_l>.
6181 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6182 [(match_operand:<V_double_width> 0 "register_operand" "")
6183 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6184 (match_operand:SI 2 "immediate_operand" "i")]
6187 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6188 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6189 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>shiftl_lo_<mode> (VDI modes): shifts operand 1 by
;; immediate operand 2 into a fresh <V_widen>mode temporary with
;; neon_vec_<US>shiftl_<mode>, then extracts the result into operand 0 with
;; neon_vget_low<V_widen_l>.
6195 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6196 [(match_operand:<V_double_width> 0 "register_operand" "")
6197 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6198 (match_operand:SI 2 "immediate_operand" "i")]
6201 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6202 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6203 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6209 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6210 ; because the ordering of vector elements in Q registers is different from what
6211 ; the semantics of the instructions require.
;; vec_pack_trunc_<mode> (VN modes): concatenate the truncations of operands
;; 1 and 2 into an earlyclobber ("=&w") <V_narrow_pack> destination.  Two
;; vmovn instructions are emitted, writing %e0 and %f0 respectively, hence
;; the "multiple" type and a length of 8.
6213 (define_insn "vec_pack_trunc_<mode>"
6214 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6215 (vec_concat:<V_narrow_pack>
6216 (truncate:<V_narrow>
6217 (match_operand:VN 1 "register_operand" "w"))
6218 (truncate:<V_narrow>
6219 (match_operand:VN 2 "register_operand" "w"))))]
6220 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6221 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6222 [(set_attr "type" "multiple")
6223 (set_attr "length" "8")]
6226 ;; For the non-quad case.
;; neon_vec_pack_trunc_<mode> (VN modes): truncate operand 1 into a
;; <V_narrow> register with a single vmovn from %q1 to %P0.  Not available
;; when BYTES_BIG_ENDIAN.
6227 (define_insn "neon_vec_pack_trunc_<mode>"
6228 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6229 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6230 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6231 "vmovn.i<V_sz_elem>\t%P0, %q1"
6232 [(set_attr "type" "neon_move_narrow_q")]
;; Expander vec_pack_trunc_<mode> (VSHFT modes): places operand 1 in the low
;; half and operand 2 in the high half of a fresh <V_DOUBLE>mode temporary
;; (move_lo_quad / move_hi_quad), then narrows that temporary into operand 0
;; with neon_vec_pack_trunc_<V_double>.  Not available when BYTES_BIG_ENDIAN.
6235 (define_expand "vec_pack_trunc_<mode>"
6236 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6237 (match_operand:VSHFT 1 "register_operand" "")
6238 (match_operand:VSHFT 2 "register_operand")]
6239 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6241 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6243 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6244 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6245 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; neon_vabd<mode>_2 (VDQ modes): abs (operand 1 - operand 2), emitted as
;; vabd.<V_s_elem>.  For floating-point modes the pattern is only enabled
;; under flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
6249 (define_insn "neon_vabd<mode>_2"
6250 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6251 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
6252 (match_operand:VDQ 2 "s_register_operand" "w"))))]
6253 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6254 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6256 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6257 (const_string "neon_fp_abd_s<q>")
6258 (const_string "neon_abd<q>")))]
;; neon_vabd<mode>_3 (VDQ modes): abs of an unspec over operands 1 and 2,
;; emitted as vabd.<V_if_elem>.  For floating-point modes the pattern is only
;; enabled under flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
6261 (define_insn "neon_vabd<mode>_3"
6262 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6263 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
6264 (match_operand:VDQ 2 "s_register_operand" "w")]
6266 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6267 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6269 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6270 (const_string "neon_fp_abd_s<q>")
6271 (const_string "neon_abd<q>")))]
6274 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; After reload, when the DI destination is a VFP register, a sign_extend
;; from SI is rewritten as a V2SI vec_duplicate of the source into the same
;; register number (operand 2), followed by an arithmetic right shift of the
;; DI register by 32.
6277 [(set (match_operand:DI 0 "s_register_operand" "")
6278 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6279 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6280 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6281 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6283 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; After reload, when the DI destination is a VFP register, a sign_extend
;; from HI is rewritten as a V4HI vec_duplicate of the source into the same
;; register number (operand 2), followed by an arithmetic right shift of the
;; DI register by 48.
6287 [(set (match_operand:DI 0 "s_register_operand" "")
6288 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6289 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6290 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6291 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6293 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; After reload, when the DI destination is a VFP register, a sign_extend
;; from QI is rewritten as a V8QI vec_duplicate of the source into the same
;; register number (operand 2), followed by an arithmetic right shift of the
;; DI register by 56.
6297 [(set (match_operand:DI 0 "s_register_operand" "")
6298 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6299 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6300 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6301 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6303 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; After reload, when the DI destination is a VFP register, a zero_extend
;; from SI is rewritten as a V2SI vec_duplicate of the source into the same
;; register number (operand 2), followed by a logical right shift of the DI
;; register by 32.
6307 [(set (match_operand:DI 0 "s_register_operand" "")
6308 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6309 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6310 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6311 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6313 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; After reload, when the DI destination is a VFP register, a zero_extend
;; from HI is rewritten as a V4HI vec_duplicate of the source into the same
;; register number (operand 2), followed by a logical right shift of the DI
;; register by 48.
6317 [(set (match_operand:DI 0 "s_register_operand" "")
6318 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6319 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6320 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6321 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6323 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6327 [(set (match_operand:DI 0 "s_register_operand" "")
6328 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6329 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6330 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6331 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6333 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));