1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Its possible values are vadd, vmin and vmax; the default is vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the modes of the VDX iterator.  At least one of the two
;; operands must be a register (see the register_operand tests in the
;; condition).  Output by alternative, as selected in the code below:
;;   0      - vmov between NEON registers (%P0, %P1).
;;   1, 3   - delegated to output_move_neon.
;;   2      - immediate (Dn) move: the immediate is checked with
;;            neon_immediate_valid_for_move and emitted either as vmov.f32
;;            or as vmov.i<width>, built in the static buffer templ.
;;   4, 5   - vmov between a NEON register and the %Q/%R parts of the
;;            other operand.
;;   others - delegated to output_move_double.
;; NOTE(review): several lines of this pattern (including the start of the
;; condition) are not visible in this excerpt.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*")])
;; Move pattern for the modes of the VQXMOV iterator; at least one operand
;; must be a register.  Output by alternative:
;;   0      - vmov between quad registers (%q0, %q1).
;;   1, 3   - delegated to output_move_neon.
;;   2      - immediate (Dn) move, checked with
;;            neon_immediate_valid_for_move and emitted either as vmov.f32
;;            or as vmov.i<width>.
;;   4, 5   - two vmov instructions, one for each half (%e / %f) of the
;;            quad register.
;;   others - delegated to output_move_quad.
;; NOTE(review): several lines of this pattern (including the start of the
;; condition) are not visible in this excerpt.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 /* We define these mov expanders to match the standard mov$a optab to prevent
117 the mid-end from trying to do a subreg for these modes, which is the most
118 inefficient way to expand the move. Also, big-endian subregs aren't
119 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
120 Without these RTL generation patterns the mid-end would attempt to take a
121 subreg and may ICE if it can't. */
;; Expander for TImode moves.  The visible preparation code tests
;; can_create_pseudo_p and, when operand 0 is not a REG, forces operand 1
;; into a TImode register.
;; NOTE(review): the condition and braces of this expander are not visible
;; in this excerpt.
123 (define_expand "movti"
124 [(set (match_operand:TI 0 "nonimmediate_operand" "")
125 (match_operand:TI 1 "general_operand" ""))]
128 if (can_create_pseudo_p ())
130 if (!REG_P (operands[0]))
131 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves in the VSTRUCT modes.  The visible preparation code
;; tests can_create_pseudo_p and, when operand 0 is not a REG, forces
;; operand 1 into a register of the same mode.
;; NOTE(review): the condition and braces of this expander are not visible
;; in this excerpt.
135 (define_expand "mov<mode>"
136 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
137 (match_operand:VSTRUCT 1 "general_operand" ""))]
140 if (can_create_pseudo_p ())
142 if (!REG_P (operands[0]))
143 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Expander for moves in the VH modes; both operands use the
;; s_register_operand predicate.  The visible preparation code tests
;; can_create_pseudo_p and, when operand 0 is not a REG, forces operand 1
;; into a register of the same mode.
;; NOTE(review): the condition and braces of this expander are not visible
;; in this excerpt.
147 (define_expand "mov<mode>"
148 [(set (match_operand:VH 0 "s_register_operand")
149 (match_operand:VH 1 "s_register_operand"))]
152 if (can_create_pseudo_p ())
154 if (!REG_P (operands[0]))
155 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move insn for the VSTRUCT modes with three alternatives: register to
;; register (w <- w), store (Ut <- w) and load (w <- Ut).  At least one
;; operand must be a register.  Alternatives 1 and 2 are output by
;; output_move_neon; the length attribute is computed by
;; arm_attr_length_move_neon.
;; NOTE(review): the handling of alternative 0 and the start of the
;; condition are not visible in this excerpt.
159 (define_insn "*neon_mov<mode>"
160 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
161 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
163 && (register_operand (operands[0], <MODE>mode)
164 || register_operand (operands[1], <MODE>mode))"
166 switch (which_alternative)
169 case 1: case 2: return output_move_neon (operands);
170 default: gcc_unreachable ();
173 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
174 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload, an EI-mode register-to-register copy is rewritten as two
;; sets: a TImode piece starting at the first register and a DImode piece
;; starting four registers later.  The pieces are handed to
;; neon_disambiguate_copy, which fills in the operands used by the
;; match_dups (presumably choosing an order that is safe when source and
;; destination overlap -- confirm against its definition).
;; NOTE(review): the opening line of this definition and the declarations
;; of dest/src are not visible in this excerpt.
177 [(set (match_operand:EI 0 "s_register_operand" "")
178 (match_operand:EI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (DImode, rdest + 4);
190 src[1] = gen_rtx_REG (DImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, an OI-mode register-to-register copy is rewritten as two
;; TImode sets, the second starting four registers after the first.  The
;; pieces are handed to neon_disambiguate_copy, which fills in the operands
;; used by the match_dups (presumably choosing an overlap-safe order --
;; confirm against its definition).
;; NOTE(review): the opening line of this definition and the declarations
;; of dest/src are not visible in this excerpt.
196 [(set (match_operand:OI 0 "s_register_operand" "")
197 (match_operand:OI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))]
202 int rdest = REGNO (operands[0]);
203 int rsrc = REGNO (operands[1]);
206 dest[0] = gen_rtx_REG (TImode, rdest);
207 src[0] = gen_rtx_REG (TImode, rsrc);
208 dest[1] = gen_rtx_REG (TImode, rdest + 4);
209 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, a CI-mode register-to-register copy is rewritten as three
;; TImode sets at register offsets 0, 4 and 8.  The pieces are handed to
;; neon_disambiguate_copy, which fills in the operands used by the
;; match_dups (presumably choosing an overlap-safe order -- confirm
;; against its definition).
;; NOTE(review): the opening line of this definition and the declarations
;; of dest/src are not visible in this excerpt.
215 [(set (match_operand:CI 0 "s_register_operand" "")
216 (match_operand:CI 1 "s_register_operand" ""))]
217 "TARGET_NEON && reload_completed"
218 [(set (match_dup 0) (match_dup 1))
219 (set (match_dup 2) (match_dup 3))
220 (set (match_dup 4) (match_dup 5))]
222 int rdest = REGNO (operands[0]);
223 int rsrc = REGNO (operands[1]);
226 dest[0] = gen_rtx_REG (TImode, rdest);
227 src[0] = gen_rtx_REG (TImode, rsrc);
228 dest[1] = gen_rtx_REG (TImode, rdest + 4);
229 src[1] = gen_rtx_REG (TImode, rsrc + 4);
230 dest[2] = gen_rtx_REG (TImode, rdest + 8);
231 src[2] = gen_rtx_REG (TImode, rsrc + 8);
233 neon_disambiguate_copy (operands, dest, src, 3);
;; After reload, an XI-mode register-to-register copy is rewritten as four
;; TImode sets at register offsets 0, 4, 8 and 12.  The pieces are handed
;; to neon_disambiguate_copy, which fills in the operands used by the
;; match_dups (presumably choosing an overlap-safe order -- confirm
;; against its definition).
;; NOTE(review): the opening line of this definition and the declarations
;; of dest/src are not visible in this excerpt.
237 [(set (match_operand:XI 0 "s_register_operand" "")
238 (match_operand:XI 1 "s_register_operand" ""))]
239 "TARGET_NEON && reload_completed"
240 [(set (match_dup 0) (match_dup 1))
241 (set (match_dup 2) (match_dup 3))
242 (set (match_dup 4) (match_dup 5))
243 (set (match_dup 6) (match_dup 7))]
245 int rdest = REGNO (operands[0]);
246 int rsrc = REGNO (operands[1]);
249 dest[0] = gen_rtx_REG (TImode, rdest);
250 src[0] = gen_rtx_REG (TImode, rsrc);
251 dest[1] = gen_rtx_REG (TImode, rdest + 4);
252 src[1] = gen_rtx_REG (TImode, rsrc + 4);
253 dest[2] = gen_rtx_REG (TImode, rdest + 8);
254 src[2] = gen_rtx_REG (TImode, rsrc + 8);
255 dest[3] = gen_rtx_REG (TImode, rdest + 12);
256 src[3] = gen_rtx_REG (TImode, rsrc + 12);
258 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned vector moves in the VDQX modes, represented as
;; an UNSPEC_MISALIGNED_ACCESS.  Only enabled for little-endian NEON with
;; unaligned_access.  If neither operand is a register, operand 1 is first
;; forced into one; the non-register operand is then taken as the memory
;; reference, and its address is forced into a Pmode register when
;; neon_vector_mem_operand rejects it.
;; NOTE(review): the declaration of adjust_mem and the else keyword between
;; the two adjust_mem assignments are not visible in this excerpt.
261 (define_expand "movmisalign<mode>"
262 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
263 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
264 UNSPEC_MISALIGNED_ACCESS))]
265 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
268 /* This pattern is not permitted to fail during expansion: if both arguments
269 are non-registers (e.g. memory := constant, which can be created by the
270 auto-vectorizer), force operand 1 into a register. */
271 if (!s_register_operand (operands[0], <MODE>mode)
272 && !s_register_operand (operands[1], <MODE>mode))
273 operands[1] = force_reg (<MODE>mode, operands[1]);
275 if (s_register_operand (operands[0], <MODE>mode))
276 adjust_mem = operands[1];
278 adjust_mem = operands[0];
280 /* Legitimize address. */
281 if (!neon_vector_mem_operand (adjust_mem, 2, true))
282 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register (operand 1) to memory
;; (operand 0, constraint Um), emitted as a single vst1.
286 (define_insn "*movmisalign<mode>_neon_store"
287 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
288 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
289 UNSPEC_MISALIGNED_ACCESS))]
290 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
291 "vst1.<V_sz_elem>\t{%P1}, %A0"
292 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load from memory (operand 1) into a VDX-mode register
;; (operand 0), emitted as a single vld1.
;; NOTE(review): the line carrying operand 1's constraint is not visible
;; in this excerpt.
294 (define_insn "*movmisalign<mode>_neon_load"
295 [(set (match_operand:VDX 0 "s_register_operand" "=w")
296 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
298 UNSPEC_MISALIGNED_ACCESS))]
299 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
300 "vld1.<V_sz_elem>\t{%P0}, %A1"
301 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register (operand 1, printed with %q) to
;; memory (operand 0, constraint Um), emitted as a single vst1.
303 (define_insn "*movmisalign<mode>_neon_store"
304 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
305 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
306 UNSPEC_MISALIGNED_ACCESS))]
307 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
308 "vst1.<V_sz_elem>\t{%q1}, %A0"
309 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load from memory (operand 1) into a VQX-mode register
;; (operand 0, printed with %q), emitted as a single vld1.
;; NOTE(review): the line carrying operand 1's constraint is not visible
;; in this excerpt.
311 (define_insn "*movmisalign<mode>_neon_load"
312 [(set (match_operand:VQX 0 "s_register_operand" "=w")
313 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
315 UNSPEC_MISALIGNED_ACCESS))]
316 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
317 "vld1.<V_sz_elem>\t{%q0}, %A1"
318 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of a VD_LANE vector (operand 3,
;; tied to operand 0).  Operand 2 is a bit mask; the lane is the index of
;; its lowest set bit (ffs - 1), mirrored on big-endian.  Alternative 0
;; loads the lane from memory with vld1; the other alternative uses vmov
;; from operand 1.
;; NOTE(review): some lines of this pattern (e.g. the head of the RTL
;; expression and the condition) are not visible in this excerpt.
320 (define_insn "vec_set<mode>_internal"
321 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
323 (vec_duplicate:VD_LANE
324 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
325 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
326 (match_operand:SI 2 "immediate_operand" "i,i")))]
329 int elt = ffs ((int) INTVAL (operands[2])) - 1;
330 if (BYTES_BIG_ENDIAN)
331 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
332 operands[2] = GEN_INT (elt);
334 if (which_alternative == 0)
335 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
337 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
339 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert scalar operand 1 into one lane of a VQ2 vector (operand 3, tied
;; to operand 0).  The lane index is taken from the lowest set bit of the
;; mask in operand 2 and split into a lane within a half-width register
;; (elt, mirrored on big-endian) and a register offset (hi, 0 or 2) that is
;; added to operand 0's register number; operand 0 is then rewritten as a
;; <V_HALF>mode register before the vld1 (alternative 0) or vmov is output.
;; NOTE(review): some lines of this pattern are not visible in this excerpt.
341 (define_insn "vec_set<mode>_internal"
342 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
345 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
346 (match_operand:VQ2 3 "s_register_operand" "0,0")
347 (match_operand:SI 2 "immediate_operand" "i,i")))]
350 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
351 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
352 int elt = elem % half_elts;
353 int hi = (elem / half_elts) * 2;
354 int regno = REGNO (operands[0]);
356 if (BYTES_BIG_ENDIAN)
357 elt = half_elts - 1 - elt;
359 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
360 operands[2] = GEN_INT (elt);
362 if (which_alternative == 0)
363 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
365 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
367 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert DImode operand 1 into one element of a V2DI vector (operand 3,
;; tied to operand 0).  The element index comes from the lowest set bit of
;; the mask in operand 2; operand 0 is rewritten as the DImode register at
;; offset 2 * index from the vector's first register, which is then loaded
;; with vld1.64 (alternative 0) or written with vmov from %Q1/%R1.
;; NOTE(review): some lines of this pattern are not visible in this excerpt.
370 (define_insn "vec_setv2di_internal"
371 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
374 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
375 (match_operand:V2DI 3 "s_register_operand" "0,0")
376 (match_operand:SI 2 "immediate_operand" "i,i")))]
379 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
380 int regno = REGNO (operands[0]) + 2 * elem;
382 operands[0] = gen_rtx_REG (DImode, regno);
384 if (which_alternative == 0)
385 return "vld1.64\t%P0, %A1";
387 return "vmov\t%P0, %Q1, %R1";
389 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set on the VDQ modes.  The element index in operand 2
;; is converted to a one-hot mask (HOST_WIDE_INT_1 << index) and passed to
;; vec_set<mode>_internal, with operand 0 supplied both as the destination
;; and as the vector being updated.
;; NOTE(review): the condition and closing lines are not visible in this
;; excerpt.
392 (define_expand "vec_set<mode>"
393 [(match_operand:VDQ 0 "s_register_operand" "")
394 (match_operand:<V_elem> 1 "s_register_operand" "")
395 (match_operand:SI 2 "immediate_operand" "")]
398 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
399 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
400 GEN_INT (elem), operands[0]));
;; Extract lane operand 2 of a VD_LANE vector (operand 1) into operand 0.
;; On big-endian the lane index is mirrored.  Alternative 0 stores the
;; lane to memory with vst1; the other alternative uses vmov into
;; operand 0.
;; NOTE(review): some lines of this pattern are not visible in this excerpt.
404 (define_insn "vec_extract<mode><V_elem_l>"
405 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
407 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
408 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
411 if (BYTES_BIG_ENDIAN)
413 int elt = INTVAL (operands[2]);
414 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
415 operands[2] = GEN_INT (elt);
418 if (which_alternative == 0)
419 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
421 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
423 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract lane operand 2 of a VQ2 vector (operand 1) into operand 0.  The
;; index is split into a lane within a half-width register (elt, mirrored
;; on big-endian) and a register offset (hi, 0 or 2); operand 1 is
;; rewritten as the <V_HALF>mode register at that offset before the vst1
;; (alternative 0) or vmov is output.
;; NOTE(review): some lines of this pattern are not visible in this excerpt.
426 (define_insn "vec_extract<mode><V_elem_l>"
427 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
429 (match_operand:VQ2 1 "s_register_operand" "w,w")
430 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
433 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
434 int elt = INTVAL (operands[2]) % half_elts;
435 int hi = (INTVAL (operands[2]) / half_elts) * 2;
436 int regno = REGNO (operands[1]);
438 if (BYTES_BIG_ENDIAN)
439 elt = half_elts - 1 - elt;
441 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
442 operands[2] = GEN_INT (elt);
444 if (which_alternative == 0)
445 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
447 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
449 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract element operand 2 of a V2DI vector (operand 1) into DImode
;; operand 0.  Operand 1 is rewritten as the DImode register at offset
;; 2 * index from the vector's first register, which is then stored with
;; vst1.64 (alternative 0) or moved with vmov into %Q0/%R0.
;; NOTE(review): some lines of this pattern are not visible in this excerpt.
452 (define_insn "vec_extractv2didi"
453 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
455 (match_operand:V2DI 1 "s_register_operand" "w,w")
456 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
459 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
461 operands[1] = gen_rtx_REG (DImode, regno);
463 if (which_alternative == 0)
464 return "vst1.64\t{%P1}, %A0 @ v2di";
466 return "vmov\t%Q0, %R0, %P1 @ v2di";
468 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vec_init on the VDQ modes; all the work is done by
;; neon_expand_vector_init (destination, initializer).
;; NOTE(review): the condition and closing lines are not visible in this
;; excerpt.
471 (define_expand "vec_init<mode><V_elem_l>"
472 [(match_operand:VDQ 0 "s_register_operand" "")
473 (match_operand 1 "" "")]
476 neon_expand_vector_init (operands[0], operands[1]);
480 ;; Doubleword and quadword arithmetic.
482 ;; NOTE: some other instructions also support 64-bit integer
483 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition (vadd) for the VDQ modes.  Floating-point modes are only
;; matched when flag_unsafe_math_optimizations is set.  The type attribute
;; is neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
485 (define_insn "*add<mode>3_neon"
486 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
487 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
488 (match_operand:VDQ 2 "s_register_operand" "w")))]
489 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
490 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
492 (if_then_else (match_test "<Is_float_mode>")
493 (const_string "neon_fp_addsub_s<q>")
494 (const_string "neon_add<q>")))]
497 ;; As with SFmode, full support for HFmode vector arithmetic is only available
498 ;; when flag_unsafe_math_optimizations is enabled.
;; Named vadd pattern for the VH modes, available only with
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
;; NOTE(review): the lines opening the RTL expression are not visible in
;; this excerpt.
500 (define_insn "add<mode>3"
502 (match_operand:VH 0 "s_register_operand" "=w")
504 (match_operand:VH 1 "s_register_operand" "w")
505 (match_operand:VH 2 "s_register_operand" "w")))]
506 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
507 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
509 (if_then_else (match_test "<Is_float_mode>")
510 (const_string "neon_fp_addsub_s<q>")
511 (const_string "neon_add<q>")))]
;; vadd pattern for the VH modes that only requires TARGET_NEON_FP16INST,
;; i.e. it does not depend on flag_unsafe_math_optimizations.
;; NOTE(review): the lines opening the RTL expression are not visible in
;; this excerpt.
514 (define_insn "add<mode>3_fp16"
516 (match_operand:VH 0 "s_register_operand" "=w")
518 (match_operand:VH 1 "s_register_operand" "w")
519 (match_operand:VH 2 "s_register_operand" "w")))]
520 "TARGET_NEON_FP16INST"
521 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
523 (if_then_else (match_test "<Is_float_mode>")
524 (const_string "neon_fp_addsub_s<q>")
525 (const_string "neon_add<q>")))]
;; DImode addition with seven alternatives; the pattern clobbers the
;; condition-code register.  Alternatives 0 and 3 use NEON registers and
;; emit vadd.i64; per the arch attribute, alternative 0 is for
;; neon_for_64bits and alternative 3 for avoid_neon_for_64bits.  The other
;; alternatives are typed "multiple" with length 8.
;; NOTE(review): the condition and the output for alternatives 1, 2 and
;; 4-6 are not visible in this excerpt.
528 (define_insn "adddi3_neon"
529 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
530 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
531 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
532 (clobber (reg:CC CC_REGNUM))]
535 switch (which_alternative)
537 case 0: /* fall through */
538 case 3: return "vadd.i64\t%P0, %P1, %P2";
544 default: gcc_unreachable ();
547 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
548 multiple,multiple,multiple")
549 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
550 (set_attr "length" "*,8,8,*,8,8,8")
551 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtraction (vsub) for the VDQ modes.  Floating-point modes are
;; only matched when flag_unsafe_math_optimizations is set.  The type
;; attribute is neon_fp_addsub_s<q> for float modes and neon_sub<q>
;; otherwise.
554 (define_insn "*sub<mode>3_neon"
555 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
556 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
557 (match_operand:VDQ 2 "s_register_operand" "w")))]
558 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
559 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
561 (if_then_else (match_test "<Is_float_mode>")
562 (const_string "neon_fp_addsub_s<q>")
563 (const_string "neon_sub<q>")))]
;; Named vsub pattern for the VH modes, available only with
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
;; NOTE(review): the type here is neon_sub<q>, whereas add<mode>3 for VH
;; selects neon_fp_addsub_s<q> for float modes -- confirm this is intended.
;; NOTE(review): the lines opening the RTL expression are not visible in
;; this excerpt.
566 (define_insn "sub<mode>3"
568 (match_operand:VH 0 "s_register_operand" "=w")
570 (match_operand:VH 1 "s_register_operand" "w")
571 (match_operand:VH 2 "s_register_operand" "w")))]
572 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
573 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
574 [(set_attr "type" "neon_sub<q>")]
;; vsub pattern for the VH modes that only requires TARGET_NEON_FP16INST,
;; i.e. it does not depend on flag_unsafe_math_optimizations.
;; NOTE(review): the lines opening the RTL expression are not visible in
;; this excerpt.
577 (define_insn "sub<mode>3_fp16"
579 (match_operand:VH 0 "s_register_operand" "=w")
581 (match_operand:VH 1 "s_register_operand" "w")
582 (match_operand:VH 2 "s_register_operand" "w")))]
583 "TARGET_NEON_FP16INST"
584 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585 [(set_attr "type" "neon_sub<q>")]
;; DImode subtraction with five alternatives; the pattern clobbers the
;; condition-code register.  Alternatives 0 and 4 use NEON registers and
;; emit vsub.i64 (arch neon_for_64bits and avoid_neon_for_64bits
;; respectively); alternatives 1-3 use core registers and emit a
;; subs/sbc pair on the %Q and %R halves.
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
588 (define_insn "subdi3_neon"
589 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
590 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
591 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
592 (clobber (reg:CC CC_REGNUM))]
595 switch (which_alternative)
597 case 0: /* fall through */
598 case 4: return "vsub.i64\t%P0, %P1, %P2";
599 case 1: /* fall through */
600 case 2: /* fall through */
601 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
602 default: gcc_unreachable ();
605 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
606 (set_attr "conds" "*,clob,clob,clob,*")
607 (set_attr "length" "*,8,8,8,*")
608 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiplication (vmul) for the VDQW modes.  Floating-point modes
;; are only matched when flag_unsafe_math_optimizations is set.  The type
;; attribute is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
611 (define_insn "*mul<mode>3_neon"
612 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
613 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
614 (match_operand:VDQW 2 "s_register_operand" "w")))]
615 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
616 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
618 (if_then_else (match_test "<Is_float_mode>")
619 (const_string "neon_fp_mul_s<q>")
620 (const_string "neon_mul_<V_elem_ch><q>")))]
623 /* Perform division using multiply-by-reciprocal.
624 The reciprocal is calculated using the Newton-Raphson method.
625 Enabled with -funsafe-math-optimizations -freciprocal-math
626 and disabled for -Os since it increases code size. */
;; Expander for division in the VCVTF modes, computed as
;; operands[1] * (1 / operands[2]).  The reciprocal starts from a vrecpe
;; estimate and is refined by two vrecps + multiply steps before the final
;; multiply into operand 0.  Requires TARGET_NEON, !optimize_size and
;; flag_reciprocal_math.
;; NOTE(review): the braces and closing lines of this expander are not
;; visible in this excerpt.
628 (define_expand "div<mode>3"
629 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
630 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
631 (match_operand:VCVTF 2 "s_register_operand" "w")))]
632 "TARGET_NEON && !optimize_size
633 && flag_reciprocal_math"
635 rtx rec = gen_reg_rtx (<MODE>mode);
636 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
638 /* Reciprocal estimate. */
639 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
641 /* Perform 2 iterations of newton-raphson method. */
642 for (int i = 0; i < 2; i++)
644 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
645 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
648 /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */
649 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
;; Multiply-accumulate (vmla) for the VDQW modes: operand 0 = operand 2 *
;; operand 3 + operand 1, with operand 1 tied to operand 0.  Floating-point
;; modes are only matched when flag_unsafe_math_optimizations is set.
655 (define_insn "mul<mode>3add<mode>_neon"
656 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
657 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
658 (match_operand:VDQW 3 "s_register_operand" "w"))
659 (match_operand:VDQW 1 "s_register_operand" "0")))]
660 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
661 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
663 (if_then_else (match_test "<Is_float_mode>")
664 (const_string "neon_fp_mla_s<q>")
665 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate (vmla.f16) for the VH modes: operand 0 = operand 2 *
;; operand 3 + operand 1, with operand 1 tied to operand 0.  Requires
;; TARGET_NEON_FP16INST.
668 (define_insn "mul<mode>3add<mode>_neon"
669 [(set (match_operand:VH 0 "s_register_operand" "=w")
670 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
671 (match_operand:VH 3 "s_register_operand" "w"))
672 (match_operand:VH 1 "s_register_operand" "0")))]
673 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
674 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
675 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract (vmls) for the VDQW modes: operand 0 = operand 1 -
;; operand 2 * operand 3, with operand 1 tied to operand 0.  Floating-point
;; modes are only matched when flag_unsafe_math_optimizations is set.
678 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
679 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
680 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
681 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
682 (match_operand:VDQW 3 "s_register_operand" "w"))))]
683 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
684 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
686 (if_then_else (match_test "<Is_float_mode>")
687 (const_string "neon_fp_mla_s<q>")
688 (const_string "neon_mla_<V_elem_ch><q>")))]
691 ;; Fused multiply-accumulate
692 ;; We define each insn twice here:
693 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
694 ;; phase can use it when converting to FMA.
695 ;; 2: without flag_unsafe_math_optimizations, for the intrinsics to use.
;; Fused multiply-add (vfma) for the VCVTF modes: operand 0 = operand 1 *
;; operand 2 + operand 3, with operand 3 tied to operand 0.  Requires
;; TARGET_NEON, TARGET_FMA and flag_unsafe_math_optimizations.
696 (define_insn "fma<VCVTF:mode>4"
697 [(set (match_operand:VCVTF 0 "register_operand" "=w")
698 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
699 (match_operand:VCVTF 2 "register_operand" "w")
700 (match_operand:VCVTF 3 "register_operand" "0")))]
701 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
702 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same vfma pattern as fma<VCVTF:mode>4 but without the
;; flag_unsafe_math_optimizations requirement; only TARGET_NEON and
;; TARGET_FMA are needed.
706 (define_insn "fma<VCVTF:mode>4_intrinsic"
707 [(set (match_operand:VCVTF 0 "register_operand" "=w")
708 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
709 (match_operand:VCVTF 2 "register_operand" "w")
710 (match_operand:VCVTF 3 "register_operand" "0")))]
711 "TARGET_NEON && TARGET_FMA"
712 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
713 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfma pattern for the VH modes, with operand 3 tied to operand 0.
;; Requires TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
;; NOTE(review): the line opening the RTL expression is not visible in
;; this excerpt.
716 (define_insn "fma<VH:mode>4"
717 [(set (match_operand:VH 0 "register_operand" "=w")
719 (match_operand:VH 1 "register_operand" "w")
720 (match_operand:VH 2 "register_operand" "w")
721 (match_operand:VH 3 "register_operand" "0")))]
722 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
723 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfma pattern for the VH modes that only requires TARGET_NEON_FP16INST;
;; operand 3 is tied to operand 0.
;; NOTE(review): the line opening the RTL expression is not visible in
;; this excerpt.
727 (define_insn "fma<VH:mode>4_intrinsic"
728 [(set (match_operand:VH 0 "register_operand" "=w")
730 (match_operand:VH 1 "register_operand" "w")
731 (match_operand:VH 2 "register_operand" "w")
732 (match_operand:VH 3 "register_operand" "0")))]
733 "TARGET_NEON_FP16INST"
734 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract (vfms) for the VCVTF modes: operand 0 =
;; (-operand 1) * operand 2 + operand 3, with operand 3 tied to operand 0.
;; Requires TARGET_NEON, TARGET_FMA and flag_unsafe_math_optimizations.
738 (define_insn "*fmsub<VCVTF:mode>4"
739 [(set (match_operand:VCVTF 0 "register_operand" "=w")
740 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
741 (match_operand:VCVTF 2 "register_operand" "w")
742 (match_operand:VCVTF 3 "register_operand" "0")))]
743 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
744 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
745 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfms pattern for the VCVTF modes with operand 1 negated and operand 3
;; tied to operand 0; only TARGET_NEON and TARGET_FMA are required.
;; NOTE(review): the line opening the RTL expression is not visible in
;; this excerpt.
748 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
749 [(set (match_operand:VCVTF 0 "register_operand" "=w")
751 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
752 (match_operand:VCVTF 2 "register_operand" "w")
753 (match_operand:VCVTF 3 "register_operand" "0")))]
754 "TARGET_NEON && TARGET_FMA"
755 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
756 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfms pattern for the VH modes with operand 1 negated and operand 3 tied
;; to operand 0; requires TARGET_NEON_FP16INST.
;; NOTE(review): the line opening the RTL expression is not visible in
;; this excerpt.
759 (define_insn "fmsub<VH:mode>4_intrinsic"
760 [(set (match_operand:VH 0 "register_operand" "=w")
762 (neg:VH (match_operand:VH 1 "register_operand" "w"))
763 (match_operand:VH 2 "register_operand" "w")
764 (match_operand:VH 3 "register_operand" "0")))]
765 "TARGET_NEON_FP16INST"
766 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
767 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vrint<variant>.f32 on a VCVTF-mode register, one pattern per
;; NEON_VRINT variant.  Requires TARGET_NEON and TARGET_VFP5.
;; NOTE(review): the line naming the unspec is not visible in this excerpt.
770 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
771 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
772 (unspec:VCVTF [(match_operand:VCVTF 1
773 "s_register_operand" "w")]
775 "TARGET_NEON && TARGET_VFP5"
776 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
777 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; vcvt<variant>.<su>32.f32: converts a VCVTF-mode operand to the integer
;; mode <V_cmp_result>, once per NEON_VCVT variant and per FIXUORS code.
;; Requires TARGET_NEON and TARGET_VFP5; marked as not predicable.
;; NOTE(review): the line naming the unspec is not visible in this excerpt.
780 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
781 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
782 (FIXUORS:<V_cmp_result> (unspec:VCVTF
783 [(match_operand:VCVTF 1 "register_operand" "w")]
785 "TARGET_NEON && TARGET_VFP5"
786 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
787 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
788 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 is a register-register
;; vorr; alternative 1 (operand 1 tied to operand 0, constraint Dl on
;; operand 2) is an immediate form produced by neon_output_logic_immediate
;; with mnemonic "vorr".
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
791 (define_insn "ior<mode>3"
792 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
793 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
794 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
797 switch (which_alternative)
799 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
800 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
801 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
802 default: gcc_unreachable ();
805 [(set_attr "type" "neon_logic<q>")]
808 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
809 ;; vorr. We support the pseudo-instruction vand instead, because that
810 ;; corresponds to the canonical form the middle-end expects to use for
811 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 is a register-register
;; vand; alternative 1 (operand 1 tied to operand 0, constraint DL on
;; operand 2) is an immediate form produced by neon_output_logic_immediate
;; with mnemonic "vand" and a fourth argument of 1 (0 in ior<mode>3).
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
813 (define_insn "and<mode>3"
814 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
815 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
816 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
819 switch (which_alternative)
821 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
822 case 1: return neon_output_logic_immediate ("vand", &operands[2],
823 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
824 default: gcc_unreachable ();
827 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT (vorn) for the VDQ modes: operand 0 = operand 1 | ~operand 2.
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
830 (define_insn "orn<mode>3_neon"
831 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
832 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
833 (match_operand:VDQ 1 "s_register_operand" "w")))]
835 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
836 [(set_attr "type" "neon_logic<q>")]
839 ;; TODO: investigate whether we should disable
840 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 uses
;; NEON registers (type neon_logic); alternatives 1-3 use core registers,
;; are typed "multiple" and are restricted by the arch attribute to a, t2
;; and t2 respectively.  The split applies when operand 0 is not a VFP
;; register.  The visible split code takes the SImode high and low parts
;; of all three operands to feed the two SImode ior/not sets, and also
;; contains a sequence emitting gen_one_cmpldi2 followed by gen_iordi3.
;; NOTE(review): the insn condition, output template and the tests
;; selecting between the two split strategies are not visible in this
;; excerpt.
842 (define_insn_and_split "orndi3_neon"
843 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
844 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
845 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
853 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
854 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
855 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
860 operands[3] = gen_highpart (SImode, operands[0]);
861 operands[0] = gen_lowpart (SImode, operands[0]);
862 operands[4] = gen_highpart (SImode, operands[2]);
863 operands[2] = gen_lowpart (SImode, operands[2]);
864 operands[5] = gen_highpart (SImode, operands[1]);
865 operands[1] = gen_lowpart (SImode, operands[1]);
869 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
870 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
874 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
875 (set_attr "length" "*,16,8,8")
876 (set_attr "arch" "any,a,t2,t2")]
;; Bit clear (vbic) for the VDQ modes: operand 0 = operand 1 & ~operand 2.
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
879 (define_insn "bic<mode>3_neon"
880 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
881 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
882 (match_operand:VDQ 1 "s_register_operand" "w")))]
884 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
885 [(set_attr "type" "neon_logic<q>")]
888 ;; Compare to *anddi_notdi_di.
;; DImode bit clear: operand 0 = operand 1 & ~operand 2.  Alternative 0
;; uses NEON registers (type neon_logic); alternatives 1 and 2 use core
;; registers, are typed "multiple" and have length 8.
;; NOTE(review): the condition and output template are not visible in this
;; excerpt.
889 (define_insn "bicdi3_neon"
890 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
891 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
892 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
898 [(set_attr "type" "neon_logic,multiple,multiple")
899 (set_attr "length" "*,8,8")]
;; Bitwise exclusive OR (veor) for the VDQ modes.
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
902 (define_insn "xor<mode>3"
903 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
904 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
905 (match_operand:VDQ 2 "s_register_operand" "w")))]
907 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
908 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement (vmvn) for the VDQ modes.
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
911 (define_insn "one_cmpl<mode>2"
912 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
913 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
915 "vmvn\t%<V_reg>0, %<V_reg>1"
916 [(set_attr "type" "neon_move<q>")]
;; Absolute value (vabs) for the VDQW modes.  The type attribute is
;; neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
919 (define_insn "abs<mode>2"
920 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
921 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
923 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
925 (if_then_else (match_test "<Is_float_mode>")
926 (const_string "neon_fp_abs_s<q>")
927 (const_string "neon_abs<q>")))]
;; Negation (vneg) for the VDQW modes.  The type attribute is
;; neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
;; NOTE(review): the condition of this pattern is not visible in this
;; excerpt.
930 (define_insn "neg<mode>2"
931 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
932 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
934 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
936 (if_then_else (match_test "<Is_float_mode>")
937 (const_string "neon_fp_neg_s<q>")
938 (const_string "neon_neg<q>")))]
;; DImode negation with four alternatives (two on NEON registers, two on
;; core registers).  Operand 2 is a DImode scratch that is a real
;; early-clobbered NEON register only in alternative 1 (X elsewhere); the
;; condition-code register is clobbered.  Length is 8 and type "multiple".
;; NOTE(review): the condition and output template are not visible in this
;; excerpt.
941 (define_insn "negdi2_neon"
942 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
943 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
944 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
945 (clobber (reg:CC CC_REGNUM))]
948 [(set_attr "length" "8")
949 (set_attr "type" "multiple")]
952 ; Split negdi2_neon for vfp registers
;; After reload, when operand 0 is a VFP register, the negation is
;; rewritten as: operand 2 = 0, then operand 0 = operand 2 - operand 1
;; (with the condition-code clobber).  If no scratch register was
;; allocated (operand 2 is not a REG), operand 0 itself is used as the
;; temporary.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt.
954 [(set (match_operand:DI 0 "s_register_operand" "")
955 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
956 (clobber (match_scratch:DI 2 ""))
957 (clobber (reg:CC CC_REGNUM))]
958 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
959 [(set (match_dup 2) (const_int 0))
960 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
961 (clobber (reg:CC CC_REGNUM))])]
963 if (!REG_P (operands[2]))
964 operands[2] = operands[0];
968 ; Split negdi2_neon for core registers
;; After reload, when operand 0 satisfies arm_general_register_operand,
;; the negation is re-emitted as a plain neg:DI with only the
;; condition-code clobber, i.e. without the scratch operand.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt.
970 [(set (match_operand:DI 0 "s_register_operand" "")
971 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
972 (clobber (match_scratch:DI 2 ""))
973 (clobber (reg:CC CC_REGNUM))]
974 "TARGET_32BIT && reload_completed
975 && arm_general_register_operand (operands[0], DImode)"
976 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
977 (clobber (reg:CC CC_REGNUM))])]
;; Absolute value / negation for the VH modes, one pattern per ABSNEG
;; code, emitted as v<absneg_str>.  Requires TARGET_NEON_FP16INST.
981 (define_insn "<absneg_str><mode>2"
982 [(set (match_operand:VH 0 "s_register_operand" "=w")
983 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
984 "TARGET_NEON_FP16INST"
985 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
986 [(set_attr "type" "neon_abs<q>")]
;; Expander that forwards to the <absneg_str><mode>2 pattern for the VH
;; modes.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): some lines of this expander are not visible in this
;; excerpt.
989 (define_expand "neon_v<absneg_str><mode>"
991 (match_operand:VH 0 "s_register_operand")
992 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
993 "TARGET_NEON_FP16INST"
995 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
;; Rounding instructions for the VH modes; the mnemonic comes from
;; <fp16_rnd_insn>.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the lines naming the unspec are not visible in this
;; excerpt.
999 (define_insn "neon_v<fp16_rnd_str><mode>"
1000 [(set (match_operand:VH 0 "s_register_operand" "=w")
1002 [(match_operand:VH 1 "s_register_operand" "w")]
1004 "TARGET_NEON_FP16INST"
1005 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
1006 [(set_attr "type" "neon_fp_round_s<q>")]
;; Half-precision vector vrsqrte.f16 (register to register) for the VH
;; modes.  Requires TARGET_NEON_FP16INST.
1009 (define_insn "neon_vrsqrte<mode>"
1010 [(set (match_operand:VH 0 "s_register_operand" "=w")
1012 [(match_operand:VH 1 "s_register_operand" "w")]
1014 "TARGET_NEON_FP16INST"
1015 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
1016 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned element-wise minimum of two integer vectors (VDQIW modes),
;; emitted as vmin with the unsigned element suffix <V_u_elem>.
1019 (define_insn "*umin<mode>3_neon"
1020 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1021 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1022 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1024 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1025 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned element-wise maximum of two integer vectors (VDQIW modes),
;; emitted as vmax with the unsigned element suffix <V_u_elem>.
1028 (define_insn "*umax<mode>3_neon"
1029 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1030 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1031 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1033 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1034 [(set_attr "type" "neon_minmax<q>")]
;; Signed element-wise minimum for the VDQW modes, emitted as vmin with the
;; <V_s_elem> suffix.  The scheduling type is chosen per mode: the
;; floating-point min/max type when <Is_float_mode> holds, the integer one
;; otherwise.
1037 (define_insn "*smin<mode>3_neon"
1038 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1039 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1040 (match_operand:VDQW 2 "s_register_operand" "w")))]
1042 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1044 (if_then_else (match_test "<Is_float_mode>")
1045 (const_string "neon_fp_minmax_s<q>")
1046 (const_string "neon_minmax<q>")))]
;; Signed element-wise maximum for the VDQW modes, emitted as vmax with the
;; <V_s_elem> suffix.  The scheduling type is chosen per mode: the
;; floating-point min/max type when <Is_float_mode> holds, the integer one
;; otherwise.
1049 (define_insn "*smax<mode>3_neon"
1050 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1051 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1052 (match_operand:VDQW 2 "s_register_operand" "w")))]
1054 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1056 (if_then_else (match_test "<Is_float_mode>")
1057 (const_string "neon_fp_minmax_s<q>")
1058 (const_string "neon_minmax<q>")))]
1061 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1062 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a vector
;; register and prints vshl directly; alternative 1 shifts by an immediate
;; vector constant ("Dn") and delegates the output text to
;; neon_output_shift_immediate with the "vshl" mnemonic and 'i' type letter.
1065 (define_insn "vashl<mode>3"
1066 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1067 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1068 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1071 switch (which_alternative)
1073 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1074 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1076 VALID_NEON_QREG_MODE (<MODE>mode),
1078 default: gcc_unreachable ();
1081 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Vector arithmetic shift right by an immediate for the VDQIW modes.  The
;; assembly is produced by neon_output_shift_immediate with the "vshr"
;; mnemonic and the signed ('s') type letter.
1084 (define_insn "vashr<mode>3_imm"
1085 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1086 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1087 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1090 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1091 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1094 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector logical shift right by an immediate for the VDQIW modes.  The
;; assembly is produced by neon_output_shift_immediate with the "vshr"
;; mnemonic and the unsigned ('u') type letter.
1097 (define_insn "vlshr<mode>3_imm"
1098 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1099 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1100 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1103 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1104 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1107 [(set_attr "type" "neon_shift_imm<q>")]
1110 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
1111 ; amount, with signed operands. This is essentially the same as ashl<mode>3
1112 ; above, but using an unspec in case GCC tries anything tricky with negative
;; Register-controlled vshl with the signed element suffix, wrapped in
;; UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander uses it with a negated
;; shift-count vector to perform an arithmetic right shift.
1115 (define_insn "ashl<mode>3_signed"
1116 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1117 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1118 (match_operand:VDQI 2 "s_register_operand" "w")]
1119 UNSPEC_ASHIFT_SIGNED))]
1121 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1122 [(set_attr "type" "neon_shift_reg<q>")]
1125 ; Used for implementing logical shift-right, which is a left-shift by a negative
1126 ; amount, with unsigned operands.
;; Register-controlled vshl with the unsigned element suffix, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander uses it with a negated
;; shift-count vector to perform a logical right shift.
1128 (define_insn "ashl<mode>3_unsigned"
1129 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1130 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1131 (match_operand:VDQI 2 "s_register_operand" "w")]
1132 UNSPEC_ASHIFT_UNSIGNED))]
1134 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1135 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic shift right.  When the shift count is a
;; register, the count vector is negated into a fresh pseudo and the shift
;; is done with ashl<mode>3_signed (a left shift by a negative amount).
;; Otherwise the immediate form vashr<mode>3_imm is emitted.
1138 (define_expand "vashr<mode>3"
1139 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1140 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1141 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1144 if (s_register_operand (operands[2], <MODE>mode))
1146 rtx neg = gen_reg_rtx (<MODE>mode);
1147 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1148 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1151 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for vector logical shift right.  When the shift count is a
;; register, the count vector is negated into a fresh pseudo and the shift
;; is done with ashl<mode>3_unsigned (a left shift by a negative amount).
;; Otherwise the immediate form vlshr<mode>3_imm is emitted.
1155 (define_expand "vlshr<mode>3"
1156 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1157 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1158 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1161 if (s_register_operand (operands[2], <MODE>mode))
1163 rtx neg = gen_reg_rtx (<MODE>mode);
1164 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1165 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1168 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1174 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1175 ;; leaving the upper half uninitialized. This is OK since the shift
1176 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1177 ;; data flow analysis however, we pretend the full register is set
;; Two alternatives for placing an SImode shift count in lane 0 of a D
;; register: a vld1.32 lane load from memory ("Um"), or a vmov.32 from a
;; core register ("r").  The result is modelled as a DImode unspec
;; (UNSPEC_LOAD_COUNT).
1179 (define_insn "neon_load_count"
1180 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1181 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1182 UNSPEC_LOAD_COUNT))]
1185 vld1.32\t{%P0[0]}, %A1
1186 vmov.32\t%P0[0], %1"
1187 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift in a NEON register without any clobbers.  Only valid
;; after reload; a constant shift count must lie in the range 0..63.
;; Alternative 0 prints the immediate form, alternative 1 the
;; register-count form of vshl.u64.
1190 (define_insn "ashldi3_neon_noclobber"
1191 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1192 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1193 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1194 "TARGET_NEON && reload_completed
1195 && (!CONST_INT_P (operands[2])
1196 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1198 vshl.u64\t%P0, %P1, %2
1199 vshl.u64\t%P0, %P1, %P2"
1200 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift that may be allocated to either NEON ("w") or core
;; ("r") registers; it is split after reload.
;;
;; When operand 0 is a VFP register:
;;   - a constant count below 1 becomes a plain DImode move;
;;   - a constant count above 63 is clamped to 63;
;;   - a non-constant count is first loaded into the DImode scratch
;;     (operand 5) with neon_load_count;
;;   and the shift itself is emitted through ashldi3_neon_noclobber.
;; Otherwise the shift is expanded on core registers by
;; arm_emit_coreregs_64bit_shift, using the SImode scratches (operands 3
;; and 4); source and destination must either fully overlap or not overlap
;; at all (asserted).
1203 (define_insn_and_split "ashldi3_neon"
1204 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
1205 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
1206 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
1207 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
1208 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1209 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
1210 (clobber (reg:CC_C CC_REGNUM))]
1213 "TARGET_NEON && reload_completed"
1217 if (IS_VFP_REGNUM (REGNO (operands[0])))
1219 if (CONST_INT_P (operands[2]))
1221 if (INTVAL (operands[2]) < 1)
1223 emit_insn (gen_movdi (operands[0], operands[1]));
1226 else if (INTVAL (operands[2]) > 63)
1227 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1231 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1232 operands[2] = operands[5];
1235 /* Ditch the unnecessary clobbers. */
1236 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1241 /* The shift expanders support either full overlap or no overlap. */
1242 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1243 || REGNO (operands[0]) == REGNO (operands[1]));
1245 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1246 operands[2], operands[3], operands[4]);
1250 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1251 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1252 (set_attr "type" "multiple")]
1255 ; The shift amount needs to be negated for right-shifts
;; DImode vshl.s64 by a register count, modelled as UNSPEC_ASHIFT_SIGNED.
;; Only valid after reload.  The <shift>di3_neon splitter emits it (via
;; gen_<shifttype>_shift_di3_neon) after negating the count.
1256 (define_insn "signed_shift_di3_neon"
1257 [(set (match_operand:DI 0 "s_register_operand" "=w")
1258 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1259 (match_operand:DI 2 "s_register_operand" " w")]
1260 UNSPEC_ASHIFT_SIGNED))]
1261 "TARGET_NEON && reload_completed"
1262 "vshl.s64\t%P0, %P1, %P2"
1263 [(set_attr "type" "neon_shift_reg")]
1266 ; The shift amount needs to be negated for right-shifts
;; DImode vshl.u64 by a register count, modelled as UNSPEC_ASHIFT_UNSIGNED.
;; Only valid after reload.  The <shift>di3_neon splitter emits it (via
;; gen_<shifttype>_shift_di3_neon) after negating the count.
1267 (define_insn "unsigned_shift_di3_neon"
1268 [(set (match_operand:DI 0 "s_register_operand" "=w")
1269 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1270 (match_operand:DI 2 "s_register_operand" " w")]
1271 UNSPEC_ASHIFT_UNSIGNED))]
1272 "TARGET_NEON && reload_completed"
1273 "vshl.u64\t%P0, %P1, %P2"
1274 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic shift right by a constant in a NEON register, without
;; clobbers.  Only valid after reload and for counts in the range 1..64;
;; printed as vshr.s64.
1277 (define_insn "ashrdi3_neon_imm_noclobber"
1278 [(set (match_operand:DI 0 "s_register_operand" "=w")
1279 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1280 (match_operand:DI 2 "const_int_operand" " i")))]
1281 "TARGET_NEON && reload_completed
1282 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1283 "vshr.s64\t%P0, %P1, %2"
1284 [(set_attr "type" "neon_shift_imm")]
;; DImode logical shift right by a constant in a NEON register, without
;; clobbers.  Only valid after reload and for counts in the range 1..64;
;; printed as vshr.u64.
1287 (define_insn "lshrdi3_neon_imm_noclobber"
1288 [(set (match_operand:DI 0 "s_register_operand" "=w")
1289 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1290 (match_operand:DI 2 "const_int_operand" " i")))]
1291 "TARGET_NEON && reload_completed
1292 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1293 "vshr.u64\t%P0, %P1, %2"
1294 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (one pattern per RSHIFTS code) that may be allocated
;; to either NEON ("w") or core ("r") registers; split after reload.
;;
;; When operand 0 is a VFP register:
;;   - a constant count below 1 becomes a plain DImode move;
;;   - a constant count above 64 is clamped to 64, and the immediate form
;;     <shift>di3_neon_imm_noclobber is emitted;
;;   - a register count is negated into operand 3, loaded into the DImode
;;     scratch (operand 5) with neon_load_count, and the shift is done as
;;     a left shift by that negative amount (<shifttype>_shift_di3_neon).
;; Otherwise the shift is expanded on core registers by
;; arm_emit_coreregs_64bit_shift; source and destination must either fully
;; overlap or not overlap at all (asserted).
1299 (define_insn_and_split "<shift>di3_neon"
1300 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
1301 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1302 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1303 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1304 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1305 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1306 (clobber (reg:CC CC_REGNUM))]
1309 "TARGET_NEON && reload_completed"
1313 if (IS_VFP_REGNUM (REGNO (operands[0])))
1315 if (CONST_INT_P (operands[2]))
1317 if (INTVAL (operands[2]) < 1)
1319 emit_insn (gen_movdi (operands[0], operands[1]));
1322 else if (INTVAL (operands[2]) > 64)
1323 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1325 /* Ditch the unnecessary clobbers. */
1326 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1332 /* We must use a negative left-shift. */
1333 emit_insn (gen_negsi2 (operands[3], operands[2]));
1334 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1335 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1341 /* The shift expanders support either full overlap or no overlap. */
1342 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1343 || REGNO (operands[0]) == REGNO (operands[1]));
1345 /* This clobbers CC (ASHIFTRT by register only). */
1346 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1347 operands[2], operands[3], operands[4]);
1352 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1353 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1354 (set_attr "type" "multiple")]
1357 ;; Widening operations
;; Expander for a signed widening sum of a quad-word integer vector
;; (operand 1, VQI modes) into a double-width accumulator.  The accumulator
;; input (operand 2) is copied to operand 0 when they are different rtxes,
;; then the low and high halves of operand 1 are added in turn via the
;; vec_sel_widen_ssum_lo/hi patterns.  p1 and p2 are the half-selecting
;; parallels built by arm_simd_vect_par_cnst_half.
1359 (define_expand "widen_ssum<mode>3"
1360 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1361 (plus:<V_double_width>
1362 (sign_extend:<V_double_width>
1363 (match_operand:VQI 1 "s_register_operand" ""))
1364 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1367 machine_mode mode = GET_MODE (operands[1]);
1370 p1 = arm_simd_vect_par_cnst_half (mode, false);
1371 p2 = arm_simd_vect_par_cnst_half (mode, true);
1373 if (operands[0] != operands[2])
1374 emit_move_insn (operands[0], operands[2]);
1376 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1380 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Sign-extends the half of operand 1 selected by a "low" parallel
;; (operand 2) and adds it to the accumulator (operand 3, tied to
;; operand 0) with vaddw.  The D register printed for the selected half is
;; %f1 on big-endian and %e1 otherwise.
1388 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1389 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1390 (plus:<V_double_width>
1391 (sign_extend:<V_double_width>
1392 (vec_select:<V_HALF>
1393 (match_operand:VQI 1 "s_register_operand" "%w")
1394 (match_operand:VQI 2 "vect_par_constant_low" "")))
1395 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1398 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1399 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1401 [(set_attr "type" "neon_add_widen")])
;; Sign-extends the half of operand 1 selected by a "high" parallel
;; (operand 2) and adds it to the accumulator (operand 3, tied to
;; operand 0) with vaddw.  The D register printed for the selected half is
;; %e1 on big-endian and %f1 otherwise.
1403 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1404 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1405 (plus:<V_double_width>
1406 (sign_extend:<V_double_width>
1407 (vec_select:<V_HALF>
1408 (match_operand:VQI 1 "s_register_operand" "%w")
1409 (match_operand:VQI 2 "vect_par_constant_high" "")))
1410 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1413 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1414 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1416 [(set_attr "type" "neon_add_widen")])
;; Signed widening sum for the VW modes: operand 1 is sign-extended to
;; <V_widen> and added to the wide operand 2 with a single vaddw, which
;; takes the wide accumulator as a Q register and the narrow input as a
;; D register.
1418 (define_insn "widen_ssum<mode>3"
1419 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1421 (sign_extend:<V_widen>
1422 (match_operand:VW 1 "s_register_operand" "%w"))
1423 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1425 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1426 [(set_attr "type" "neon_add_widen")]
;; Expander for an unsigned widening sum of a quad-word integer vector
;; (operand 1, VQI modes) into a double-width accumulator.  The accumulator
;; input (operand 2) is copied to operand 0 when they are different rtxes,
;; then the low and high halves of operand 1 are added in turn via the
;; vec_sel_widen_usum_lo/hi patterns.  p1 and p2 are the half-selecting
;; parallels built by arm_simd_vect_par_cnst_half.
1429 (define_expand "widen_usum<mode>3"
1430 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1431 (plus:<V_double_width>
1432 (zero_extend:<V_double_width>
1433 (match_operand:VQI 1 "s_register_operand" ""))
1434 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1437 machine_mode mode = GET_MODE (operands[1]);
1440 p1 = arm_simd_vect_par_cnst_half (mode, false);
1441 p2 = arm_simd_vect_par_cnst_half (mode, true);
1443 if (operands[0] != operands[2])
1444 emit_move_insn (operands[0], operands[2]);
1446 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1450 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Zero-extends the half of operand 1 selected by a "low" parallel
;; (operand 2) and adds it to the accumulator (operand 3, tied to
;; operand 0) with vaddw.  The D register printed for the selected half is
;; %f1 on big-endian and %e1 otherwise.
1458 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1459 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1460 (plus:<V_double_width>
1461 (zero_extend:<V_double_width>
1462 (vec_select:<V_HALF>
1463 (match_operand:VQI 1 "s_register_operand" "%w")
1464 (match_operand:VQI 2 "vect_par_constant_low" "")))
1465 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1468 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1469 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1471 [(set_attr "type" "neon_add_widen")])
;; Zero-extends the half of operand 1 selected by a "high" parallel
;; (operand 2) and adds it to the accumulator (operand 3, tied to
;; operand 0) with vaddw.  The D register printed for the selected half is
;; %e1 on big-endian and %f1 otherwise.
1473 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1474 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1475 (plus:<V_double_width>
1476 (zero_extend:<V_double_width>
1477 (vec_select:<V_HALF>
1478 (match_operand:VQI 1 "s_register_operand" "%w")
1479 (match_operand:VQI 2 "vect_par_constant_high" "")))
1480 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1483 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1484 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1486 [(set_attr "type" "neon_add_widen")])
;; Unsigned widening sum for the VW modes: operand 1 is zero-extended to
;; <V_widen> and added to the wide operand 2 with a single vaddw, which
;; takes the wide accumulator as a Q register and the narrow input as a
;; D register.
1488 (define_insn "widen_usum<mode>3"
1489 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1490 (plus:<V_widen> (zero_extend:<V_widen>
1491 (match_operand:VW 1 "s_register_operand" "%w"))
1492 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1494 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1495 [(set_attr "type" "neon_add_widen")]
1498 ;; Helpers for quad-word reduction operations
1500 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1501 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1502 ; N/2-element vector.
;; V4SI instance of the quad-halves helper: combines lanes 0-1 of operand 1
;; with lanes 2-3 of the same register into a V2SI result.  The mnemonic
;; and signedness come from <VQH_mnem>/<VQH_sign>; the two halves are
;; addressed as the %e1 and %f1 D registers.
1504 (define_insn "quad_halves_<code>v4si"
1505 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1507 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1508 (parallel [(const_int 0) (const_int 1)]))
1509 (vec_select:V2SI (match_dup 1)
1510 (parallel [(const_int 2) (const_int 3)]))))]
1512 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1513 [(set_attr "vqh_mnem" "<VQH_mnem>")
1514 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF instance of the quad-halves helper: combines lanes 0-1 of operand 1
;; with lanes 2-3 of the same register into a V2SF result using the .f32
;; form of <VQH_mnem>.  Only enabled with flag_unsafe_math_optimizations.
1517 (define_insn "quad_halves_<code>v4sf"
1518 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1520 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1521 (parallel [(const_int 0) (const_int 1)]))
1522 (vec_select:V2SF (match_dup 1)
1523 (parallel [(const_int 2) (const_int 3)]))))]
1524 "TARGET_NEON && flag_unsafe_math_optimizations"
1525 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1526 [(set_attr "vqh_mnem" "<VQH_mnem>")
1527 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI instance of the quad-halves helper: combines lanes 0-3 of operand 1
;; with lanes 4-7 of the same register into a V4HI result.  The mnemonic
;; and signedness come from <VQH_mnem>/<VQH_sign>; the two halves are
;; addressed as the %e1 and %f1 D registers.
;; Operand 0 appears only as the SET destination and is never read, so it
;; takes the write-only "=w" constraint, matching the V4SI and V4SF forms.
1530 (define_insn "quad_halves_<code>v8hi"
1531 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1533 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1534 (parallel [(const_int 0) (const_int 1)
1535 (const_int 2) (const_int 3)]))
1536 (vec_select:V4HI (match_dup 1)
1537 (parallel [(const_int 4) (const_int 5)
1538 (const_int 6) (const_int 7)]))))]
1540 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1541 [(set_attr "vqh_mnem" "<VQH_mnem>")
1542 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI instance of the quad-halves helper: combines lanes 0-7 of
;; operand 1 with lanes 8-15 of the same register into a V8QI result.  The
;; mnemonic and signedness come from <VQH_mnem>/<VQH_sign>; the two halves
;; are addressed as the %e1 and %f1 D registers.
;; Operand 0 appears only as the SET destination and is never read, so it
;; takes the write-only "=w" constraint, matching the V4SI and V4SF forms.
1545 (define_insn "quad_halves_<code>v16qi"
1546 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1548 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1549 (parallel [(const_int 0) (const_int 1)
1550 (const_int 2) (const_int 3)
1551 (const_int 4) (const_int 5)
1552 (const_int 6) (const_int 7)]))
1553 (vec_select:V8QI (match_dup 1)
1554 (parallel [(const_int 8) (const_int 9)
1555 (const_int 10) (const_int 11)
1556 (const_int 12) (const_int 13)
1557 (const_int 14) (const_int 15)]))))]
1559 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1560 [(set_attr "vqh_mnem" "<VQH_mnem>")
1561 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expander taking a 128-bit register (operand 0, ANY128 modes) and a
;; half-width register (operand 1).  It emits a move whose destination is
;; the <V_HALF>mode subreg of operand 0 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).
1564 (define_expand "move_hi_quad_<mode>"
1565 [(match_operand:ANY128 0 "s_register_operand" "")
1566 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1569 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1570 GET_MODE_SIZE (<V_HALF>mode)),
;; Expander taking a 128-bit register (operand 0, ANY128 modes) and a
;; half-width register (operand 1).  It emits a move whose destination is
;; a <V_HALF>mode subreg of operand 0.
1575 (define_expand "move_lo_quad_<mode>"
1576 [(match_operand:ANY128 0 "s_register_operand" "")
1577 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1580 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1586 ;; Reduction operations
;; Sum reduction of a double-word vector (VD modes) to a scalar.  Float
;; modes are only accepted with flag_unsafe_math_optimizations.  The
;; reduction is done by neon_pairwise_reduce using vpadd, after which
;; element 0 of the temporary vector is extracted into operand 0.
1588 (define_expand "reduc_plus_scal_<mode>"
1589 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1590 (match_operand:VD 1 "s_register_operand" "")]
1591 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1593 rtx vec = gen_reg_rtx (<MODE>mode);
1594 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1595 &gen_neon_vpadd_internal<mode>);
1596 /* The same result is actually computed into every element. */
1597 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Sum reduction of a quad-word vector (VQ modes) to a scalar; little-endian
;; only, and float modes require flag_unsafe_math_optimizations.  The two
;; halves are first added together with quad_halves_plus<mode>, then the
;; half-width reduc_plus_scal expander finishes the reduction.
1601 (define_expand "reduc_plus_scal_<mode>"
1602 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1603 (match_operand:VQ 1 "s_register_operand" "")]
1604 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1605 && !BYTES_BIG_ENDIAN"
1607 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1609 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1610 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector to a DImode scalar; little-endian only.
;; arm_reduc_plus_internal_v2di computes the sum into a temporary vector and
;; element 0 of that vector is then extracted into operand 0.
1615 (define_expand "reduc_plus_scal_v2di"
1616 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1617 (match_operand:V2DI 1 "s_register_operand" "")]
1618 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1620 rtx vec = gen_reg_rtx (V2DImode);
1622 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1623 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
;; Adds the two 64-bit halves of operand 1 (%e1 and %f1) with vadd.i64 and
;; writes the sum to the %e0 half of operand 0.  Little-endian only.
1628 (define_insn "arm_reduc_plus_internal_v2di"
1629 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1630 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1632 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1633 "vadd.i64\t%e0, %e1, %f1"
1634 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a double-word vector (VD modes) to a scalar.
;; Float modes are only accepted with flag_unsafe_math_optimizations.  The
;; reduction is done by neon_pairwise_reduce using vpsmin, after which
;; element 0 of the temporary vector is extracted into operand 0.
1637 (define_expand "reduc_smin_scal_<mode>"
1638 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1639 (match_operand:VD 1 "s_register_operand" "")]
1640 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1642 rtx vec = gen_reg_rtx (<MODE>mode);
1644 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1645 &gen_neon_vpsmin<mode>);
1646 /* The result is computed into every element of the vector. */
1647 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction of a quad-word vector (VQ modes) to a scalar;
;; little-endian only, and float modes require
;; flag_unsafe_math_optimizations.  The two halves are first combined with
;; quad_halves_smin<mode>, then the half-width expander finishes the job.
1651 (define_expand "reduc_smin_scal_<mode>"
1652 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1653 (match_operand:VQ 1 "s_register_operand" "")]
1654 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1655 && !BYTES_BIG_ENDIAN"
1657 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1659 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1660 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of a double-word vector (VD modes) to a scalar.
;; Float modes are only accepted with flag_unsafe_math_optimizations.  The
;; reduction is done by neon_pairwise_reduce using vpsmax, after which
;; element 0 of the temporary vector is extracted into operand 0.
1665 (define_expand "reduc_smax_scal_<mode>"
1666 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1667 (match_operand:VD 1 "s_register_operand" "")]
1668 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1670 rtx vec = gen_reg_rtx (<MODE>mode);
1671 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1672 &gen_neon_vpsmax<mode>);
1673 /* The result is computed into every element of the vector. */
1674 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction of a quad-word vector (VQ modes) to a scalar;
;; little-endian only, and float modes require
;; flag_unsafe_math_optimizations.  The two halves are first combined with
;; quad_halves_smax<mode>, then the half-width expander finishes the job.
1678 (define_expand "reduc_smax_scal_<mode>"
1679 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1680 (match_operand:VQ 1 "s_register_operand" "")]
1681 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1682 && !BYTES_BIG_ENDIAN"
1684 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1686 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1687 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of a double-word integer vector (VDI modes)
;; to a scalar.  The reduction is done by neon_pairwise_reduce using
;; vpumin, after which element 0 of the temporary vector is extracted into
;; operand 0.
1692 (define_expand "reduc_umin_scal_<mode>"
1693 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1694 (match_operand:VDI 1 "s_register_operand" "")]
1697 rtx vec = gen_reg_rtx (<MODE>mode);
1698 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1699 &gen_neon_vpumin<mode>);
1700 /* The result is computed into every element of the vector. */
1701 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction of a quad-word integer vector (VQI modes) to
;; a scalar; little-endian only.  The two halves are first combined with
;; quad_halves_umin<mode>, then the half-width expander finishes the job.
1705 (define_expand "reduc_umin_scal_<mode>"
1706 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1707 (match_operand:VQI 1 "s_register_operand" "")]
1708 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1710 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1712 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1713 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of a double-word integer vector (VDI modes)
;; to a scalar.  The reduction is done by neon_pairwise_reduce using
;; vpumax, after which element 0 of the temporary vector is extracted into
;; operand 0.
1718 (define_expand "reduc_umax_scal_<mode>"
1719 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1720 (match_operand:VDI 1 "s_register_operand" "")]
1723 rtx vec = gen_reg_rtx (<MODE>mode);
1724 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1725 &gen_neon_vpumax<mode>);
1726 /* The result is computed into every element of the vector. */
1727 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction of a quad-word integer vector (VQI modes) to
;; a scalar; little-endian only.  The two halves are first combined with
;; quad_halves_umax<mode>, then the half-width expander finishes the job.
1731 (define_expand "reduc_umax_scal_<mode>"
1732 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1733 (match_operand:VQI 1 "s_register_operand" "")]
1734 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1736 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1738 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1739 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add of two double-word vectors (VD modes), emitted as vpadd with
;; the <V_if_elem> suffix.  Passed to neon_pairwise_reduce by the
;; reduc_plus_scal expander.  The scheduling type depends on whether the
;; mode is floating-point.
1744 (define_insn "neon_vpadd_internal<mode>"
1745 [(set (match_operand:VD 0 "s_register_operand" "=w")
1746 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1747 (match_operand:VD 2 "s_register_operand" "w")]
1750 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1751 ;; Assume this schedules like vadd.
1753 (if_then_else (match_test "<Is_float_mode>")
1754 (const_string "neon_fp_reduc_add_s<q>")
1755 (const_string "neon_reduc_add<q>")))]
;; Half-precision pairwise add on V4HF, emitted as vpadd.f16.  Requires
;; TARGET_NEON_FP16INST.
1758 (define_insn "neon_vpaddv4hf"
1760 (match_operand:V4HF 0 "s_register_operand" "=w")
1761 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1762 (match_operand:V4HF 2 "s_register_operand" "w")]
1764 "TARGET_NEON_FP16INST"
1765 "vpadd.f16\t%P0, %P1, %P2"
1766 [(set_attr "type" "neon_reduc_add")]
;; Signed pairwise minimum of two double-word vectors (VD modes), emitted
;; as vpmin with the <V_s_elem> suffix.  The scheduling type depends on
;; whether the mode is floating-point.
1769 (define_insn "neon_vpsmin<mode>"
1770 [(set (match_operand:VD 0 "s_register_operand" "=w")
1771 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1772 (match_operand:VD 2 "s_register_operand" "w")]
1775 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1777 (if_then_else (match_test "<Is_float_mode>")
1778 (const_string "neon_fp_reduc_minmax_s<q>")
1779 (const_string "neon_reduc_minmax<q>")))]
;; Signed pairwise maximum of two double-word vectors (VD modes), emitted
;; as vpmax with the <V_s_elem> suffix.  The scheduling type depends on
;; whether the mode is floating-point.
1782 (define_insn "neon_vpsmax<mode>"
1783 [(set (match_operand:VD 0 "s_register_operand" "=w")
1784 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1785 (match_operand:VD 2 "s_register_operand" "w")]
1788 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1790 (if_then_else (match_test "<Is_float_mode>")
1791 (const_string "neon_fp_reduc_minmax_s<q>")
1792 (const_string "neon_reduc_minmax<q>")))]
;; Unsigned pairwise minimum of two double-word integer vectors (VDI
;; modes), emitted as vpmin with the <V_u_elem> suffix.
1795 (define_insn "neon_vpumin<mode>"
1796 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1797 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1798 (match_operand:VDI 2 "s_register_operand" "w")]
1801 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1802 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Unsigned pairwise maximum of two double-word integer vectors (VDI
;; modes), emitted as vpmax with the <V_u_elem> suffix.
1805 (define_insn "neon_vpumax<mode>"
1806 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1807 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1808 (match_operand:VDI 2 "s_register_operand" "w")]
1811 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1812 [(set_attr "type" "neon_reduc_minmax<q>")]
1815 ;; Saturating arithmetic
1817 ; NOTE: Neon supports many more saturating variants of instructions than the
1818 ; following, but these are all GCC currently understands.
1819 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1820 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) on double-word vectors, emitted as vqadd
;; with the signed element suffix.
1823 (define_insn "*ss_add<mode>_neon"
1824 [(set (match_operand:VD 0 "s_register_operand" "=w")
1825 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1826 (match_operand:VD 2 "s_register_operand" "w")))]
1828 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1829 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) on double-word vectors, emitted as
;; vqadd with the unsigned element suffix.
1832 (define_insn "*us_add<mode>_neon"
1833 [(set (match_operand:VD 0 "s_register_operand" "=w")
1834 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1835 (match_operand:VD 2 "s_register_operand" "w")))]
1837 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1838 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) on double-word vectors, emitted as
;; vqsub with the signed element suffix.
1841 (define_insn "*ss_sub<mode>_neon"
1842 [(set (match_operand:VD 0 "s_register_operand" "=w")
1843 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1844 (match_operand:VD 2 "s_register_operand" "w")))]
1846 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1847 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) on double-word vectors, emitted
;; as vqsub with the unsigned element suffix.
1850 (define_insn "*us_sub<mode>_neon"
1851 [(set (match_operand:VD 0 "s_register_operand" "=w")
1852 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1853 (match_operand:VD 2 "s_register_operand" "w")))]
1855 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1856 [(set_attr "type" "neon_qsub<q>")]
1859 ;; Conditional instructions. These are comparisons with conditional moves for
1860 ;; vectors. They perform the assignment:
1862 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1864 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select for the VDQW modes (float modes only with
;; flag_unsafe_math_optimizations).  Operand 3 is the comparison of
;; operands 4 and 5; operands 1 and 2 are the values to select between.
;;
;; Outline of the expansion:
;;   1. Operand 5 is checked against the zero vector and otherwise forced
;;      into a register when it is not one already.
;;   2. A base comparison generator and its complimentary generator are
;;      picked from vcge/vcgt/vceq according to the comparison code.
;;   3. One or two comparisons are emitted into "mask" (with "tmp" as a
;;      second result that is ORed in for the UNEQ/ORDERED/UNORDERED
;;      handling described in the inline comments).
;;   4. A final neon_vbsl selects between the two value operands using the
;;      mask, with the value operands swapped when swap_bsl_operands is
;;      set.
1867 (define_expand "vcond<mode><mode>"
1868 [(set (match_operand:VDQW 0 "s_register_operand" "")
1870 (match_operator 3 "comparison_operator"
1871 [(match_operand:VDQW 4 "s_register_operand" "")
1872 (match_operand:VDQW 5 "nonmemory_operand" "")])
1873 (match_operand:VDQW 1 "s_register_operand" "")
1874 (match_operand:VDQW 2 "s_register_operand" "")))]
1875 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1878 int use_zero_form = 0;
1879 int swap_bsl_operands = 0;
1880 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1881 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1883 rtx (*base_comparison) (rtx, rtx, rtx);
1884 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1886 switch (GET_CODE (operands[3]))
1893 if (operands[5] == CONST0_RTX (<MODE>mode))
1900 if (!REG_P (operands[5]))
1901 operands[5] = force_reg (<MODE>mode, operands[5]);
1904 switch (GET_CODE (operands[3]))
1914 base_comparison = gen_neon_vcge<mode>;
1915 complimentary_comparison = gen_neon_vcgt<mode>;
1923 base_comparison = gen_neon_vcgt<mode>;
1924 complimentary_comparison = gen_neon_vcge<mode>;
1929 base_comparison = gen_neon_vceq<mode>;
1930 complimentary_comparison = gen_neon_vceq<mode>;
1936 switch (GET_CODE (operands[3]))
1943 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1944 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1950 Note that there also exist direct comparison against 0 forms,
1951 so catch those as a special case. */
1955 switch (GET_CODE (operands[3]))
1958 base_comparison = gen_neon_vclt<mode>;
1961 base_comparison = gen_neon_vcle<mode>;
1964 /* Do nothing, other zero form cases already have the correct
1971 emit_insn (base_comparison (mask, operands[4], operands[5]));
1973 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1980 /* Vector compare returns false for lanes which are unordered, so if we use
1981 the inverse of the comparison we actually want to emit, then
1982 swap the operands to BSL, we will end up with the correct result.
1983 Note that a NE NaN and NaN NE b are true for all a, b.
1985 Our transformations are:
1990 a NE b -> !(a EQ b) */
1993 emit_insn (base_comparison (mask, operands[4], operands[5]));
1995 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1997 swap_bsl_operands = 1;
2000 /* We check (a > b || b > a). combining these comparisons give us
2001 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2002 will then give us (a == b || a UNORDERED b) as intended. */
2004 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
2005 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
2006 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2007 swap_bsl_operands = 1;
2010 /* Operands are ORDERED iff (a > b || b >= a).
2011 Swapping the operands to BSL will give the UNORDERED case. */
2012 swap_bsl_operands = 1;
2015 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2016 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2017 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2023 if (swap_bsl_operands)
2024 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2027 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Unsigned vector conditional select for the integer VDQIW modes.
;; Operand 3 is the comparison of operands 4 and 5; operands 1 and 2 are
;; the values to select between.  The comparison result is computed into
;; "mask" using the unsigned compare generators (vcgeu/vcgtu), vceq, or the
;; vcle/vclt forms; some cases compare with operands 4 and 5 swapped.  A
;; final neon_vbsl then selects between the two value operands, in one of
;; two operand orders.
2032 (define_expand "vcondu<mode><mode>"
2033 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2035 (match_operator 3 "arm_comparison_operator"
2036 [(match_operand:VDQIW 4 "s_register_operand" "")
2037 (match_operand:VDQIW 5 "s_register_operand" "")])
2038 (match_operand:VDQIW 1 "s_register_operand" "")
2039 (match_operand:VDQIW 2 "s_register_operand" "")))]
2043 int inverse = 0, immediate_zero = 0;
2045 mask = gen_reg_rtx (<V_cmp_result>mode);
2047 if (operands[5] == CONST0_RTX (<MODE>mode))
2049 else if (!REG_P (operands[5]))
2050 operands[5] = force_reg (<MODE>mode, operands[5]);
2052 switch (GET_CODE (operands[3]))
2055 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2059 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2063 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2068 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2070 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2075 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2077 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2081 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2090 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2093 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2099 ;; Patterns for builtins.
2101 ; good for plain vadd, vaddq.
;; Builtin expander for vadd on the VCVTF modes.  When the mode is not a
;; float mode, or -funsafe-math-optimizations is on, it emits the generic
;; add<mode>3; the other emit uses the neon_vadd<mode>_unspec pattern.
2103 (define_expand "neon_vadd<mode>"
2104 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2105 (match_operand:VCVTF 1 "s_register_operand" "w")
2106 (match_operand:VCVTF 2 "s_register_operand" "w")]
2109 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2110 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2112 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; Builtin expander for vadd on the half-precision VH modes.  Enabled by
;; TARGET_NEON_FP16INST; it simply emits add<mode>3_fp16 on the three
;; operands.
2117 (define_expand "neon_vadd<mode>"
2118 [(match_operand:VH 0 "s_register_operand")
2119 (match_operand:VH 1 "s_register_operand")
2120 (match_operand:VH 2 "s_register_operand")]
2121 "TARGET_NEON_FP16INST"
2123 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; Builtin expander for vsub on the half-precision VH modes.  Enabled by
;; TARGET_NEON_FP16INST; it simply emits sub<mode>3_fp16 on the three
;; operands.
2127 (define_expand "neon_vsub<mode>"
2128 [(match_operand:VH 0 "s_register_operand")
2129 (match_operand:VH 1 "s_register_operand")
2130 (match_operand:VH 2 "s_register_operand")]
2131 "TARGET_NEON_FP16INST"
2133 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2137 ; Note that NEON operations don't support the full IEEE 754 standard: in
2138 ; particular, denormal values are flushed to zero. This means that GCC cannot
2139 ; use those instructions for autovectorization, etc. unless
2140 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2141 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2142 ; header) must work in either case: if -funsafe-math-optimizations is given,
2143 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2144 ; expand to unspecs (which may potentially limit the extent to which they might
2145 ; be optimized by generic code).
2147 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd on the VCVTF modes: operands 1 and 2 are wrapped in
;; an unspec and output as "vadd.<V_if_elem>".  The scheduling type is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
2149 (define_insn "neon_vadd<mode>_unspec"
2150 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2151 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2152 (match_operand:VCVTF 2 "s_register_operand" "w")]
2155 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2157 (if_then_else (match_test "<Is_float_mode>")
2158 (const_string "neon_fp_addsub_s<q>")
2159 (const_string "neon_add<q>")))]
;; vaddl: both sources are VDI-mode registers and the result has the
;; <V_widen> mode.  <sup> supplies the signedness part of both the pattern
;; name and the ".<sup><size>" mnemonic suffix.
2162 (define_insn "neon_vaddl<sup><mode>"
2163 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2164 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2165 (match_operand:VDI 2 "s_register_operand" "w")]
2168 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2169 [(set_attr "type" "neon_add_long")]
;; vaddw: operand 1 already has the <V_widen> mode of the result, while
;; operand 2 is a VDI-mode register.
2172 (define_insn "neon_vaddw<sup><mode>"
2173 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2174 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2175 (match_operand:VDI 2 "s_register_operand" "w")]
2178 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2179 [(set_attr "type" "neon_add_widen")]
;; vhadd / vrhadd on the VDQIW modes; <r> supplies the optional rounding
;; letter and <sup> the signedness suffix.
;; The scheduling type uses the <q> mode attribute so that D-register modes
;; are classified as neon_add_halve and Q-register modes as
;; neon_add_halve_q, matching how neon_vhsub uses neon_sub_halve<q>.
2184 (define_insn "neon_v<r>hadd<sup><mode>"
2185 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2186 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2187 (match_operand:VDQIW 2 "s_register_operand" "w")]
2190 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2191 [(set_attr "type" "neon_add_halve<q>")]
;; vqadd on the VDQIX modes; result and both sources share the same mode.
;; Scheduling type is neon_qadd<q>.
2194 (define_insn "neon_vqadd<sup><mode>"
2195 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2196 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2197 (match_operand:VDQIX 2 "s_register_operand" "w")]
2200 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2201 [(set_attr "type" "neon_qadd<q>")]
;; vaddhn / vraddhn: both sources are VN-mode registers and the result has
;; the <V_narrow> mode.  <r> supplies the optional rounding letter.
2204 (define_insn "neon_v<r>addhn<mode>"
2205 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2206 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2207 (match_operand:VN 2 "s_register_operand" "w")]
2210 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2211 [(set_attr "type" "neon_add_halve_narrow_q")]
2214 ;; Polynomial and Float multiplication.
;; Unspec vmul over the VPF modes; <pf> provides the type letter used in
;; the ".<pf><size>" suffix.  The scheduling type is neon_fp_mul_s<q> for
;; float modes and neon_mul_<V_elem_ch><q> otherwise.
2215 (define_insn "neon_vmul<pf><mode>"
2216 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2217 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2218 (match_operand:VPF 2 "s_register_operand" "w")]
2221 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2223 (if_then_else (match_test "<Is_float_mode>")
2224 (const_string "neon_fp_mul_s<q>")
2225 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Standard-named multiply for the half-precision VH modes, output as
;; vmul.f16.  Only available with TARGET_NEON_FP16INST and
;; -funsafe-math-optimizations.
2228 (define_insn "mul<mode>3"
2230 (match_operand:VH 0 "s_register_operand" "=w")
2232 (match_operand:VH 1 "s_register_operand" "w")
2233 (match_operand:VH 2 "s_register_operand" "w")))]
2234 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2235 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2236 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Half-precision (VH) vmul.f16 pattern.  Unlike mul<mode>3 it is
;; conditional on TARGET_NEON_FP16INST alone.
2239 (define_insn "neon_vmulf<mode>"
2241 (match_operand:VH 0 "s_register_operand" "=w")
2243 (match_operand:VH 1 "s_register_operand" "w")
2244 (match_operand:VH 2 "s_register_operand" "w")))]
2245 "TARGET_NEON_FP16INST"
2246 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2247 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Builtin expander for vmla on the VDQW modes.  When the mode is not a
;; float mode, or -funsafe-math-optimizations is on, it emits
;; mul<mode>3add<mode>_neon; the other emit uses neon_vmla<mode>_unspec.
2250 (define_expand "neon_vmla<mode>"
2251 [(match_operand:VDQW 0 "s_register_operand" "=w")
2252 (match_operand:VDQW 1 "s_register_operand" "0")
2253 (match_operand:VDQW 2 "s_register_operand" "w")
2254 (match_operand:VDQW 3 "s_register_operand" "w")]
2257 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2258 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2259 operands[2], operands[3]));
2261 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2262 operands[2], operands[3]));
;; Builtin expander for vfma on the VCVTF modes (TARGET_NEON && TARGET_FMA).
;; Forwards to fma<mode>4_intrinsic; note that operands[2] and operands[3]
;; are passed immediately after the destination.
2266 (define_expand "neon_vfma<VCVTF:mode>"
2267 [(match_operand:VCVTF 0 "s_register_operand")
2268 (match_operand:VCVTF 1 "s_register_operand")
2269 (match_operand:VCVTF 2 "s_register_operand")
2270 (match_operand:VCVTF 3 "s_register_operand")]
2271 "TARGET_NEON && TARGET_FMA"
2273 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfma on the half-precision VH modes
;; (TARGET_NEON_FP16INST).  Forwards to fma<mode>4_intrinsic with
;; operands[2] and operands[3] passed immediately after the destination.
2278 (define_expand "neon_vfma<VH:mode>"
2279 [(match_operand:VH 0 "s_register_operand")
2280 (match_operand:VH 1 "s_register_operand")
2281 (match_operand:VH 2 "s_register_operand")
2282 (match_operand:VH 3 "s_register_operand")]
2283 "TARGET_NEON_FP16INST"
2285 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfms on the VCVTF modes (TARGET_NEON && TARGET_FMA).
;; Forwards to fmsub<mode>4_intrinsic with operands[2] and operands[3]
;; passed immediately after the destination.
2290 (define_expand "neon_vfms<VCVTF:mode>"
2291 [(match_operand:VCVTF 0 "s_register_operand")
2292 (match_operand:VCVTF 1 "s_register_operand")
2293 (match_operand:VCVTF 2 "s_register_operand")
2294 (match_operand:VCVTF 3 "s_register_operand")]
2295 "TARGET_NEON && TARGET_FMA"
2297 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfms on the half-precision VH modes
;; (TARGET_NEON_FP16INST).  Forwards to fmsub<mode>4_intrinsic with
;; operands[2] and operands[3] passed immediately after the destination.
2302 (define_expand "neon_vfms<VH:mode>"
2303 [(match_operand:VH 0 "s_register_operand")
2304 (match_operand:VH 1 "s_register_operand")
2305 (match_operand:VH 2 "s_register_operand")
2306 (match_operand:VH 3 "s_register_operand")]
2307 "TARGET_NEON_FP16INST"
2309 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2314 ;; The expand RTL structure here is not important.
2315 ;; We use the gen_* functions anyway.
2316 ;; We just need something to wrap the iterators around.
;; Builtin expander generated for every member of VFMLHALVES.  It builds
;; `half' with arm_simd_vect_par_cnst_half (<VFML>mode,
;; <vfml_half_selector>) and forwards to the matching
;; vfm<vfml_op>l_<vfml_half><mode>_intrinsic pattern.
2318 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2319 [(set (match_operand:VCVTF 0 "s_register_operand")
2321 [(match_operand:VCVTF 1 "s_register_operand")
2323 (match_operand:<VFML> 2 "s_register_operand")
2324 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2327 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2328 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
;; vfmal.f16 on the low halves: operands 2 and 3 (<VFML> mode) are each
;; narrowed with a vec_select whose selector must satisfy
;; vect_par_constant_low, and operand 1 is the accumulator tied to the
;; destination.  Sources are printed with the <V_lo> modifier.
2336 (define_insn "vfmal_low<mode>_intrinsic"
2337 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2340 (vec_select:<VFMLSEL>
2341 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2342 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2344 (vec_select:<VFMLSEL>
2345 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2346 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2347 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2349 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2350 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 on the high halves: operands 2 and 3 (<VFML> mode) are each
;; narrowed with a vec_select whose selector must satisfy
;; vect_par_constant_high, and operand 1 is the accumulator tied to the
;; destination.  Sources are printed with the <V_hi> modifier.
2353 (define_insn "vfmsl_high<mode>_intrinsic"
2354 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2358 (vec_select:<VFMLSEL>
2359 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2360 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2362 (vec_select:<VFMLSEL>
2363 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2364 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2365 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2367 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2368 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal.f16 on the high halves: operands 2 and 3 (<VFML> mode) are each
;; narrowed with a vec_select whose selector must satisfy
;; vect_par_constant_high, and operand 1 is the accumulator tied to the
;; destination.  Sources are printed with the <V_hi> modifier.
2371 (define_insn "vfmal_high<mode>_intrinsic"
2372 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2375 (vec_select:<VFMLSEL>
2376 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2377 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2379 (vec_select:<VFMLSEL>
2380 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2381 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2382 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2384 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2385 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 on the low halves: operands 2 and 3 (<VFML> mode) are each
;; narrowed with a vec_select whose selector must satisfy
;; vect_par_constant_low, and operand 1 is the accumulator tied to the
;; destination.  Sources are printed with the <V_lo> modifier.
2388 (define_insn "vfmsl_low<mode>_intrinsic"
2389 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2393 (vec_select:<VFMLSEL>
2394 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2395 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2397 (vec_select:<VFMLSEL>
2398 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2399 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2400 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2402 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2403 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Builtin expander for the by-lane vfmal/vfmsl forms where operands 2 and
;; 3 both have the <VFML> mode.  Operand 4 is the constant lane number; it
;; is remapped with NEON_ENDIAN_LANE_N (<VFML>mode, ...), `half' is built
;; with arm_simd_vect_par_cnst_half, and everything is forwarded to the
;; matching vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic pattern.
2406 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2407 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2409 [(match_operand:VCVTF 1 "s_register_operand")
2411 (match_operand:<VFML> 2 "s_register_operand")
2412 (match_operand:<VFML> 3 "s_register_operand"))
2413 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2416 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2417 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2418 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2419 (operands[0], operands[1],
2420 operands[2], operands[3],
;; By-lane vfmal.f16 using the low half of operand 2.  The lane in operand
;; 5 is remapped with NEON_ENDIAN_LANE_N.  If it lies beyond the last
;; element of a <VFMLSEL>-sized half, the lane is rebased by subtracting
;; GET_MODE_NUNITS (<VFMLSEL>mode) and operand 3 is printed with <V_hi>;
;; otherwise the lane is used as is and operand 3 is printed with <V_lo>.
2425 (define_insn "vfmal_lane_low<mode>_intrinsic"
2426 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2429 (vec_select:<VFMLSEL>
2430 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2431 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2433 (vec_duplicate:<VFMLSEL>
2435 (match_operand:<VFML> 3 "s_register_operand" "x")
2436 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2437 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2440 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2441 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2443 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2444 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2448 operands[5] = GEN_INT (lane);
2449 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2452 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Builtin expander for the by-lane vfmal/vfmsl forms where operand 3 has
;; the <VFMLSEL2> mode rather than the <VFML> mode of operand 2.  The
;; constant lane in operand 4 is remapped with NEON_ENDIAN_LANE_N using
;; <VFMLSEL2>mode, `half' is built from <VFML>mode, and the result is
;; forwarded to the matching ..._<vfmlsel2><mode>_intrinsic pattern.
2455 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2456 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2458 [(match_operand:VCVTF 1 "s_register_operand")
2460 (match_operand:<VFML> 2 "s_register_operand")
2461 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2462 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2466 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2467 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2468 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2469 (operands[0], operands[1], operands[2], operands[3],
2474 ;; Used to implement the intrinsics:
2475 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2476 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2477 ;; Care is needed to get the modes of the different sub-expressions right,
2478 ;; because 'a' and 'b' have different sizes, and to make sure we use the right
2479 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into a lane index within
;; one <VFMLSEL>-sized register (lane % elts_per_reg) and a register offset
;; (lane / elts_per_reg) that is added to the register number of operand 3.
;; Operands 2 and 3 are then rebuilt as <VFMLSEL>-mode REGs for printing.
2481 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2482 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2485 (vec_select:<VFMLSEL>
2486 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2487 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2489 (vec_duplicate:<VFMLSEL>
2491 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2492 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2493 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2496 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2497 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2498 int new_lane = lane % elts_per_reg;
2499 int regdiff = lane / elts_per_reg;
2500 operands[5] = GEN_INT (new_lane);
2501 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2502 because we want the print_operand code to print the appropriate
2503 S or D register prefix. */
2504 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2505 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2506 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2508 [(set_attr "type" "neon_fp_mla_s<q>")]
2511 ;; Used to implement the intrinsics:
2512 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2513 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2514 ;; Care is needed to get the modes of the different sub-expressions right,
2515 ;; because 'a' and 'b' have different sizes, and to make sure we use the right
2516 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into a lane index within
;; one <VFMLSEL>-sized register (lane % elts_per_reg) and a register offset
;; (lane / elts_per_reg) that is added to the register number of operand 3.
;; Only operand 3 is rebuilt; operand 2 is printed with <V_hi>.
2518 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2519 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2522 (vec_select:<VFMLSEL>
2523 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2524 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2526 (vec_duplicate:<VFMLSEL>
2528 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2529 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2530 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2533 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2534 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2535 int new_lane = lane % elts_per_reg;
2536 int regdiff = lane / elts_per_reg;
2537 operands[5] = GEN_INT (new_lane);
2538 /* We re-create operands[3] in the halved VFMLSEL mode
2539 because we've calculated the correct half-width subreg to extract
2540 the lane from and we want to print *that* subreg instead. */
2541 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2542 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2544 [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane vfmal.f16 using the high half of operand 2.  The lane in operand
;; 5 is remapped with NEON_ENDIAN_LANE_N.  If it lies beyond the last
;; element of a <VFMLSEL>-sized half, the lane is rebased by subtracting
;; GET_MODE_NUNITS (<VFMLSEL>mode) and operand 3 is printed with <V_hi>;
;; otherwise the lane is used as is and operand 3 is printed with <V_lo>.
2547 (define_insn "vfmal_lane_high<mode>_intrinsic"
2548 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2551 (vec_select:<VFMLSEL>
2552 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2553 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2555 (vec_duplicate:<VFMLSEL>
2557 (match_operand:<VFML> 3 "s_register_operand" "x")
2558 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2559 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2562 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2563 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2565 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2566 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2570 operands[5] = GEN_INT (lane);
2571 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2574 [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane vfmsl.f16 using the low half of operand 2.  The lane in operand
;; 5 is remapped with NEON_ENDIAN_LANE_N.  If it lies beyond the last
;; element of a <VFMLSEL>-sized half, the lane is rebased by subtracting
;; GET_MODE_NUNITS (<VFMLSEL>mode) and operand 3 is printed with <V_hi>;
;; otherwise the lane is used as is and operand 3 is printed with <V_lo>.
2577 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2578 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2582 (vec_select:<VFMLSEL>
2583 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2584 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2586 (vec_duplicate:<VFMLSEL>
2588 (match_operand:<VFML> 3 "s_register_operand" "x")
2589 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2590 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2593 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2594 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2596 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2597 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2601 operands[5] = GEN_INT (lane);
2602 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2605 [(set_attr "type" "neon_fp_mla_s<q>")]
2608 ;; Used to implement the intrinsics:
2609 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2610 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2611 ;; Care is needed to get the modes of the different sub-expressions right,
2612 ;; because 'a' and 'b' have different sizes, and to make sure we use the right
2613 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into a lane index within
;; one <VFMLSEL>-sized register (lane % elts_per_reg) and a register offset
;; (lane / elts_per_reg) that is added to the register number of operand 3.
;; Operands 2 and 3 are then rebuilt as <VFMLSEL>-mode REGs for printing.
2615 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2616 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2620 (vec_select:<VFMLSEL>
2621 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2622 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2624 (vec_duplicate:<VFMLSEL>
2626 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2627 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2628 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2631 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2632 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2633 int new_lane = lane % elts_per_reg;
2634 int regdiff = lane / elts_per_reg;
2635 operands[5] = GEN_INT (new_lane);
2636 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2637 because we want the print_operand code to print the appropriate
2638 S or D register prefix. */
2639 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2640 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2641 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2643 [(set_attr "type" "neon_fp_mla_s<q>")]
2646 ;; Used to implement the intrinsics:
2647 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2648 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2649 ;; Care is needed to get the modes of the different sub-expressions right,
2650 ;; because 'a' and 'b' have different sizes, and to make sure we use the right
2651 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into a lane index within
;; one <VFMLSEL>-sized register (lane % elts_per_reg) and a register offset
;; (lane / elts_per_reg) that is added to the register number of operand 3.
;; Only operand 3 is rebuilt; operand 2 is printed with <V_hi>.
2653 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2654 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2658 (vec_select:<VFMLSEL>
2659 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2660 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2662 (vec_duplicate:<VFMLSEL>
2664 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2665 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2666 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2669 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2670 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2671 int new_lane = lane % elts_per_reg;
2672 int regdiff = lane / elts_per_reg;
2673 operands[5] = GEN_INT (new_lane);
2674 /* We re-create operands[3] in the halved VFMLSEL mode
2675 because we've calculated the correct half-width subreg to extract
2676 the lane from and we want to print *that* subreg instead. */
2677 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2678 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2680 [(set_attr "type" "neon_fp_mla_s<q>")]
;; By-lane vfmsl.f16 using the high half of operand 2.  The lane in operand
;; 5 is remapped with NEON_ENDIAN_LANE_N.  If it lies beyond the last
;; element of a <VFMLSEL>-sized half, the lane is rebased by subtracting
;; GET_MODE_NUNITS (<VFMLSEL>mode) and operand 3 is printed with <V_hi>;
;; otherwise the lane is used as is and operand 3 is printed with <V_lo>.
2683 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2684 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2688 (vec_select:<VFMLSEL>
2689 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2690 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2692 (vec_duplicate:<VFMLSEL>
2694 (match_operand:<VFML> 3 "s_register_operand" "x")
2695 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2696 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2699 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2700 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2702 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2703 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2707 operands[5] = GEN_INT (lane);
2708 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2711 [(set_attr "type" "neon_fp_mla_s<q>")]
2714 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla on the VDQW modes.  Operand 1 is the accumulator,
;; tied to the destination by the "0" constraint, so only operands 2 and 3
;; appear as sources in the output template.
2716 (define_insn "neon_vmla<mode>_unspec"
2717 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2718 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2719 (match_operand:VDQW 2 "s_register_operand" "w")
2720 (match_operand:VDQW 3 "s_register_operand" "w")]
2723 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2725 (if_then_else (match_test "<Is_float_mode>")
2726 (const_string "neon_fp_mla_s<q>")
2727 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: the <V_widen>-mode accumulator (operand 1) is tied to the
;; destination; operands 2 and 3 are the VW-mode multiplicands.
2730 (define_insn "neon_vmlal<sup><mode>"
2731 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2732 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2733 (match_operand:VW 2 "s_register_operand" "w")
2734 (match_operand:VW 3 "s_register_operand" "w")]
2737 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2738 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin expander for vmls on the VDQW modes.  When the mode is not a
;; float mode, or -funsafe-math-optimizations is on, it emits
;; mul<mode>3neg<mode>add<mode>_neon; the other emit uses
;; neon_vmls<mode>_unspec.
2741 (define_expand "neon_vmls<mode>"
2742 [(match_operand:VDQW 0 "s_register_operand" "=w")
2743 (match_operand:VDQW 1 "s_register_operand" "0")
2744 (match_operand:VDQW 2 "s_register_operand" "w")
2745 (match_operand:VDQW 3 "s_register_operand" "w")]
2748 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2749 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2750 operands[1], operands[2], operands[3]));
2752 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2753 operands[2], operands[3]));
2757 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls on the VDQW modes.  Operand 1 is the accumulator,
;; tied to the destination by the "0" constraint, so only operands 2 and 3
;; appear as sources in the output template.
2759 (define_insn "neon_vmls<mode>_unspec"
2760 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2761 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2762 (match_operand:VDQW 2 "s_register_operand" "w")
2763 (match_operand:VDQW 3 "s_register_operand" "w")]
2766 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2768 (if_then_else (match_test "<Is_float_mode>")
2769 (const_string "neon_fp_mla_s<q>")
2770 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: the <V_widen>-mode accumulator (operand 1) is tied to the
;; destination; operands 2 and 3 are the VW-mode multiplicands.
2773 (define_insn "neon_vmlsl<sup><mode>"
2774 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2775 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2776 (match_operand:VW 2 "s_register_operand" "w")
2777 (match_operand:VW 3 "s_register_operand" "w")]
2780 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2781 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2784 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics: <r> expands to the optional rounding
;; letter.  Operates on the VMDQI modes with result and sources in the same
;; mode.
2785 (define_insn "neon_vq<r>dmulh<mode>"
2786 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2787 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2788 (match_operand:VMDQI 2 "s_register_operand" "w")]
2791 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2792 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2795 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics: the VQRDMLH_AS iterator's neon_rdma_as
;; attribute fills in the letter between "vqrdml" and "h".  Operand 1 is
;; the accumulator tied to the destination; operands 2 and 3 are printed
;; as the sources.
2796 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2797 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2798 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2799 (match_operand:VMDQI 2 "s_register_operand" "w")
2800 (match_operand:VMDQI 3 "s_register_operand" "w")]
2803 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2804 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: the <V_widen>-mode accumulator (operand 1) is tied to the
;; destination; operands 2 and 3 are the VMDI-mode multiplicands.
2807 (define_insn "neon_vqdmlal<mode>"
2808 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2809 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2810 (match_operand:VMDI 2 "s_register_operand" "w")
2811 (match_operand:VMDI 3 "s_register_operand" "w")]
2814 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2815 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: the <V_widen>-mode accumulator (operand 1) is tied to the
;; destination; operands 2 and 3 are the VMDI-mode multiplicands.
2818 (define_insn "neon_vqdmlsl<mode>"
2819 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2820 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2821 (match_operand:VMDI 2 "s_register_operand" "w")
2822 (match_operand:VMDI 3 "s_register_operand" "w")]
2825 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2826 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: both sources are VW-mode registers and the result has the
;; <V_widen> mode.
2829 (define_insn "neon_vmull<sup><mode>"
2830 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2831 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2832 (match_operand:VW 2 "s_register_operand" "w")]
2835 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2836 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: both sources are VMDI-mode registers and the result has the
;; <V_widen> mode.
2839 (define_insn "neon_vqdmull<mode>"
2840 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2841 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2842 (match_operand:VMDI 2 "s_register_operand" "w")]
2845 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2846 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Builtin expander for vsub on the VCVTF modes.  When the mode is not a
;; float mode, or -funsafe-math-optimizations is on, it emits the generic
;; sub<mode>3; the other emit uses the neon_vsub<mode>_unspec pattern.
2849 (define_expand "neon_vsub<mode>"
2850 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2851 (match_operand:VCVTF 1 "s_register_operand" "w")
2852 (match_operand:VCVTF 2 "s_register_operand" "w")]
2855 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2856 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2858 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2863 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub on the VCVTF modes: operands 1 and 2 are wrapped in
;; an unspec and output as "vsub.<V_if_elem>".  The scheduling type is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
2865 (define_insn "neon_vsub<mode>_unspec"
2866 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2867 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2868 (match_operand:VCVTF 2 "s_register_operand" "w")]
2871 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2873 (if_then_else (match_test "<Is_float_mode>")
2874 (const_string "neon_fp_addsub_s<q>")
2875 (const_string "neon_sub<q>")))]
;; vsubl: both sources are VDI-mode registers and the result has the
;; <V_widen> mode.
2878 (define_insn "neon_vsubl<sup><mode>"
2879 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2880 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2881 (match_operand:VDI 2 "s_register_operand" "w")]
2884 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2885 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 already has the <V_widen> mode of the result, while
;; operand 2 is a VDI-mode register.
2888 (define_insn "neon_vsubw<sup><mode>"
2889 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2890 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2891 (match_operand:VDI 2 "s_register_operand" "w")]
2894 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2895 [(set_attr "type" "neon_sub_widen")]
;; vqsub on the VDQIX modes; result and both sources share the same mode.
;; Scheduling type is neon_qsub<q>.
2898 (define_insn "neon_vqsub<sup><mode>"
2899 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2900 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2901 (match_operand:VDQIX 2 "s_register_operand" "w")]
2904 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2905 [(set_attr "type" "neon_qsub<q>")]
;; vhsub on the VDQIW modes; result and both sources share the same mode.
;; Scheduling type is neon_sub_halve<q>.
2908 (define_insn "neon_vhsub<sup><mode>"
2909 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2910 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2911 (match_operand:VDQIW 2 "s_register_operand" "w")]
2914 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2915 [(set_attr "type" "neon_sub_halve<q>")]
;; vsubhn / vrsubhn: both sources are VN-mode registers and the result has
;; the <V_narrow> mode.  <r> supplies the optional rounding letter.
2918 (define_insn "neon_v<r>subhn<mode>"
2919 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2920 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2921 (match_operand:VN 2 "s_register_operand" "w")]
2924 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2925 [(set_attr "type" "neon_sub_halve_narrow_q")]
2928 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2929 ;; without unsafe math optimizations.
;; Comparison expander over the VDQW modes and the COMPARISONS codes.
;; For vector-float modes without -funsafe-math-optimizations it emits one
;; of the explicitly named v2sf / v4sf ..._insn_unspec patterns; the
;; remaining emit uses neon_vc<cmp_op><mode>_insn.
2930 (define_expand "neon_vc<cmp_op><mode>"
2931 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2933 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2934 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2937 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2939 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2940 && !flag_unsafe_math_optimizations)
2942 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2943 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2944 whereas this expander iterates over the integer modes as well,
2945 but we will never expand to UNSPECs for the integer comparisons. */
2949 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2954 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2963 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; RTL-comparison form of vc<cmp_op> on the VDQW modes.  It is disabled for
;; vector-float modes unless -funsafe-math-optimizations is on.  The
;; assembly text is built with sprintf: the type letter is "f" for float
;; modes and <cmp_type> otherwise, and the last operand is register operand
;; 2 for alternative 0 or the literal "#0" for the zero alternative.  The
;; scheduling type depends on whether operand 2 matches zero_operand.
2970 (define_insn "neon_vc<cmp_op><mode>_insn"
2971 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2973 (COMPARISONS:<V_cmp_result>
2974 (match_operand:VDQW 1 "s_register_operand" "w,w")
2975 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2976 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2977 && !flag_unsafe_math_optimizations)"
2980 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2982 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2983 ? "f" : "<cmp_type>",
2984 which_alternative == 0
2985 ? "%<V_reg>2" : "#0");
2986 output_asm_insn (pattern, operands);
2990 (if_then_else (match_operand 2 "zero_operand")
2991 (const_string "neon_compare_zero<q>")
2992 (const_string "neon_compare<q>")))]
;; Unspec form of the float comparisons on the VCVTF modes.  The assembly
;; text always uses the ".f<size>" suffix; the last operand is register
;; operand 2 for alternative 0 or the literal "#0" for the zero
;; alternative.
2995 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2996 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2997 (unspec:<V_cmp_result>
2998 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2999 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
3004 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3006 which_alternative == 0
3007 ? "%<V_reg>2" : "#0");
3008 output_asm_insn (pattern, operands);
3011 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Comparison expander for the half-precision VH modes
;; (TARGET_NEON_FP16INST).  Without -funsafe-math-optimizations it uses
;; the ..._fp16insn_unspec pattern; the other call uses the ..._fp16insn
;; pattern.
3014 (define_expand "neon_vc<cmp_op><mode>"
3015 [(match_operand:<V_cmp_result> 0 "s_register_operand")
3018 (match_operand:VH 1 "s_register_operand")
3019 (match_operand:VH 2 "reg_or_zero_operand")))]
3020 "TARGET_NEON_FP16INST"
3022 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
3024 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3025 && !flag_unsafe_math_optimizations)
3027 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
3028 (operands[0], operands[1], operands[2]));
3031 (gen_neon_vc<cmp_op><mode>_fp16insn
3032 (operands[0], operands[1], operands[2]));
;; RTL-comparison form of vc<cmp_op> on the half-precision VH modes.
;; Requires TARGET_NEON_FP16INST and is disabled for vector-float modes
;; unless -funsafe-math-optimizations is on.  The assembly text is built
;; with sprintf; the last operand is register operand 2 for alternative 0
;; or the literal "#0" for the zero alternative.
3036 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3037 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3039 (COMPARISONS:<V_cmp_result>
3040 (match_operand:VH 1 "s_register_operand" "w,w")
3041 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3042 "TARGET_NEON_FP16INST
3043 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3044 && !flag_unsafe_math_optimizations)"
3047 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3049 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3050 ? "f" : "<cmp_type>",
3051 which_alternative == 0
3052 ? "%<V_reg>2" : "#0");
3053 output_asm_insn (pattern, operands);
3057 (if_then_else (match_operand 2 "zero_operand")
3058 (const_string "neon_compare_zero<q>")
3059 (const_string "neon_compare<q>")))])
;; Unspec form of the VH vector compare (TARGET_NEON_FP16INST).  Operand 2
;; is a register or zero; the zero alternative prints "#0" as the last
;; assembler operand.
3061 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3063 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3064 (unspec:<V_cmp_result>
3065 [(match_operand:VH 1 "s_register_operand" "w,w")
3066 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3068 "TARGET_NEON_FP16INST"
3071 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3073 which_alternative == 0
3074 ? "%<V_reg>2" : "#0");
3075 output_asm_insn (pattern, operands);
3078 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Unsigned integer vector compare over the GTUGEU codes on VDQIW modes.
;; Both inputs must be registers; the template prints vc<cmp_op> with a
;; ".u" element-size suffix.
3080 (define_insn "neon_vc<cmp_op>u<mode>"
3081 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3083 (GTUGEU:<V_cmp_result>
3084 (match_operand:VDQIW 1 "s_register_operand" "w")
3085 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3087 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3088 [(set_attr "type" "neon_compare<q>")]
;; Expander for absolute-value compares on VCVTF modes: a GTGE comparison
;; of (abs op1) against (abs op2).  Under flag_unsafe_math_optimizations the
;; RTL _insn pattern is emitted; the _insn_unspec pattern is the
;; alternative.
3091 (define_expand "neon_vca<cmp_op><mode>"
3092 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3094 (GTGE:<V_cmp_result>
3095 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3096 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3099 if (flag_unsafe_math_optimizations)
3100 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3103 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; RTL form of the VCVTF absolute compare (GTGE of two abs operands).
;; Only available when flag_unsafe_math_optimizations is set, per the insn
;; condition.  Prints vac<cmp_op>.
3110 (define_insn "neon_vca<cmp_op><mode>_insn"
3111 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3113 (GTGE:<V_cmp_result>
3114 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3115 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3116 "TARGET_NEON && flag_unsafe_math_optimizations"
3117 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3118 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the VCVTF absolute compare; prints vac<cmp_op_unsp> on
;; three vector registers.
3121 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3122 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3123 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3124 (match_operand:VCVTF 2 "s_register_operand" "w")]
3127 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3128 [(set_attr "type" "neon_fp_compare_s<q>")]
;; VH counterpart of the absolute-compare expander (GLTE codes), enabled by
;; TARGET_NEON_FP16INST.  Under flag_unsafe_math_optimizations the RTL
;; _fp16insn pattern is emitted; the _fp16insn_unspec pattern is the
;; alternative.
3131 (define_expand "neon_vca<cmp_op><mode>"
3133 (match_operand:<V_cmp_result> 0 "s_register_operand")
3135 (GLTE:<V_cmp_result>
3136 (abs:VH (match_operand:VH 1 "s_register_operand"))
3137 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3138 "TARGET_NEON_FP16INST"
3140 if (flag_unsafe_math_optimizations)
3141 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3142 (operands[0], operands[1], operands[2]));
3144 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3145 (operands[0], operands[1], operands[2]));
;; RTL form of the VH absolute compare (GLTE of two abs operands).
;; Requires TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
3149 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3151 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3153 (GLTE:<V_cmp_result>
3154 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3155 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3156 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3157 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3158 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the VH absolute compare; prints vac<cmp_op_unsp> on three
;; vector registers.
3161 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3162 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3163 (unspec:<V_cmp_result>
3164 [(match_operand:VH 1 "s_register_operand" "w")
3165 (match_operand:VH 2 "s_register_operand" "w")]
3168 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3169 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for VH modes (TARGET_NEON_FP16INST).
;; It forwards to gen_neon_vc<cmp_op><mode>, passing CONST0_RTX (<MODE>mode)
;; as the second comparison operand.
3172 (define_expand "neon_vc<cmp_op>z<mode>"
3174 (match_operand:<V_cmp_result> 0 "s_register_operand")
3175 (COMPARISONS:<V_cmp_result>
3176 (match_operand:VH 1 "s_register_operand")
3178 "TARGET_NEON_FP16INST"
3180 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3181 CONST0_RTX (<MODE>mode)));
;; vtst on VDQIW modes: an unspec of two vector register inputs producing a
;; vector register result of the same mode.
3185 (define_insn "neon_vtst<mode>"
3186 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3187 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3188 (match_operand:VDQIW 2 "s_register_operand" "w")]
3191 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3192 [(set_attr "type" "neon_tst<q>")]
;; vabd on integer VDQIW modes.  The <sup> iterator value supplies the type
;; letter placed before the element size in the mnemonic suffix.
3195 (define_insn "neon_vabd<sup><mode>"
3196 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3197 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3198 (match_operand:VDQIW 2 "s_register_operand" "w")]
3201 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3202 [(set_attr "type" "neon_abd<q>")]
;; vabd on VH modes, enabled by TARGET_NEON_FP16INST.
3205 (define_insn "neon_vabd<mode>"
3206 [(set (match_operand:VH 0 "s_register_operand" "=w")
3207 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3208 (match_operand:VH 2 "s_register_operand" "w")]
3210 "TARGET_NEON_FP16INST"
3211 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3212 [(set_attr "type" "neon_abd<q>")]
;; vabd on VCVTF modes; scheduled as neon_fp_abd_s<q>.
3215 (define_insn "neon_vabdf<mode>"
3216 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3217 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3218 (match_operand:VCVTF 2 "s_register_operand" "w")]
3221 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3222 [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: inputs are VW-mode vectors and the result has the <V_widen> mode.
;; The destination is printed with %q0 and the sources with %P1 / %P2.
3225 (define_insn "neon_vabdl<sup><mode>"
3226 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3227 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3228 (match_operand:VW 2 "s_register_operand" "w")]
3231 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3232 [(set_attr "type" "neon_abd_long")]
;; vaba: adds an unspec of operands 2 and 3 to the accumulator operand 1.
;; Operand 1 is tied to the destination (constraint "0"), so only operands
;; 0, 2 and 3 appear in the assembler template.
3235 (define_insn "neon_vaba<sup><mode>"
3236 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3237 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3238 (match_operand:VDQIW 3 "s_register_operand" "w")]
3240 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3242 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3243 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal: like vaba but the VW-mode inputs (operands 2, 3) feed a <V_widen>
;; accumulator.  Operand 1 is tied to the destination (constraint "0").
3246 (define_insn "neon_vabal<sup><mode>"
3247 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3248 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3249 (match_operand:VW 3 "s_register_operand" "w")]
3251 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3253 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3254 [(set_attr "type" "neon_arith_acc<q>")]
;; Integer v<maxmin> on VDQIW modes; <sup> supplies the type letter in the
;; mnemonic suffix.
3257 (define_insn "neon_v<maxmin><sup><mode>"
3258 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3259 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3260 (match_operand:VDQIW 2 "s_register_operand" "w")]
3263 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3264 [(set_attr "type" "neon_minmax<q>")]
;; v<maxmin> on VCVTF modes; scheduled as neon_fp_minmax_s<q>.
3267 (define_insn "neon_v<maxmin>f<mode>"
3268 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3269 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3270 (match_operand:VCVTF 2 "s_register_operand" "w")]
3273 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3274 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; v<maxmin> on VH modes, enabled by TARGET_NEON_FP16INST.
3277 (define_insn "neon_v<maxmin>f<mode>"
3278 [(set (match_operand:VH 0 "s_register_operand" "=w")
3280 [(match_operand:VH 1 "s_register_operand" "w")
3281 (match_operand:VH 2 "s_register_operand" "w")]
3283 "TARGET_NEON_FP16INST"
3284 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3285 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vp<maxmin>.f16, defined only for V4HF and enabled by
;; TARGET_NEON_FP16INST.  All three operands are printed with %P.
3288 (define_insn "neon_vp<maxmin>fv4hf"
3289 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3291 [(match_operand:V4HF 1 "s_register_operand" "w")
3292 (match_operand:V4HF 2 "s_register_operand" "w")]
3294 "TARGET_NEON_FP16INST"
3295 "vp<maxmin>.f16\t%P0, %P1, %P2"
3296 [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> on VH modes, enabled by TARGET_NEON_FP16INST.  The mnemonic
;; itself comes from the <fmaxmin_op> iterator attribute.
3299 (define_insn "neon_<fmaxmin_op><mode>"
3301 (match_operand:VH 0 "s_register_operand" "=w")
3303 [(match_operand:VH 1 "s_register_operand" "w")
3304 (match_operand:VH 2 "s_register_operand" "w")]
3306 "TARGET_NEON_FP16INST"
3307 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3308 [(set_attr "type" "neon_fp_minmax_s<q>")]
3311 ;; v<maxmin>nm intrinsics.
;; VCVTF variant of the <fmaxmin_op> pattern; requires both TARGET_NEON and
;; TARGET_VFP5.
3312 (define_insn "neon_<fmaxmin_op><mode>"
3313 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3314 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3315 (match_operand:VCVTF 2 "s_register_operand" "w")]
3317 "TARGET_NEON && TARGET_VFP5"
3318 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3319 [(set_attr "type" "neon_fp_minmax_s<q>")]
3322 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Named <fmaxmin><mode>3 pattern on VCVTF modes.  It emits the same
;; <fmaxmin_op> instruction as neon_<fmaxmin_op><mode> and has the same
;; TARGET_NEON && TARGET_VFP5 condition.
3323 (define_insn "<fmaxmin><mode>3"
3324 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3325 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3326 (match_operand:VCVTF 2 "s_register_operand" "w")]
3328 "TARGET_NEON && TARGET_VFP5"
3329 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3330 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander for vpadd on VD modes; it delegates to
;; gen_neon_vpadd_internal<mode>.
3333 (define_expand "neon_vpadd<mode>"
3334 [(match_operand:VD 0 "s_register_operand" "=w")
3335 (match_operand:VD 1 "s_register_operand" "w")
3336 (match_operand:VD 2 "s_register_operand" "w")]
3339 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: one VDQIW input; the result has the <V_double_width> mode.
3344 (define_insn "neon_vpaddl<sup><mode>"
3345 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3346 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3349 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3350 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: operand 1 is the <V_double_width> accumulator, tied to the
;; destination (constraint "0"); operand 2 is the VDQIW input.  Only
;; operands 0 and 2 appear in the assembler template.
3353 (define_insn "neon_vpadal<sup><mode>"
3354 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3355 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3356 (match_operand:VDQIW 2 "s_register_operand" "w")]
3359 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3360 [(set_attr "type" "neon_reduc_add_acc")]
;; Integer vp<maxmin> on VDI modes.
3363 (define_insn "neon_vp<maxmin><sup><mode>"
3364 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3365 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3366 (match_operand:VDI 2 "s_register_operand" "w")]
3369 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3370 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vp<maxmin> on VCVTF modes; scheduled as neon_fp_reduc_minmax_s<q>.
3373 (define_insn "neon_vp<maxmin>f<mode>"
3374 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3375 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3376 (match_operand:VCVTF 2 "s_register_operand" "w")]
3379 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3380 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps on VCVTF modes: an unspec of two vector register inputs.
3383 (define_insn "neon_vrecps<mode>"
3384 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3385 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3386 (match_operand:VCVTF 2 "s_register_operand" "w")]
3389 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3390 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps on VH modes, enabled by TARGET_NEON_FP16INST.
3393 (define_insn "neon_vrecps<mode>"
3395 (match_operand:VH 0 "s_register_operand" "=w")
3396 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3397 (match_operand:VH 2 "s_register_operand" "w")]
3399 "TARGET_NEON_FP16INST"
3400 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3401 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts on VCVTF modes: an unspec of two vector register inputs.
3404 (define_insn "neon_vrsqrts<mode>"
3405 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3406 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3407 (match_operand:VCVTF 2 "s_register_operand" "w")]
3410 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3411 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts on VH modes, enabled by TARGET_NEON_FP16INST.
3414 (define_insn "neon_vrsqrts<mode>"
3416 (match_operand:VH 0 "s_register_operand" "=w")
3417 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3418 (match_operand:VH 2 "s_register_operand" "w")]
3420 "TARGET_NEON_FP16INST"
3421 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3422 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for vabs on VDQW modes; it forwards to the abs<mode>2 pattern.
3425 (define_expand "neon_vabs<mode>"
3426 [(match_operand:VDQW 0 "s_register_operand" "")
3427 (match_operand:VDQW 1 "s_register_operand" "")]
3430 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs on VDQIW modes: an unspec with a single vector register input.
3434 (define_insn "neon_vqabs<mode>"
3435 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3436 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3439 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3440 [(set_attr "type" "neon_qabs<q>")]
;; bswap on VDQHSD modes, implemented as vrev<V_sz_elem>.8.
3443 (define_insn "neon_bswap<mode>"
3444 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3445 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3447 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3448 [(set_attr "type" "neon_rev<q>")]
;; Expander for vneg on VDQW modes; it forwards to the neg<mode>2 pattern.
3451 (define_expand "neon_vneg<mode>"
3452 [(match_operand:VDQW 0 "s_register_operand" "")
3453 (match_operand:VDQW 1 "s_register_operand" "")]
3456 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3460 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the VCVTI accumulator, tied to the destination
;; (constraint "0"); operands 2 and 3 are the <VSI2QI> inputs of the unspec.
;; Only operands 0, 2 and 3 are printed.
3461 (define_insn "neon_<sup>dot<vsi2qi>"
3462 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3463 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3464 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3465 "register_operand" "w")
3466 (match_operand:<VSI2QI> 3
3467 "register_operand" "w")]
3470 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3471 [(set_attr "type" "neon_dot")]
3474 ;; These instructions map to the __builtins for the Dot Product
3475 ;; indexed operations.
;; Lane-indexed form: operand 3 is always V8QI (constraint "t") and operand
;; 4 is the immediate lane.  The lane is remapped with
;; NEON_ENDIAN_LANE_N (V8QImode, ...) before being printed as %P3[%c4].
3476 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3477 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3478 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3479 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3480 "register_operand" "w")
3481 (match_operand:V8QI 3 "register_operand" "t")
3482 (match_operand:SI 4 "immediate_operand" "i")]
3487 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3488 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3490 [(set_attr "type" "neon_dot")]
3493 ;; These expands map to the Dot Product optab the vectorizer checks for.
3494 ;; The auto-vectorizer expects a dot product builtin that also does an
3495 ;; accumulation into the provided register.
3496 ;; Given the following pattern
3498 ;; for (i=0; i<len; i++) {
3504 ;; This can be auto-vectorized to
3505 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3507 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3508 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3509 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3512 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Operand 3 is the accumulator.  The body calls gen_neon_<sup>dot<vsi2qi>
;; with operands[3] as both destination and accumulator, then copies
;; operands[3] into operands[0].
3513 (define_expand "<sup>dot_prod<vsi2qi>"
3514 [(set (match_operand:VCVTI 0 "register_operand")
3515 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3517 (match_operand:<VSI2QI> 2
3518 "register_operand")]
3520 (match_operand:VCVTI 3 "register_operand")))]
3524 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3526 emit_insn (gen_rtx_SET (operands[0], operands[3]));
;; copysign expander on VCVTF modes.  It materialises a vector whose
;; elements are all 0x80000000 in the <V_cmp_result> mode, copies operand 2
;; into operand 0, views the mask in <MODE> via a subreg at offset 0, and
;; finishes with gen_neon_vbsl<mode>, using that mask as the selector.
3530 (define_expand "neon_copysignf<mode>"
3531 [(match_operand:VCVTF 0 "register_operand")
3532 (match_operand:VCVTF 1 "register_operand")
3533 (match_operand:VCVTF 2 "register_operand")]
3537 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3538 rtx c = GEN_INT (0x80000000);
3540 emit_move_insn (v_bitmask,
3541 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3542 emit_move_insn (operands[0], operands[2]);
3543 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3544 <VCVTF:V_cmp_result>mode, 0);
3545 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; vqneg on VDQIW modes: an unspec with a single vector register input.
3552 (define_insn "neon_vqneg<mode>"
3553 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3554 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3557 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3558 [(set_attr "type" "neon_qneg<q>")]
;; vcls on VDQIW modes: an unspec with a single vector register input.
3561 (define_insn "neon_vcls<mode>"
3562 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3563 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3566 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3567 [(set_attr "type" "neon_cls<q>")]
;; Named clz<mode>2 pattern on VDQIW modes, expressed with the clz RTL code
;; and emitted as vclz.
3570 (define_insn "clz<mode>2"
3571 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3572 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3574 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3575 [(set_attr "type" "neon_cnt<q>")]
;; Expander for vclz on VDQIW modes; it forwards to the clz<mode>2 pattern.
3578 (define_expand "neon_vclz<mode>"
3579 [(match_operand:VDQIW 0 "s_register_operand" "")
3580 (match_operand:VDQIW 1 "s_register_operand" "")]
3583 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Named popcount<mode>2 pattern on VE modes, expressed with the popcount
;; RTL code and emitted as vcnt.
3587 (define_insn "popcount<mode>2"
3588 [(set (match_operand:VE 0 "s_register_operand" "=w")
3589 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3591 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3592 [(set_attr "type" "neon_cnt<q>")]
;; Expander for vcnt on VE modes; it forwards to the popcount<mode>2
;; pattern.
3595 (define_expand "neon_vcnt<mode>"
3596 [(match_operand:VE 0 "s_register_operand" "=w")
3597 (match_operand:VE 1 "s_register_operand" "w")]
3600 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; vrecpe.f16 on VH modes, enabled by TARGET_NEON_FP16INST.
3604 (define_insn "neon_vrecpe<mode>"
3605 [(set (match_operand:VH 0 "s_register_operand" "=w")
3606 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3608 "TARGET_NEON_FP16INST"
3609 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3610 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrecpe on V32 modes; the element suffix comes from <V_u_elem>.
3613 (define_insn "neon_vrecpe<mode>"
3614 [(set (match_operand:V32 0 "s_register_operand" "=w")
3615 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3618 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3619 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrsqrte on V32 modes; the element suffix comes from <V_u_elem>.
3622 (define_insn "neon_vrsqrte<mode>"
3623 [(set (match_operand:V32 0 "s_register_operand" "=w")
3624 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3627 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3628 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for vmvn on VDQIW modes; it forwards to the one_cmpl<mode>2
;; pattern.
3631 (define_expand "neon_vmvn<mode>"
3632 [(match_operand:VDQIW 0 "s_register_operand" "")
3633 (match_operand:VDQIW 1 "s_register_operand" "")]
3636 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Extract one lane of a VD-mode vector into an SImode core register using
;; vmov.s<size>.  On big-endian targets the lane index is mirrored
;; (nunits - 1 - elt) before it is printed.
3640 (define_insn "neon_vget_lane<mode>_sext_internal"
3641 [(set (match_operand:SI 0 "s_register_operand" "=r")
3643 (vec_select:<V_elem>
3644 (match_operand:VD 1 "s_register_operand" "w")
3645 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3648 if (BYTES_BIG_ENDIAN)
3650 int elt = INTVAL (operands[2]);
3651 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3652 operands[2] = GEN_INT (elt);
3654 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3656 [(set_attr "type" "neon_to_gp")]
;; Same as the VD _sext_internal pattern but emits vmov.u<size>.  On
;; big-endian targets the lane index is mirrored (nunits - 1 - elt).
3659 (define_insn "neon_vget_lane<mode>_zext_internal"
3660 [(set (match_operand:SI 0 "s_register_operand" "=r")
3662 (vec_select:<V_elem>
3663 (match_operand:VD 1 "s_register_operand" "w")
3664 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3667 if (BYTES_BIG_ENDIAN)
3669 int elt = INTVAL (operands[2]);
3670 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3671 operands[2] = GEN_INT (elt);
3673 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3675 [(set_attr "type" "neon_to_gp")]
;; VQ2-mode variant of the vmov.s lane extract.  The lane index is split
;; into a half selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored on big-endian).  The source is rebuilt as a
;; <V_HALF>-mode register at regno + 2 * (half selector), and the vmov is
;; emitted on that register with the adjusted lane.
3678 (define_insn "neon_vget_lane<mode>_sext_internal"
3679 [(set (match_operand:SI 0 "s_register_operand" "=r")
3681 (vec_select:<V_elem>
3682 (match_operand:VQ2 1 "s_register_operand" "w")
3683 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3687 int regno = REGNO (operands[1]);
3688 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3689 unsigned int elt = INTVAL (operands[2]);
3690 unsigned int elt_adj = elt % halfelts;
3692 if (BYTES_BIG_ENDIAN)
3693 elt_adj = halfelts - 1 - elt_adj;
3695 ops[0] = operands[0];
3696 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3697 ops[2] = GEN_INT (elt_adj);
3698 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3702 [(set_attr "type" "neon_to_gp_q")]
;; VQ2-mode variant of the vmov.u lane extract.  The lane index is split
;; into a half selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored on big-endian).  The source is rebuilt as a
;; <V_HALF>-mode register at regno + 2 * (half selector), and the vmov is
;; emitted on that register with the adjusted lane.
3705 (define_insn "neon_vget_lane<mode>_zext_internal"
3706 [(set (match_operand:SI 0 "s_register_operand" "=r")
3708 (vec_select:<V_elem>
3709 (match_operand:VQ2 1 "s_register_operand" "w")
3710 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3714 int regno = REGNO (operands[1]);
3715 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3716 unsigned int elt = INTVAL (operands[2]);
3717 unsigned int elt_adj = elt % halfelts;
3719 if (BYTES_BIG_ENDIAN)
3720 elt_adj = halfelts - 1 - elt_adj;
3722 ops[0] = operands[0];
3723 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3724 ops[2] = GEN_INT (elt_adj);
3725 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3729 [(set_attr "type" "neon_to_gp_q")]
;; Lane-extract expander for VDQW modes.  On big-endian targets the lane
;; number is XORed with (elements per 64 bits - 1).  Modes with 32-bit
;; elements go through gen_vec_extract<mode><V_elem_l>; the
;; _sext_internal pattern is the alternative.
3732 (define_expand "neon_vget_lane<mode>"
3733 [(match_operand:<V_ext> 0 "s_register_operand" "")
3734 (match_operand:VDQW 1 "s_register_operand" "")
3735 (match_operand:SI 2 "immediate_operand" "")]
3738 if (BYTES_BIG_ENDIAN)
3740 /* The intrinsics are defined in terms of a model where the
3741 element ordering in memory is vldm order, whereas the generic
3742 RTL is defined in terms of a model where the element ordering
3743 in memory is array order. Convert the lane number to conform
3745 unsigned int elt = INTVAL (operands[2]);
3746 unsigned int reg_nelts
3747 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3748 elt ^= reg_nelts - 1;
3749 operands[2] = GEN_INT (elt);
3752 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3753 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3756 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Lane-extract expander for VDQIW modes that uses the _zext_internal
;; pattern.  On big-endian targets the lane number is XORed with
;; (elements per 64 bits - 1).  Modes with 32-bit elements go through
;; gen_vec_extract<mode><V_elem_l> instead.
3762 (define_expand "neon_vget_laneu<mode>"
3763 [(match_operand:<V_ext> 0 "s_register_operand" "")
3764 (match_operand:VDQIW 1 "s_register_operand" "")
3765 (match_operand:SI 2 "immediate_operand" "")]
3768 if (BYTES_BIG_ENDIAN)
3770 /* The intrinsics are defined in terms of a model where the
3771 element ordering in memory is vldm order, whereas the generic
3772 RTL is defined in terms of a model where the element ordering
3773 in memory is array order. Convert the lane number to conform
3775 unsigned int elt = INTVAL (operands[2]);
3776 unsigned int reg_nelts
3777 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3778 elt ^= reg_nelts - 1;
3779 operands[2] = GEN_INT (elt);
3782 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3783 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3786 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DImode lane extract: the lane operand (operand 2) is not used; the body
;; is a plain move of operand 1 into operand 0.
3792 (define_expand "neon_vget_lanedi"
3793 [(match_operand:DI 0 "s_register_operand" "=r")
3794 (match_operand:DI 1 "s_register_operand" "w")
3795 (match_operand:SI 2 "immediate_operand" "")]
3798 emit_move_insn (operands[0], operands[1]);
;; V2DI lane extract.  On big-endian targets the lane is flipped (XOR 1).
;; The lane must be 0 or 1 (asserted); lane 0 moves the DImode low part of
;; operand 1 and lane 1 moves its high part.
3802 (define_expand "neon_vget_lanev2di"
3803 [(match_operand:DI 0 "s_register_operand" "")
3804 (match_operand:V2DI 1 "s_register_operand" "")
3805 (match_operand:SI 2 "immediate_operand" "")]
3810 if (BYTES_BIG_ENDIAN)
3812 /* The intrinsics are defined in terms of a model where the
3813 element ordering in memory is vldm order, whereas the generic
3814 RTL is defined in terms of a model where the element ordering
3815 in memory is array order. Convert the lane number to conform
3817 unsigned int elt = INTVAL (operands[2]);
3818 unsigned int reg_nelts = 2;
3819 elt ^= reg_nelts - 1;
3820 operands[2] = GEN_INT (elt);
3823 lane = INTVAL (operands[2]);
3824 gcc_assert ((lane ==0) || (lane == 1));
3825 emit_move_insn (operands[0], lane == 0
3826 ? gen_lowpart (DImode, operands[1])
3827 : gen_highpart (DImode, operands[1]));
;; Lane-insert expander for VDQ modes.  Operand 1 is the new element,
;; operand 2 the existing vector (tied to the destination) and operand 3
;; the lane.  On big-endian targets the lane is XORed with
;; (elements per 64 bits - 1).  The lane is passed to
;; gen_vec_set<mode>_internal as a one-hot mask (1 << elt).
3831 (define_expand "neon_vset_lane<mode>"
3832 [(match_operand:VDQ 0 "s_register_operand" "=w")
3833 (match_operand:<V_elem> 1 "s_register_operand" "r")
3834 (match_operand:VDQ 2 "s_register_operand" "0")
3835 (match_operand:SI 3 "immediate_operand" "i")]
3838 unsigned int elt = INTVAL (operands[3]);
3840 if (BYTES_BIG_ENDIAN)
3842 unsigned int reg_nelts
3843 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3844 elt ^= reg_nelts - 1;
3847 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3848 GEN_INT (1 << elt), operands[2]));
3852 ; Operands 2 & 3 are ignored here, just as neon_vget_lanedi ignores its lane operand: the expander simply moves operand 1 into operand 0.
;; DImode lane insert: the body only moves operand 1 into operand 0.
3854 (define_expand "neon_vset_lanedi"
3855 [(match_operand:DI 0 "s_register_operand" "=w")
3856 (match_operand:DI 1 "s_register_operand" "r")
3857 (match_operand:DI 2 "s_register_operand" "0")
3858 (match_operand:SI 3 "immediate_operand" "i")]
3861 emit_move_insn (operands[0], operands[1]);
;; vcreate for VD_RE modes: the DImode operand is viewed in <MODE> with
;; gen_lowpart and then moved into the destination.
3865 (define_expand "neon_vcreate<mode>"
3866 [(match_operand:VD_RE 0 "s_register_operand" "")
3867 (match_operand:DI 1 "general_operand" "")]
3870 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3871 emit_move_insn (operands[0], src);
;; vec_duplicate of a scalar held in an "r"-constraint register into a
;; VX-mode vector, emitted as vdup.
3875 (define_insn "neon_vdup_n<mode>"
3876 [(set (match_operand:VX 0 "s_register_operand" "=w")
3877 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3879 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3880 [(set_attr "type" "neon_from_gp<q>")]
;; vec_duplicate of an HF scalar ("r" constraint) into a V4HF vector.
3883 (define_insn "neon_vdup_nv4hf"
3884 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3885 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3888 [(set_attr "type" "neon_from_gp")]
;; vec_duplicate of an HF scalar ("r" constraint) into a V8HF vector.
3891 (define_insn "neon_vdup_nv8hf"
3892 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3893 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3896 [(set_attr "type" "neon_from_gp_q")]
;; vdup for V32 modes with two alternatives.  Alternative 0 takes the
;; scalar from an "r" register and prints %1; alternative 1 takes it from a
;; "t" register and prints %y1.  The "type" attribute differs per
;; alternative (neon_from_gp<q> / neon_dup<q>).
3899 (define_insn "neon_vdup_n<mode>"
3900 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3901 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3904 vdup.<V_sz_elem>\t%<V_reg>0, %1
3905 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3906 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode vdup_n: the body is a plain move of operand 1 into operand 0.
3909 (define_expand "neon_vdup_ndi"
3910 [(match_operand:DI 0 "s_register_operand" "=w")
3911 (match_operand:DI 1 "s_register_operand" "r")]
3914 emit_move_insn (operands[0], operands[1]);
;; V2DI vdup_n.  Each alternative emits two vmov instructions, one writing
;; %e0 and one writing %f0, from either an "r" source (%Q1, %R1) or a "w"
;; source (%P1).  The "length" attribute is 8.
3919 (define_insn "neon_vdup_nv2di"
3920 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3921 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3924 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3925 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3926 [(set_attr "length" "8")
3927 (set_attr "type" "multiple")]
;; Duplicate one lane of a <V_double_vector_mode> register across a
;; VDQW-mode destination.  On big-endian targets the lane index is mirrored
;; over the source mode's element count.  Two return templates exist, one
;; printing the destination with %P0 and one with %q0.
3930 (define_insn "neon_vdup_lane<mode>_internal"
3931 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3933 (vec_select:<V_elem>
3934 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3935 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3938 if (BYTES_BIG_ENDIAN)
3940 int elt = INTVAL (operands[2]);
3941 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3942 operands[2] = GEN_INT (elt);
3945 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3947 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3949 [(set_attr "type" "neon_dup<q>")]
;; VH-mode counterpart of the lane-duplicate insn, enabled by
;; TARGET_NEON && TARGET_FP16.  On big-endian targets the lane index is
;; mirrored over the source mode's element count.
3952 (define_insn "neon_vdup_lane<mode>_internal"
3953 [(set (match_operand:VH 0 "s_register_operand" "=w")
3955 (vec_select:<V_elem>
3956 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3957 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3958 "TARGET_NEON && TARGET_FP16"
3960 if (BYTES_BIG_ENDIAN)
3962 int elt = INTVAL (operands[2]);
3963 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3964 operands[2] = GEN_INT (elt);
3967 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3969 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3971 [(set_attr "type" "neon_dup<q>")]
;; Lane-duplicate expander for VDQW modes.  On big-endian targets the lane
;; is XORed with (elements per 64 bits - 1), computed from
;; <V_double_vector_mode>, before gen_neon_vdup_lane<mode>_internal is
;; called.
3974 (define_expand "neon_vdup_lane<mode>"
3975 [(match_operand:VDQW 0 "s_register_operand" "=w")
3976 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3977 (match_operand:SI 2 "immediate_operand" "i")]
3980 if (BYTES_BIG_ENDIAN)
3982 unsigned int elt = INTVAL (operands[2]);
3983 unsigned int reg_nelts
3984 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3985 elt ^= reg_nelts - 1;
3986 operands[2] = GEN_INT (elt);
3988 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; Lane-duplicate expander for VH modes (TARGET_NEON && TARGET_FP16).  On
;; big-endian targets the lane is XORed with (elements per 64 bits - 1),
;; computed from <V_double_vector_mode>, before
;; gen_neon_vdup_lane<mode>_internal is called.
3993 (define_expand "neon_vdup_lane<mode>"
3994 [(match_operand:VH 0 "s_register_operand")
3995 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3996 (match_operand:SI 2 "immediate_operand")]
3997 "TARGET_NEON && TARGET_FP16"
3999 if (BYTES_BIG_ENDIAN)
4001 unsigned int elt = INTVAL (operands[2]);
4002 unsigned int reg_nelts
4003 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4004 elt ^= reg_nelts - 1;
4005 operands[2] = GEN_INT (elt);
4007 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4012 ; Scalar index is ignored, since only zero is valid here.
;; The body is a plain move of operand 1 into operand 0; operand 2 is not
;; referenced.
4013 (define_expand "neon_vdup_lanedi"
4014 [(match_operand:DI 0 "s_register_operand" "=w")
4015 (match_operand:DI 1 "s_register_operand" "w")
4016 (match_operand:SI 2 "immediate_operand" "i")]
4019 emit_move_insn (operands[0], operands[1]);
4023 ; Likewise for v2di, as the DImode second operand has only a single element.
;; The body forwards operands 0 and 1 to gen_neon_vdup_nv2di; operand 2 is
;; not referenced.
4024 (define_expand "neon_vdup_lanev2di"
4025 [(match_operand:V2DI 0 "s_register_operand" "=w")
4026 (match_operand:DI 1 "s_register_operand" "w")
4027 (match_operand:SI 2 "immediate_operand" "i")]
4030 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4034 ; Disabled before reload because we don't want combine doing something silly,
4035 ; but used by the post-reload expansion of neon_vcombine.
;; Two parallel sets exchange operands 0 and 1 (both "+w", i.e. read and
;; written).  The insn condition requires reload_completed.
4036 (define_insn "*neon_vswp<mode>"
4037 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4038 (match_operand:VDQX 1 "s_register_operand" "+w"))
4039 (set (match_dup 1) (match_dup 0))]
4040 "TARGET_NEON && reload_completed"
4041 "vswp\t%<V_reg>0, %<V_reg>1"
4042 [(set_attr "type" "neon_permute<q>")]
4045 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4047 ;; FIXME: A different implementation of this builtin could make it much
4048 ;; more likely that we wouldn't actually need to output anything (we could make
4049 ;; it so that the reg allocator puts things in the right places magically
4050 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vec_concat of two VDX-mode registers into a <V_DOUBLE> register.  It is a
;; define_insn_and_split whose split condition adds reload_completed; the
;; split body calls neon_split_vcombine (operands).
4052 (define_insn_and_split "neon_vcombine<mode>"
4053 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4054 (vec_concat:<V_DOUBLE>
4055 (match_operand:VDX 1 "s_register_operand" "w")
4056 (match_operand:VDX 2 "s_register_operand" "w")))]
4059 "&& reload_completed"
4062 neon_split_vcombine (operands);
4065 [(set_attr "type" "multiple")]
;; Moves a <V_HALF>-mode subreg of the VQX operand 1 into operand 0.  The
;; subreg byte offset is GET_MODE_SIZE (<V_HALF>mode).
4068 (define_expand "neon_vget_high<mode>"
4069 [(match_operand:<V_HALF> 0 "s_register_operand")
4070 (match_operand:VQX 1 "s_register_operand")]
4073 emit_move_insn (operands[0],
4074 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4075 GET_MODE_SIZE (<V_HALF>mode)));
;; Moves a <V_HALF>-mode subreg of the VQX operand 1 into operand 0.
4079 (define_expand "neon_vget_low<mode>"
4080 [(match_operand:<V_HALF> 0 "s_register_operand")
4081 (match_operand:VQX 1 "s_register_operand")]
4084 emit_move_insn (operands[0],
4085 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Signed integer to float vector conversion (RTL "float"), emitted as
;; vcvt.f32.s32.  Disabled when flag_rounding_math is set.
4090 (define_insn "float<mode><V_cvtto>2"
4091 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4092 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4093 "TARGET_NEON && !flag_rounding_math"
4094 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4095 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned integer to float vector conversion (RTL "unsigned_float"),
;; emitted as vcvt.f32.u32.  Disabled when flag_rounding_math is set.
4098 (define_insn "floatuns<mode><V_cvtto>2"
4099 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4100 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4101 "TARGET_NEON && !flag_rounding_math"
4102 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4103 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Float to signed integer vector conversion (RTL "fix"), emitted as
;; vcvt.s32.f32.
4106 (define_insn "fix_trunc<mode><V_cvtto>2"
4107 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4108 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4110 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4111 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Float to unsigned integer vector conversion (RTL "unsigned_fix"),
;; emitted as vcvt.u32.f32.
4114 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4115 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4116 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4118 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4119 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec vcvt from a VCVTF-mode input to <V_CVTTO>; <sup> supplies the
;; type letter of the 32-bit destination in the mnemonic.
4122 (define_insn "neon_vcvt<sup><mode>"
4123 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4124 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4127 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4128 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec vcvt from a VCVTI-mode input to <V_CVTTO>, printed with an f32
;; destination; <sup> supplies the type letter of the 32-bit source.
4131 (define_insn "neon_vcvt<sup><mode>"
4132 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4133 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4136 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4137 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion from a V4HF operand to a V4SF result, emitted as
;; vcvt.f32.f16.  Requires TARGET_NEON && TARGET_FP16.
4140 (define_insn "neon_vcvtv4sfv4hf"
4141 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4142 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4144 "TARGET_NEON && TARGET_FP16"
4145 "vcvt.f32.f16\t%q0, %P1"
4146 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion from a V4SF operand to a V4HF result, emitted as
;; vcvt.f16.f32.  Requires TARGET_NEON && TARGET_FP16.
4149 (define_insn "neon_vcvtv4hfv4sf"
4150 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4151 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4153 "TARGET_NEON && TARGET_FP16"
4154 "vcvt.f16.f32\t%P0, %q1"
4155 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Conversion of a VCVTHI operand to <VH_CVTTO>, emitted as vcvt.f16.<sup>16.
;; Requires TARGET_NEON_FP16INST.
;; NOTE(review): the set/unspec wrapper lines are not visible in this listing.
4158 (define_insn "neon_vcvt<sup><mode>"
4160 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4162 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4164 "TARGET_NEON_FP16INST"
4165 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4166 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Conversion of a VH operand to <VH_CVTTO>, emitted as vcvt.<sup>16.f16.
;; Requires TARGET_NEON_FP16INST.
;; NOTE(review): the set/unspec wrapper lines are not visible in this listing.
4169 (define_insn "neon_vcvt<sup><mode>"
4171 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4173 [(match_operand:VH 1 "s_register_operand" "w")]
4175 "TARGET_NEON_FP16INST"
4176 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4177 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; VCVTF to <V_CVTTO> conversion taking an extra SI immediate (operand 2),
;; which is printed as the third assembler operand.  The immediate is checked
;; with arm_const_bounds (operands[2], 1, 33) before the template is returned.
;; NOTE(review): the unspec code and insn condition are not visible here.
4180 (define_insn "neon_vcvt<sup>_n<mode>"
4181 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4182 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4183 (match_operand:SI 2 "immediate_operand" "i")]
4187 arm_const_bounds (operands[2], 1, 33);
4188 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4190 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; VH to <VH_CVTTO> conversion taking an extra SI immediate (operand 2),
;; printed as the third assembler operand.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the immediate is checked against (0, 17) here, whereas the
;; 32-bit sibling patterns use (1, 33); confirm that a lower bound of 0 is
;; really intended for the 16-bit form.
4193 (define_insn "neon_vcvt<sup>_n<mode>"
4194 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4196 [(match_operand:VH 1 "s_register_operand" "w")
4197 (match_operand:SI 2 "immediate_operand" "i")]
4199 "TARGET_NEON_FP16INST"
4201 arm_const_bounds (operands[2], 0, 17);
4202 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4204 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; VCVTI to <V_CVTTO> conversion taking an extra SI immediate (operand 2),
;; which is printed as the third assembler operand.  The immediate is checked
;; with arm_const_bounds (operands[2], 1, 33) before the template is returned.
;; NOTE(review): the unspec code and insn condition are not visible here.
4207 (define_insn "neon_vcvt<sup>_n<mode>"
4208 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4209 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4210 (match_operand:SI 2 "immediate_operand" "i")]
4214 arm_const_bounds (operands[2], 1, 33);
4215 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4217 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; VCVTHI to <VH_CVTTO> conversion taking an extra SI immediate (operand 2),
;; printed as the third assembler operand.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the immediate is checked against (0, 17) here, whereas the
;; 32-bit sibling patterns use (1, 33); confirm that a lower bound of 0 is
;; really intended for the 16-bit form.
4220 (define_insn "neon_vcvt<sup>_n<mode>"
4221 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4223 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4224 (match_operand:SI 2 "immediate_operand" "i")]
4226 "TARGET_NEON_FP16INST"
4228 arm_const_bounds (operands[2], 0, 17);
4229 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4231 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; VH to <VH_CVTTO> conversion whose mnemonic suffix comes from <vcvth_op>;
;; emitted as vcvt<vcvth_op>.<sup>16.f16.  Requires TARGET_NEON_FP16INST.
;; NOTE(review): the set/unspec wrapper lines are not visible in this listing.
4234 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4236 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4238 [(match_operand:VH 1 "s_register_operand" "w")]
4240 "TARGET_NEON_FP16INST"
4241 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4242 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Narrowing move: VN operand 1 to a <V_narrow> result, emitted as vmovn.
;; NOTE(review): the unspec code and insn condition are not visible here.
4245 (define_insn "neon_vmovn<mode>"
4246 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4247 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4250 "vmovn.<V_if_elem>\t%P0, %q1"
4251 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Narrowing move emitted as vqmovn.<sup><V_sz_elem>: VN operand 1 to a
;; <V_narrow> result.
;; NOTE(review): the unspec code and insn condition are not visible here.
4254 (define_insn "neon_vqmovn<sup><mode>"
4255 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4256 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4259 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4260 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Narrowing move emitted as vqmovun.<V_s_elem>: VN operand 1 to a
;; <V_narrow> result.
;; NOTE(review): the unspec code and insn condition are not visible here.
4263 (define_insn "neon_vqmovun<mode>"
4264 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4265 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4268 "vqmovun.<V_s_elem>\t%P0, %q1"
4269 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move emitted as vmovl.<sup><V_sz_elem>: VW operand 1 to a
;; <V_widen> result.
;; NOTE(review): the unspec code and insn condition are not visible here.
4272 (define_insn "neon_vmovl<sup><mode>"
4273 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4274 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4277 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4278 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply VMD operand 1 by one element of VMD operand 2; the element is
;; selected by immediate operand 3, printed as %P2[%c3].  The "type"
;; attribute is chosen by testing <Is_float_mode>.
;; NOTE(review): the unspec code and insn condition are not visible here.
4281 (define_insn "neon_vmul_lane<mode>"
4282 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4283 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4284 (match_operand:VMD 2 "s_register_operand"
4285 "<scalar_mul_constraint>")
4286 (match_operand:SI 3 "immediate_operand" "i")]
4290 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4293 (if_then_else (match_test "<Is_float_mode>")
4294 (const_string "neon_fp_mul_s_scalar<q>")
4295 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply VMQ operand 1 by one element of the <V_HALF>-mode operand 2; the
;; element is selected by immediate operand 3, printed as %P2[%c3].  The
;; "type" attribute is chosen by testing <Is_float_mode>.
;; NOTE(review): the unspec code and insn condition are not visible here.
4298 (define_insn "neon_vmul_lane<mode>"
4299 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4300 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4301 (match_operand:<V_HALF> 2 "s_register_operand"
4302 "<scalar_mul_constraint>")
4303 (match_operand:SI 3 "immediate_operand" "i")]
4307 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4310 (if_then_else (match_test "<Is_float_mode>")
4311 (const_string "neon_fp_mul_s_scalar<q>")
4312 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Half-precision variant: multiply VH operand 1 by one element of the V4HF
;; operand 2, selected by immediate operand 3; emitted as vmul.f16.
;; Requires TARGET_NEON_FP16INST.
4315 (define_insn "neon_vmul_lane<mode>"
4316 [(set (match_operand:VH 0 "s_register_operand" "=w")
4317 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4318 (match_operand:V4HF 2 "s_register_operand"
4319 "<scalar_mul_constraint>")
4320 (match_operand:SI 3 "immediate_operand" "i")]
4322 "TARGET_NEON_FP16INST"
4323 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4324 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; Widening multiply of VMDI operand 1 by one element of VMDI operand 2
;; (selected by immediate operand 3), producing a <V_widen> result; emitted
;; as vmull.<sup><V_sz_elem>.
;; NOTE(review): the unspec code and insn condition are not visible here.
4327 (define_insn "neon_vmull<sup>_lane<mode>"
4328 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4329 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4330 (match_operand:VMDI 2 "s_register_operand"
4331 "<scalar_mul_constraint>")
4332 (match_operand:SI 3 "immediate_operand" "i")]
4336 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4338 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; UNSPEC_VQDMULL_LANE: VMDI operand 1 combined with one element of VMDI
;; operand 2 (selected by immediate operand 3), producing a <V_widen> result;
;; emitted as vqdmull.<V_s_elem>.
;; NOTE(review): the insn condition is not visible in this listing.
4341 (define_insn "neon_vqdmull_lane<mode>"
4342 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4343 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4344 (match_operand:VMDI 2 "s_register_operand"
4345 "<scalar_mul_constraint>")
4346 (match_operand:SI 3 "immediate_operand" "i")]
4347 UNSPEC_VQDMULL_LANE))]
4350 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4352 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; VMQI form of vq<r>dmulh by lane: operand 1 combined with one element of the
;; <V_HALF>-mode operand 2, selected by immediate operand 3.
;; NOTE(review): the unspec code and insn condition are not visible here.
4355 (define_insn "neon_vq<r>dmulh_lane<mode>"
4356 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4357 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4358 (match_operand:<V_HALF> 2 "s_register_operand"
4359 "<scalar_mul_constraint>")
4360 (match_operand:SI 3 "immediate_operand" "i")]
4364 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4366 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; VMDI form of vq<r>dmulh by lane: operand 1 combined with one element of
;; VMDI operand 2, selected by immediate operand 3.
;; NOTE(review): the "type" attribute carries the same "_q" suffix as the
;; VMQI variant even though this template uses %P operands; confirm that is
;; intended.  The unspec code and insn condition are not visible here.
4369 (define_insn "neon_vq<r>dmulh_lane<mode>"
4370 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4371 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4372 (match_operand:VMDI 2 "s_register_operand"
4373 "<scalar_mul_constraint>")
4374 (match_operand:SI 3 "immediate_operand" "i")]
4378 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4380 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4383 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI form.  Operand 1 is tied to the output (constraint "0"); operand 2 is
;; combined with one element of the <V_HALF>-mode operand 3, selected by
;; immediate operand 4.
;; NOTE(review): the unspec code and insn condition are not visible here.
4384 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4385 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4386 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4387 (match_operand:VMQI 2 "s_register_operand" "w")
4388 (match_operand:<V_HALF> 3 "s_register_operand"
4389 "<scalar_mul_constraint>")
4390 (match_operand:SI 4 "immediate_operand" "i")]
4395 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4397 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI form of vqrdmlah/vqrdmlsh by lane.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is combined with one element of VMDI operand 3,
;; selected by immediate operand 4.
;; NOTE(review): the unspec code and insn condition are not visible here.
4400 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4401 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4402 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4403 (match_operand:VMDI 2 "s_register_operand" "w")
4404 (match_operand:VMDI 3 "s_register_operand"
4405 "<scalar_mul_constraint>")
4406 (match_operand:SI 4 "immediate_operand" "i")]
4411 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4413 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; VMD multiply-accumulate by lane, emitted as vmla.  Operand 1 (the
;; accumulator input) is tied to the output; operand 2 is multiplied by the
;; element of operand 3 selected by immediate operand 4.  The "type"
;; attribute is chosen by testing <Is_float_mode>.
;; NOTE(review): the unspec code and insn condition are not visible here.
4416 (define_insn "neon_vmla_lane<mode>"
4417 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4418 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4419 (match_operand:VMD 2 "s_register_operand" "w")
4420 (match_operand:VMD 3 "s_register_operand"
4421 "<scalar_mul_constraint>")
4422 (match_operand:SI 4 "immediate_operand" "i")]
4426 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4429 (if_then_else (match_test "<Is_float_mode>")
4430 (const_string "neon_fp_mla_s_scalar<q>")
4431 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; VMQ multiply-accumulate by lane, emitted as vmla.  Operand 1 is tied to
;; the output; operand 2 is multiplied by the element of the <V_HALF>-mode
;; operand 3 selected by immediate operand 4.  The "type" attribute is chosen
;; by testing <Is_float_mode>.
;; NOTE(review): the unspec code and insn condition are not visible here.
4434 (define_insn "neon_vmla_lane<mode>"
4435 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4436 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4437 (match_operand:VMQ 2 "s_register_operand" "w")
4438 (match_operand:<V_HALF> 3 "s_register_operand"
4439 "<scalar_mul_constraint>")
4440 (match_operand:SI 4 "immediate_operand" "i")]
4444 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4447 (if_then_else (match_test "<Is_float_mode>")
4448 (const_string "neon_fp_mla_s_scalar<q>")
4449 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by lane, emitted as vmlal.<sup><V_sz_elem>.
;; The <V_widen> operand 1 is tied to the output; VMDI operand 2 is multiplied
;; by the element of VMDI operand 3 selected by immediate operand 4.
;; NOTE(review): the unspec code and insn condition are not visible here.
4452 (define_insn "neon_vmlal<sup>_lane<mode>"
4453 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4454 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4455 (match_operand:VMDI 2 "s_register_operand" "w")
4456 (match_operand:VMDI 3 "s_register_operand"
4457 "<scalar_mul_constraint>")
4458 (match_operand:SI 4 "immediate_operand" "i")]
4462 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4464 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; UNSPEC_VQDMLAL_LANE, emitted as vqdmlal.<V_s_elem>.  The <V_widen>
;; operand 1 is tied to the output; VMDI operand 2 is combined with the
;; element of VMDI operand 3 selected by immediate operand 4.
;; NOTE(review): the insn condition is not visible in this listing.
4467 (define_insn "neon_vqdmlal_lane<mode>"
4468 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4469 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4470 (match_operand:VMDI 2 "s_register_operand" "w")
4471 (match_operand:VMDI 3 "s_register_operand"
4472 "<scalar_mul_constraint>")
4473 (match_operand:SI 4 "immediate_operand" "i")]
4474 UNSPEC_VQDMLAL_LANE))]
4477 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4479 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; VMD multiply-subtract by lane, emitted as vmls.  Operand 1 is tied to the
;; output; operand 2 is multiplied by the element of operand 3 selected by
;; immediate operand 4.  The "type" attribute is chosen by testing
;; <Is_float_mode>.
;; NOTE(review): the unspec code and insn condition are not visible here.
4482 (define_insn "neon_vmls_lane<mode>"
4483 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4484 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4485 (match_operand:VMD 2 "s_register_operand" "w")
4486 (match_operand:VMD 3 "s_register_operand"
4487 "<scalar_mul_constraint>")
4488 (match_operand:SI 4 "immediate_operand" "i")]
4492 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4495 (if_then_else (match_test "<Is_float_mode>")
4496 (const_string "neon_fp_mla_s_scalar<q>")
4497 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; VMQ multiply-subtract by lane, emitted as vmls.  Operand 1 is tied to the
;; output; operand 2 is multiplied by the element of the <V_HALF>-mode
;; operand 3 selected by immediate operand 4.  The "type" attribute is chosen
;; by testing <Is_float_mode>.
;; NOTE(review): the unspec code and insn condition are not visible here.
4500 (define_insn "neon_vmls_lane<mode>"
4501 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4502 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4503 (match_operand:VMQ 2 "s_register_operand" "w")
4504 (match_operand:<V_HALF> 3 "s_register_operand"
4505 "<scalar_mul_constraint>")
4506 (match_operand:SI 4 "immediate_operand" "i")]
4510 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4513 (if_then_else (match_test "<Is_float_mode>")
4514 (const_string "neon_fp_mla_s_scalar<q>")
4515 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by lane, emitted as vmlsl.<sup><V_sz_elem>.
;; The <V_widen> operand 1 is tied to the output; VMDI operand 2 is multiplied
;; by the element of VMDI operand 3 selected by immediate operand 4.
;; NOTE(review): the unspec code and insn condition are not visible here.
4518 (define_insn "neon_vmlsl<sup>_lane<mode>"
4519 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4520 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4521 (match_operand:VMDI 2 "s_register_operand" "w")
4522 (match_operand:VMDI 3 "s_register_operand"
4523 "<scalar_mul_constraint>")
4524 (match_operand:SI 4 "immediate_operand" "i")]
4528 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4530 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; UNSPEC_VQDMLSL_LANE, emitted as vqdmlsl.<V_s_elem>.  The <V_widen>
;; operand 1 is tied to the output; VMDI operand 2 is combined with the
;; element of VMDI operand 3 selected by immediate operand 4.
;; NOTE(review): the insn condition is not visible in this listing.
4533 (define_insn "neon_vqdmlsl_lane<mode>"
4534 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4535 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4536 (match_operand:VMDI 2 "s_register_operand" "w")
4537 (match_operand:VMDI 3 "s_register_operand"
4538 "<scalar_mul_constraint>")
4539 (match_operand:SI 4 "immediate_operand" "i")]
4540 UNSPEC_VQDMLSL_LANE))]
4543 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4545 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4548 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4549 ; core register into a temp register, then use a scalar taken from that. This
4550 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4551 ; or extracted from another vector. In the latter case it's currently better to
4552 ; use the "_lane" variant, and the former case can probably be implemented
4553 ; using vld1_lane, but that hasn't been done yet.
;; Expander for VMD multiply by a scalar.  Allocates a temporary <MODE>mode
;; register, writes the <V_elem> operand 2 into it with neon_vset_lane (the
;; const0_rtx argument is presumably lane index 0), then emits the matching
;; neon_vmul_lane pattern with the temporary as the scalar source.
;; NOTE(review): the final argument of the vmul_lane call is not visible here.
4555 (define_expand "neon_vmul_n<mode>"
4556 [(match_operand:VMD 0 "s_register_operand" "")
4557 (match_operand:VMD 1 "s_register_operand" "")
4558 (match_operand:<V_elem> 2 "s_register_operand" "")]
4561 rtx tmp = gen_reg_rtx (<MODE>mode);
4562 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4563 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for VMQ multiply by a scalar.  The temporary is allocated in
;; <V_HALF>mode; operand 2 is written into it with neon_vset_lane<V_half>
;; (const0_rtx is presumably lane index 0) and the temporary is then passed to
;; the neon_vmul_lane pattern.
;; NOTE(review): the final argument of the vmul_lane call is not visible here.
4568 (define_expand "neon_vmul_n<mode>"
4569 [(match_operand:VMQ 0 "s_register_operand" "")
4570 (match_operand:VMQ 1 "s_register_operand" "")
4571 (match_operand:<V_elem> 2 "s_register_operand" "")]
4574 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4575 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4576 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for VH multiply by a scalar; requires TARGET_NEON_FP16INST.  The
;; temporary is always V4HFmode: operand 2 is written into it with
;; neon_vset_lanev4hf (const0_rtx is presumably lane index 0) and it is then
;; passed to the neon_vmul_lane pattern.
;; NOTE(review): the final argument of the vmul_lane call is not visible here.
4581 (define_expand "neon_vmul_n<mode>"
4582 [(match_operand:VH 0 "s_register_operand")
4583 (match_operand:VH 1 "s_register_operand")
4584 (match_operand:<V_elem> 2 "s_register_operand")]
4585 "TARGET_NEON_FP16INST"
4587 rtx tmp = gen_reg_rtx (V4HFmode);
4588 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4589 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vmulls by a scalar: writes operand 2 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vmulls_lane with that temporary.
;; NOTE(review): the final argument of the _lane call is not visible here.
4594 (define_expand "neon_vmulls_n<mode>"
4595 [(match_operand:<V_widen> 0 "s_register_operand" "")
4596 (match_operand:VMDI 1 "s_register_operand" "")
4597 (match_operand:<V_elem> 2 "s_register_operand" "")]
4600 rtx tmp = gen_reg_rtx (<MODE>mode);
4601 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4602 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vmullu by a scalar: writes operand 2 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vmullu_lane with that temporary.
;; NOTE(review): the final argument of the _lane call is not visible here.
4607 (define_expand "neon_vmullu_n<mode>"
4608 [(match_operand:<V_widen> 0 "s_register_operand" "")
4609 (match_operand:VMDI 1 "s_register_operand" "")
4610 (match_operand:<V_elem> 2 "s_register_operand" "")]
4613 rtx tmp = gen_reg_rtx (<MODE>mode);
4614 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4615 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqdmull by a scalar: writes operand 2 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vqdmull_lane with that temporary.
;; NOTE(review): the final argument of the _lane call is not visible here.
4620 (define_expand "neon_vqdmull_n<mode>"
4621 [(match_operand:<V_widen> 0 "s_register_operand" "")
4622 (match_operand:VMDI 1 "s_register_operand" "")
4623 (match_operand:<V_elem> 2 "s_register_operand" "")]
4626 rtx tmp = gen_reg_rtx (<MODE>mode);
4627 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4628 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Expander for VMDI vqdmulh by a scalar: writes operand 2 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vqdmulh_lane with that temporary.
;; NOTE(review): the final argument of the _lane call is not visible here.
4633 (define_expand "neon_vqdmulh_n<mode>"
4634 [(match_operand:VMDI 0 "s_register_operand" "")
4635 (match_operand:VMDI 1 "s_register_operand" "")
4636 (match_operand:<V_elem> 2 "s_register_operand" "")]
4639 rtx tmp = gen_reg_rtx (<MODE>mode);
4640 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4641 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for VMDI vqrdmulh by a scalar: writes operand 2 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vqrdmulh_lane with that temporary.
;; NOTE(review): the final argument of the _lane call is not visible here.
4646 (define_expand "neon_vqrdmulh_n<mode>"
4647 [(match_operand:VMDI 0 "s_register_operand" "")
4648 (match_operand:VMDI 1 "s_register_operand" "")
4649 (match_operand:<V_elem> 2 "s_register_operand" "")]
4652 rtx tmp = gen_reg_rtx (<MODE>mode);
4653 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4654 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for VMQI vqdmulh by a scalar.  The temporary is allocated in
;; <V_HALF>mode; operand 2 is written into it with neon_vset_lane<V_half>
;; (const0_rtx is presumably lane index 0) before neon_vqdmulh_lane is emitted.
;; NOTE(review): the final argument of the _lane call is not visible here.
4659 (define_expand "neon_vqdmulh_n<mode>"
4660 [(match_operand:VMQI 0 "s_register_operand" "")
4661 (match_operand:VMQI 1 "s_register_operand" "")
4662 (match_operand:<V_elem> 2 "s_register_operand" "")]
4665 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4666 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4667 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for VMQI vqrdmulh by a scalar.  The temporary is allocated in
;; <V_HALF>mode; operand 2 is written into it with neon_vset_lane<V_half>
;; (const0_rtx is presumably lane index 0) before neon_vqrdmulh_lane is
;; emitted.
;; NOTE(review): the final argument of the _lane call is not visible here.
4672 (define_expand "neon_vqrdmulh_n<mode>"
4673 [(match_operand:VMQI 0 "s_register_operand" "")
4674 (match_operand:VMQI 1 "s_register_operand" "")
4675 (match_operand:<V_elem> 2 "s_register_operand" "")]
4678 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4679 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4680 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for VMD multiply-accumulate by a scalar: writes the <V_elem>
;; operand 3 into a temporary <MODE>mode register with neon_vset_lane
;; (const0_rtx is presumably lane index 0) and emits neon_vmla_lane on
;; operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4685 (define_expand "neon_vmla_n<mode>"
4686 [(match_operand:VMD 0 "s_register_operand" "")
4687 (match_operand:VMD 1 "s_register_operand" "")
4688 (match_operand:VMD 2 "s_register_operand" "")
4689 (match_operand:<V_elem> 3 "s_register_operand" "")]
4692 rtx tmp = gen_reg_rtx (<MODE>mode);
4693 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4694 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for VMQ multiply-accumulate by a scalar.  The temporary is
;; allocated in <V_HALF>mode; operand 3 is written into it with
;; neon_vset_lane<V_half> (const0_rtx is presumably lane index 0) before
;; neon_vmla_lane is emitted on operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4699 (define_expand "neon_vmla_n<mode>"
4700 [(match_operand:VMQ 0 "s_register_operand" "")
4701 (match_operand:VMQ 1 "s_register_operand" "")
4702 (match_operand:VMQ 2 "s_register_operand" "")
4703 (match_operand:<V_elem> 3 "s_register_operand" "")]
4706 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4707 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4708 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for vmlals by a scalar: writes operand 3 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vmlals_lane on operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4713 (define_expand "neon_vmlals_n<mode>"
4714 [(match_operand:<V_widen> 0 "s_register_operand" "")
4715 (match_operand:<V_widen> 1 "s_register_operand" "")
4716 (match_operand:VMDI 2 "s_register_operand" "")
4717 (match_operand:<V_elem> 3 "s_register_operand" "")]
4720 rtx tmp = gen_reg_rtx (<MODE>mode);
4721 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4722 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for vmlalu by a scalar: writes operand 3 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vmlalu_lane on operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4727 (define_expand "neon_vmlalu_n<mode>"
4728 [(match_operand:<V_widen> 0 "s_register_operand" "")
4729 (match_operand:<V_widen> 1 "s_register_operand" "")
4730 (match_operand:VMDI 2 "s_register_operand" "")
4731 (match_operand:<V_elem> 3 "s_register_operand" "")]
4734 rtx tmp = gen_reg_rtx (<MODE>mode);
4735 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4736 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for vqdmlal by a scalar: writes operand 3 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vqdmlal_lane on operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4741 (define_expand "neon_vqdmlal_n<mode>"
4742 [(match_operand:<V_widen> 0 "s_register_operand" "")
4743 (match_operand:<V_widen> 1 "s_register_operand" "")
4744 (match_operand:VMDI 2 "s_register_operand" "")
4745 (match_operand:<V_elem> 3 "s_register_operand" "")]
4748 rtx tmp = gen_reg_rtx (<MODE>mode);
4749 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4750 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for VMD multiply-subtract by a scalar: writes the <V_elem>
;; operand 3 into a temporary <MODE>mode register with neon_vset_lane
;; (const0_rtx is presumably lane index 0) and emits neon_vmls_lane on
;; operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4755 (define_expand "neon_vmls_n<mode>"
4756 [(match_operand:VMD 0 "s_register_operand" "")
4757 (match_operand:VMD 1 "s_register_operand" "")
4758 (match_operand:VMD 2 "s_register_operand" "")
4759 (match_operand:<V_elem> 3 "s_register_operand" "")]
4762 rtx tmp = gen_reg_rtx (<MODE>mode);
4763 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4764 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for VMQ multiply-subtract by a scalar.  The temporary is
;; allocated in <V_HALF>mode; operand 3 is written into it with
;; neon_vset_lane<V_half> (const0_rtx is presumably lane index 0) before
;; neon_vmls_lane is emitted on operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4769 (define_expand "neon_vmls_n<mode>"
4770 [(match_operand:VMQ 0 "s_register_operand" "")
4771 (match_operand:VMQ 1 "s_register_operand" "")
4772 (match_operand:VMQ 2 "s_register_operand" "")
4773 (match_operand:<V_elem> 3 "s_register_operand" "")]
4776 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4777 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4778 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for vmlsls by a scalar: writes operand 3 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vmlsls_lane on operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4783 (define_expand "neon_vmlsls_n<mode>"
4784 [(match_operand:<V_widen> 0 "s_register_operand" "")
4785 (match_operand:<V_widen> 1 "s_register_operand" "")
4786 (match_operand:VMDI 2 "s_register_operand" "")
4787 (match_operand:<V_elem> 3 "s_register_operand" "")]
4790 rtx tmp = gen_reg_rtx (<MODE>mode);
4791 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4792 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for vmlslu by a scalar: writes operand 3 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vmlslu_lane on operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4797 (define_expand "neon_vmlslu_n<mode>"
4798 [(match_operand:<V_widen> 0 "s_register_operand" "")
4799 (match_operand:<V_widen> 1 "s_register_operand" "")
4800 (match_operand:VMDI 2 "s_register_operand" "")
4801 (match_operand:<V_elem> 3 "s_register_operand" "")]
4804 rtx tmp = gen_reg_rtx (<MODE>mode);
4805 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4806 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for vqdmlsl by a scalar: writes operand 3 into a temporary
;; <MODE>mode register with neon_vset_lane (const0_rtx is presumably lane
;; index 0) and emits neon_vqdmlsl_lane on operands 0-2.
;; NOTE(review): the trailing arguments of the _lane call are not visible here.
4811 (define_expand "neon_vqdmlsl_n<mode>"
4812 [(match_operand:<V_widen> 0 "s_register_operand" "")
4813 (match_operand:<V_widen> 1 "s_register_operand" "")
4814 (match_operand:VMDI 2 "s_register_operand" "")
4815 (match_operand:<V_elem> 3 "s_register_operand" "")]
4818 rtx tmp = gen_reg_rtx (<MODE>mode);
4819 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4820 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on two VDQX operands with an SI immediate (operand 3), which is
;; printed as the last assembler operand.  The immediate is checked with
;; arm_const_bounds against 0 and GET_MODE_NUNITS (<MODE>mode).
;; NOTE(review): the unspec code and insn condition are not visible here.
4825 (define_insn "neon_vext<mode>"
4826 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4827 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4828 (match_operand:VDQX 2 "s_register_operand" "w")
4829 (match_operand:SI 3 "immediate_operand" "i")]
4833 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4834 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4836 [(set_attr "type" "neon_ext<q>")]
;; vrev64.<V_sz_elem> on a VDQ operand.
;; NOTE(review): the unspec code and insn condition are not visible here.
4839 (define_insn "neon_vrev64<mode>"
4840 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4841 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4844 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4845 [(set_attr "type" "neon_rev<q>")]
;; vrev32.<V_sz_elem> on a VX operand.
;; NOTE(review): the unspec code and insn condition are not visible here.
4848 (define_insn "neon_vrev32<mode>"
4849 [(set (match_operand:VX 0 "s_register_operand" "=w")
4850 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4853 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4854 [(set_attr "type" "neon_rev<q>")]
;; vrev16.<V_sz_elem> on a VE operand.
;; NOTE(review): the unspec code and insn condition are not visible here.
4857 (define_insn "neon_vrev16<mode>"
4858 [(set (match_operand:VE 0 "s_register_operand" "=w")
4859 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4862 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4863 [(set_attr "type" "neon_rev<q>")]
4866 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4867 ; allocation. For an intrinsic of form:
4868 ; rD = vbsl_* (rS, rN, rM)
4869 ; We can use any of:
4870 ; vbsl rS, rN, rM (if D = S)
4871 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4872 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Three-alternative bit-select insn.  Each alternative ties a different
;; input to the output register and picks the mnemonic accordingly:
;;   alt 0: operand 1 tied ("0") -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied ("0") -> vbit %0, %2, %1
;;   alt 2: operand 2 tied ("0") -> vbif %0, %3, %1
;; NOTE(review): the unspec code and insn condition are not visible here.
4874 (define_insn "neon_vbsl<mode>_internal"
4875 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4876 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4877 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4878 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4882 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4883 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4884 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4885 [(set_attr "type" "neon_bsl<q>")]
;; Expander for vbsl.  Operand 1 arrives in <V_cmp_result> mode while
;; operands 0, 2 and 3 are VDQX; the preparation code re-expresses operand 1
;; in <MODE>mode with gen_lowpart so that all operands share one mode.
;; NOTE(review): the unspec code and expander condition are not visible here.
4888 (define_expand "neon_vbsl<mode>"
4889 [(set (match_operand:VDQX 0 "s_register_operand" "")
4890 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4891 (match_operand:VDQX 2 "s_register_operand" "")
4892 (match_operand:VDQX 3 "s_register_operand" "")]
4896 /* We can't alias operands together if they have different modes. */
4897 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Register-by-register vector shift on VDQIX operands; the mnemonic comes
;; from <shift_op> and the data-type prefix from <sup>.  Typed as
;; neon_shift_imm<q>.
;; NOTE(review): the unspec iterator and insn condition are not visible here,
;; so which shift operations this pattern covers cannot be told from this
;; listing.
4901 (define_insn "neon_v<shift_op><sup><mode>"
4902 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4903 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4904 (match_operand:VDQIX 2 "s_register_operand" "w")]
4907 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4908 [(set_attr "type" "neon_shift_imm<q>")]
;; Register-by-register vector shift on VDQIX operands, with the same
;; template as the preceding pattern but typed as neon_sat_shift_imm<q>.
;; NOTE(review): the unspec iterator and insn condition are not visible here,
;; so which shift operations this pattern covers cannot be told from this
;; listing.
4912 (define_insn "neon_v<shift_op><sup><mode>"
4913 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4914 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4915 (match_operand:VDQIX 2 "s_register_operand" "w")]
4918 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4919 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Vector shift by an SI immediate (operand 2) on a VDQIX operand.  The
;; immediate is checked with arm_const_bounds against 1 and
;; neon_element_bits (<MODE>mode) + 1.
;; NOTE(review): the unspec iterator and insn condition are not visible here.
4923 (define_insn "neon_v<shift_op><sup>_n<mode>"
4924 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4925 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4926 (match_operand:SI 2 "immediate_operand" "i")]
4930 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4931 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4933 [(set_attr "type" "neon_shift_imm<q>")]
4936 ;; vshrn_n, vrshrn_n
;; Narrowing shift of a VN operand by an SI immediate (operand 2), producing
;; a <V_narrow> result.  The immediate is checked with arm_const_bounds
;; against 1 and neon_element_bits (<MODE>mode) / 2 + 1.
;; NOTE(review): the unspec iterator and insn condition are not visible here.
4937 (define_insn "neon_v<shift_op>_n<mode>"
4938 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4939 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4940 (match_operand:SI 2 "immediate_operand" "i")]
4944 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4945 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4947 [(set_attr "type" "neon_shift_imm_narrow_q")]
4950 ;; vqshrn_n, vqrshrn_n
;; Narrowing shift of a VN operand by an SI immediate (operand 2), producing
;; a <V_narrow> result; the data-type prefix comes from <sup>.  The immediate
;; is checked against 1 and neon_element_bits (<MODE>mode) / 2 + 1.
;; NOTE(review): the unspec iterator and insn condition are not visible here.
4951 (define_insn "neon_v<shift_op><sup>_n<mode>"
4952 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4953 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4954 (match_operand:SI 2 "immediate_operand" "i")]
4958 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4959 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4961 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4964 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift of a VN operand by an SI immediate (operand 2), producing
;; a <V_narrow> result; uses the <V_s_elem> data type.  The immediate is
;; checked against 1 and neon_element_bits (<MODE>mode) / 2 + 1.
;; NOTE(review): the unspec iterator and insn condition are not visible here.
4965 (define_insn "neon_v<shift_op>_n<mode>"
4966 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4967 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4968 (match_operand:SI 2 "immediate_operand" "i")]
4972 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4973 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4975 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl of a VDQIX operand by an SI immediate (operand 2).  The immediate is
;; checked with arm_const_bounds against 0 and neon_element_bits (<MODE>mode).
;; NOTE(review): the unspec code and insn condition are not visible here.
4978 (define_insn "neon_vshl_n<mode>"
4979 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4980 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4981 (match_operand:SI 2 "immediate_operand" "i")]
4985 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4986 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4988 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl.<sup><V_sz_elem> of a VDQIX operand by an SI immediate (operand 2).
;; The immediate is checked with arm_const_bounds against 0 and
;; neon_element_bits (<MODE>mode).
;; NOTE(review): the unspec code and insn condition are not visible here.
4991 (define_insn "neon_vqshl_<sup>_n<mode>"
4992 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4993 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4994 (match_operand:SI 2 "immediate_operand" "i")]
4998 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4999 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
5001 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu.<V_s_elem> of a VDQIX operand by an SI immediate (operand 2).  The
;; immediate is checked with arm_const_bounds against 0 and
;; neon_element_bits (<MODE>mode).
;; NOTE(review): the unspec code and insn condition are not visible here.
5004 (define_insn "neon_vqshlu_n<mode>"
5005 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5006 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5007 (match_operand:SI 2 "immediate_operand" "i")]
5011 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
5012 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
5014 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left of a VW operand by an SI immediate (operand 2),
;; producing a <V_widen> result; emitted as vshll.<sup><V_sz_elem>.
;; NOTE(review): the in-body comment states "0 < imm <= size", but the lower
;; bound passed to arm_const_bounds is 0, the same value the neon_vshl_n
;; pattern uses for its zero-based range.  Confirm whether an immediate of 0
;; is meant to be accepted here, and update the comment or the bound to match.
5017 (define_insn "neon_vshll<sup>_n<mode>"
5018 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
5019 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
5020 (match_operand:SI 2 "immediate_operand" "i")]
5024 /* The boundaries are: 0 < imm <= size. */
5025 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
5026 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
5028 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-and-accumulate form (typed neon_shift_acc<q>): operand 1 is tied to
;; the output, operand 2 is the value shifted and operand 3 is the SI
;; immediate, checked against 1 and neon_element_bits (<MODE>mode) + 1.
;; NOTE(review): the unspec iterator and insn condition are not visible here.
5032 (define_insn "neon_v<shift_op><sup>_n<mode>"
5033 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5034 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5035 (match_operand:VDQIX 2 "s_register_operand" "w")
5036 (match_operand:SI 3 "immediate_operand" "i")]
5040 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5041 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5043 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri: operand 1 is tied to the output, operand 2 is the second vector
;; input and operand 3 is the SI immediate, checked against 1 and
;; neon_element_bits (<MODE>mode) + 1.
;; NOTE(review): the unspec code and insn condition are not visible here.
5046 (define_insn "neon_vsri_n<mode>"
5047 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5048 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5049 (match_operand:VDQIX 2 "s_register_operand" "w")
5050 (match_operand:SI 3 "immediate_operand" "i")]
5054 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5055 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5057 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli: operand 1 is tied to the output, operand 2 is the second vector
;; input and operand 3 is the SI immediate, checked against 0 and
;; neon_element_bits (<MODE>mode).
;; NOTE(review): the unspec code and insn condition are not visible here.
5060 (define_insn "neon_vsli_n<mode>"
5061 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5062 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5063 (match_operand:VDQIX 2 "s_register_operand" "w")
5064 (match_operand:SI 3 "immediate_operand" "i")]
5068 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5069 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5071 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl.8 with a one-register table: operand 1 is printed inside the braces
;; as the table and operand 2 follows it; all operands are V8QI.
;; NOTE(review): the unspec code and insn condition are not visible here.
5074 (define_insn "neon_vtbl1v8qi"
5075 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5076 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5077 (match_operand:V8QI 2 "s_register_operand" "w")]
5080 "vtbl.8\t%P0, {%P1}, %P2"
5081 [(set_attr "type" "neon_tbl1")]
;; vtbl.8 with a two-register table held in the TImode operand 1.  The output
;; code takes REGNO (operands[1]) as the table base and builds V8QImode
;; registers at base and base + 2 for the register list; operand 2 is printed
;; after the list.
;; NOTE(review): the declaration of `ops', the return statement, the unspec
;; code and the insn condition are not visible in this listing.
5084 (define_insn "neon_vtbl2v8qi"
5085 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5086 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5087 (match_operand:V8QI 2 "s_register_operand" "w")]
5092 int tabbase = REGNO (operands[1]);
5094 ops[0] = operands[0];
5095 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5096 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5097 ops[3] = operands[2];
5098 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5102 [(set_attr "type" "neon_tbl2")]
;; vtbl.8 with a three-register table held in the EImode operand 1.  The
;; output code takes REGNO (operands[1]) as the table base and builds
;; V8QImode registers at base, base + 2 and base + 4 for the register list;
;; operand 2 is printed after the list.
;; NOTE(review): the declaration of `ops', the return statement, the unspec
;; code and the insn condition are not visible in this listing.
5105 (define_insn "neon_vtbl3v8qi"
5106 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5107 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5108 (match_operand:V8QI 2 "s_register_operand" "w")]
5113 int tabbase = REGNO (operands[1]);
5115 ops[0] = operands[0];
5116 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5117 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5118 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5119 ops[4] = operands[2];
5120 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5124 [(set_attr "type" "neon_tbl3")]
;; vtbl.8 with a four-register table held in the OImode operand 1.  The
;; output code takes REGNO (operands[1]) as the table base and builds
;; V8QImode registers at base, base + 2, base + 4 and base + 6 for the
;; register list; operand 2 is printed after the list.
;; NOTE(review): the declaration of `ops', the return statement, the unspec
;; code and the insn condition are not visible in this listing.
5127 (define_insn "neon_vtbl4v8qi"
5128 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5129 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5130 (match_operand:V8QI 2 "s_register_operand" "w")]
5135 int tabbase = REGNO (operands[1]);
5137 ops[0] = operands[0];
5138 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5139 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5140 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5141 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5142 ops[5] = operands[2];
5143 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5147 [(set_attr "type" "neon_tbl4")]
5150 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup, split once reload_completed holds into two
;; neon_vtbl2v8qi insns: one on the low V8QI halves of op0/op2 and one on the
;; high halves.  op1 is operand 1 viewed in TImode via gen_lowpart and is
;; passed unchanged to both halves.  The output has an earlyclobber ("=&w").
;; NOTE(review): the assignments of op0 and op2, the declaration of `ofs', the
;; unspec code and the insn condition are not visible in this listing.
5151 (define_insn_and_split "neon_vtbl1v16qi"
5152 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5153 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5154 (match_operand:V16QI 2 "s_register_operand" "w")]
5158 "&& reload_completed"
5161 rtx op0, op1, op2, part0, part2;
5165 op1 = gen_lowpart (TImode, operands[1]);
5168 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5169 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5170 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5171 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5173 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5174 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5175 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5176 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5179 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table (operand 1), split once reload
;; has completed into two instructions: one for the V8QImode low-part
;; subregs of op0/op2 and one for the high-part subregs.  The early-clobber
;; output ("=&w") keeps operand 0 from overlapping the inputs.
;; NOTE(review): both halves are emitted with gen_neon_vtbl2v8qi, whose
;; table operand is declared TImode and which only lists two table
;; registers, whereas the table here is OImode; neon_vtbl4v8qi is the
;; pattern that takes an OImode table.  Confirm which helper is intended.
5182 (define_insn_and_split "neon_vtbl2v16qi"
5183 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5184 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5185 (match_operand:V16QI 2 "s_register_operand" "w")]
5189 "&& reload_completed"
5192 rtx op0, op1, op2, part0, part2;
5199 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5200 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5201 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5202 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5204 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5205 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5206 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5207 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5210 [(set_attr "type" "multiple")]
5213 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5214 ;; handle quad-word input modes, producing octa-word output modes. But
5215 ;; that requires us to add support for octa-word vector modes in moves.
5216 ;; That seems overkill for this one use in vec_perm.
;;
;; Combines two V16QI registers (operands 1 and 2) into one OImode
;; register (operand 0).  Once reload has completed the pattern is split
;; by handing the operand array to neon_split_vcombine.
5217 (define_insn_and_split "neon_vcombinev16qi"
5218 [(set (match_operand:OI 0 "s_register_operand" "=w")
5219 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5220 (match_operand:V16QI 2 "s_register_operand" "w")]
5224 "&& reload_completed"
5227 neon_split_vcombine (operands);
5230 [(set_attr "type" "multiple")]
;; VTBX with a one-register table.  Operand 1 is tied to the output by the
;; "0" constraint, so the instruction reads and writes %P0; operand 2 is
;; the single table register and operand 3 is the final vtbx.8 operand.
5233 (define_insn "neon_vtbx1v8qi"
5234 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5235 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5236 (match_operand:V8QI 2 "s_register_operand" "w")
5237 (match_operand:V8QI 3 "s_register_operand" "w")]
5240 "vtbx.8\t%P0, {%P2}, %P3"
5241 [(set_attr "type" "neon_tbl1")]
;; VTBX with a two-register table.  Operand 1 is tied to the output ("0").
;; Operand 2 is the TImode table; the register list is built from its
;; register number (tabbase) and tabbase + 2 as V8QImode registers.
;; Operand 3 is printed as the final vtbx.8 operand.
5244 (define_insn "neon_vtbx2v8qi"
5245 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5246 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5247 (match_operand:TI 2 "s_register_operand" "w")
5248 (match_operand:V8QI 3 "s_register_operand" "w")]
5253 int tabbase = REGNO (operands[2]);
5255 ops[0] = operands[0];
5256 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5257 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5258 ops[3] = operands[3];
5259 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5263 [(set_attr "type" "neon_tbl2")]
;; VTBX with a three-register table.  Operand 1 is tied to the output
;; ("0").  Operand 2 is the EImode table; the register list is built from
;; tabbase, tabbase + 2 and tabbase + 4 as V8QImode registers.  Operand 3
;; is printed as the final vtbx.8 operand.
5266 (define_insn "neon_vtbx3v8qi"
5267 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5268 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5269 (match_operand:EI 2 "s_register_operand" "w")
5270 (match_operand:V8QI 3 "s_register_operand" "w")]
5275 int tabbase = REGNO (operands[2]);
5277 ops[0] = operands[0];
5278 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5279 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5280 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5281 ops[4] = operands[3];
5282 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5286 [(set_attr "type" "neon_tbl3")]
;; VTBX with a four-register table.  Operand 1 is tied to the output
;; ("0").  Operand 2 is the OImode table; the register list is built from
;; tabbase, tabbase + 2, tabbase + 4 and tabbase + 6 as V8QImode
;; registers.  Operand 3 is printed as the final vtbx.8 operand.
5289 (define_insn "neon_vtbx4v8qi"
5290 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5291 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5292 (match_operand:OI 2 "s_register_operand" "w")
5293 (match_operand:V8QI 3 "s_register_operand" "w")]
5298 int tabbase = REGNO (operands[2]);
5300 ops[0] = operands[0];
5301 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5302 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5303 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5304 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5305 ops[5] = operands[3];
5306 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5310 [(set_attr "type" "neon_tbl4")]
;; Expander for VTRN over the VDQWH modes.  It produces two results from
;; the same pair of inputs (operands 1 and 2): operand 0 from the first
;; unspec and operand 3 from the UNSPEC_VTRN2 unspec.
5313 (define_expand "neon_vtrn<mode>_internal"
5315 [(set (match_operand:VDQWH 0 "s_register_operand")
5316 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5317 (match_operand:VDQWH 2 "s_register_operand")]
5319 (set (match_operand:VDQWH 3 "s_register_operand")
5320 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5325 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2, and the two inputs are
;; operands 1 and 3, tied to them through the "0" and "2" constraints.
;; Both outputs are early-clobber ("=&w").  The single vtrn instruction is
;; therefore printed with just operands 0 and 2.
5326 (define_insn "*neon_vtrn<mode>_insn"
5327 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5328 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5329 (match_operand:VDQWH 3 "s_register_operand" "2")]
5331 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5332 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5335 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5336 [(set_attr "type" "neon_permute<q>")]
;; Expander for VZIP over the VDQWH modes.  It produces two results from
;; the same pair of inputs (operands 1 and 2): operand 0 from the first
;; unspec and operand 3 from the UNSPEC_VZIP2 unspec.
5339 (define_expand "neon_vzip<mode>_internal"
5341 [(set (match_operand:VDQWH 0 "s_register_operand")
5342 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5343 (match_operand:VDQWH 2 "s_register_operand")]
5345 (set (match_operand:VDQWH 3 "s_register_operand")
5346 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5351 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2, and the two inputs are
;; operands 1 and 3, tied to them through the "0" and "2" constraints.
;; Both outputs are early-clobber ("=&w").  The single vzip instruction is
;; therefore printed with just operands 0 and 2.
5352 (define_insn "*neon_vzip<mode>_insn"
5353 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5354 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5355 (match_operand:VDQWH 3 "s_register_operand" "2")]
5357 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5358 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5361 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5362 [(set_attr "type" "neon_zip<q>")]
;; Expander for VUZP over the VDQWH modes.  It produces two results from
;; the same pair of inputs (operands 1 and 2): operand 0 from the first
;; unspec and operand 3 from the UNSPEC_VUZP2 unspec.
;; NOTE(review): operand 3 carries an explicit empty constraint string
;; here, unlike the other operands of this expander and unlike the vtrn
;; and vzip expanders; this looks like a cosmetic leftover.
5365 (define_expand "neon_vuzp<mode>_internal"
5367 [(set (match_operand:VDQWH 0 "s_register_operand")
5368 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5369 (match_operand:VDQWH 2 "s_register_operand")]
5371 (set (match_operand:VDQWH 3 "s_register_operand" "")
5372 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5377 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2, and the two inputs are
;; operands 1 and 3, tied to them through the "0" and "2" constraints.
;; Both outputs are early-clobber ("=&w").  The single vuzp instruction is
;; therefore printed with just operands 0 and 2.
5378 (define_insn "*neon_vuzp<mode>_insn"
5379 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5380 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5381 (match_operand:VDQWH 3 "s_register_operand" "2")]
5383 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5384 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5387 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5388 [(set_attr "type" "neon_zip<q>")]
;; Expander named vec_load_lanes<mode><mode> for the VDQX modes: register
;; operand 0 is set from an unspec over the "neon_struct_operand"
;; operand 1 of the same mode.
5391 (define_expand "vec_load_lanes<mode><mode>"
5392 [(set (match_operand:VDQX 0 "s_register_operand")
5393 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-vector VLD1 for the VDQX modes: loads register operand 0 from the
;; structure operand 1 ("Um" constraint).  The register list is printed
;; with %h0 and the address with %A1.
5397 (define_insn "neon_vld1<mode>"
5398 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5399 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5402 "vld1.<V_sz_elem>\t%h0, %A1"
5403 [(set_attr "type" "neon_load1_1reg<q>")]
5406 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5407 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order by the NEON_ENDIAN_LANE_N call below before being printed.
;;
;; Single-lane VLD1 for the VDX modes.  Operand 1 is the element-mode
;; memory source, operand 2 the existing vector (tied to the output with
;; "0") and operand 3 the immediate lane.  The converted lane replaces
;; operands[3]; the output is either the whole-register form or the
;; indexed {%P0[%c3]} form (the test choosing between the two return
;; statements is not visible here; `max' is the mode's unit count).
5409 (define_insn "neon_vld1_lane<mode>"
5410 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5411 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5412 (match_operand:VDX 2 "s_register_operand" "0")
5413 (match_operand:SI 3 "immediate_operand" "i")]
5417 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5418 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5419 operands[3] = GEN_INT (lane);
5421 return "vld1.<V_sz_elem>\t%P0, %A1";
5423 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5425 [(set_attr "type" "neon_load1_one_lane<q>")]
5428 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5429 ;; here on big endian targets.
;;
;; Single-lane VLD1 for the VQX modes.  The lane is converted with
;; NEON_ENDIAN_LANE_N and compared against half the unit count (the body
;; of that `if' is not visible here; presumably it selects the upper half
;; register and rebases the lane -- confirm).  Operand 0 is then replaced
;; by a <V_HALF>mode register built from `regno', so the instruction is
;; printed against a %P0 half register.
5430 (define_insn "neon_vld1_lane<mode>"
5431 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5432 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5433 (match_operand:VQX 2 "s_register_operand" "0")
5434 (match_operand:SI 3 "immediate_operand" "i")]
5438 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5439 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5440 operands[3] = GEN_INT (lane);
5441 int regno = REGNO (operands[0]);
5442 if (lane >= max / 2)
5446 operands[3] = GEN_INT (lane);
5448 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5450 return "vld1.<V_sz_elem>\t%P0, %A1";
5452 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5454 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load-and-duplicate for the VD_LANE modes: the element-mode memory
;; operand 1 is vec_duplicate'd into register operand 0, printed as a
;; vld1 with the all-lanes {%P0[]} register list.
5457 (define_insn "neon_vld1_dup<mode>"
5458 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5459 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5461 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5462 [(set_attr "type" "neon_load1_all_lanes<q>")]
5465 ;; Special case for DImode. Treat it exactly like a simple load.
;; Register operand 0 is set from an unspec over the DImode structure
;; operand 1.
5466 (define_expand "neon_vld1_dupdi"
5467 [(set (match_operand:DI 0 "s_register_operand" "")
5468 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Load-and-duplicate for the VQ2 modes: the element-mode memory operand 1
;; is vec_duplicate'd into register operand 0.  The all-lanes register
;; list names two registers, %e0[] and %f0[] (presumably the two
;; double-word halves of the quad register -- confirm against the
;; operand-print code).
5474 (define_insn "neon_vld1_dup<mode>"
5475 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5476 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5479 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5481 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI load-and-duplicate, split once reload has completed.  The split
;; loads the DImode low part of operand 0 with neon_vld1_dupdi and then
;; copies that low part into the DImode high part with a move.  The
;; "length" attribute is 8.
5484 (define_insn_and_split "neon_vld1_dupv2di"
5485 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5486 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5489 "&& reload_completed"
5492 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5493 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5494 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5497 [(set_attr "length" "8")
5498 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander named vec_store_lanes<mode><mode> for the VDQX modes: the
;; "neon_struct_operand" operand 0 is set from an unspec over register
;; operand 1 of the same mode.
5501 (define_expand "vec_store_lanes<mode><mode>"
5502 [(set (match_operand:VDQX 0 "neon_struct_operand")
5503 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector VST1 for the VDQX modes: stores register operand 1 to the
;; structure operand 0 ("=Um").  The register list is printed with %h1
;; and the address with %A0.
5507 (define_insn "neon_vst1<mode>"
5508 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5509 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5512 "vst1.<V_sz_elem>\t%h1, %A0"
5513 [(set_attr "type" "neon_store1_1reg<q>")])
5515 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5516 ;; here on big endian targets.
;;
;; Single-lane VST1 for the VDX modes.  Operand 0 is the element-mode
;; memory destination, operand 1 the source vector and operand 2 the
;; immediate lane.  The lane is converted with NEON_ENDIAN_LANE_N and
;; written back to operands[2]; the output is either the whole-register
;; {%P1} form or the indexed {%P1[%c2]} form (the test choosing between
;; the two return statements is not visible here).
5517 (define_insn "neon_vst1_lane<mode>"
5518 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5520 [(match_operand:VDX 1 "s_register_operand" "w")
5521 (match_operand:SI 2 "immediate_operand" "i")]
5525 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5526 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5527 operands[2] = GEN_INT (lane);
5529 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5531 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5533 [(set_attr "type" "neon_store1_one_lane<q>")]
5536 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5537 ;; here on big endian targets.
;;
;; Single-lane VST1 for the VQX modes.  The lane is converted with
;; NEON_ENDIAN_LANE_N and compared against half the unit count (the body
;; of that `if' is not visible here; presumably it selects the upper half
;; register and rebases the lane -- confirm).  operands[2] receives the
;; resulting lane and operands[1] is replaced by a <V_HALF>mode register
;; built from `regno', so the store is printed against a %P1 half
;; register.
5538 (define_insn "neon_vst1_lane<mode>"
5539 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5541 [(match_operand:VQX 1 "s_register_operand" "w")
5542 (match_operand:SI 2 "immediate_operand" "i")]
5546 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5547 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5548 int regno = REGNO (operands[1]);
5549 if (lane >= max / 2)
5554 operands[2] = GEN_INT (lane);
5555 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5557 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5559 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5561 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander named vec_load_lanesti<mode>: TImode register operand 0 is set
;; from an unspec over TImode structure operand 1.  The nested
;; UNSPEC_VSTRUCTDUMMY unspec over (const_int 0) carries the VDX mode of
;; this instance of the pattern.
5564 (define_expand "vec_load_lanesti<mode>"
5565 [(set (match_operand:TI 0 "s_register_operand")
5566 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5567 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure load into a TImode register for the VDX modes.
;; For 64-bit elements the instruction printed is vld1.64; otherwise it is
;; vld2.<V_sz_elem>.  The "type" attribute follows the same 64-bit test.
5571 (define_insn "neon_vld2<mode>"
5572 [(set (match_operand:TI 0 "s_register_operand" "=w")
5573 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5574 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5578 if (<V_sz_elem> == 64)
5579 return "vld1.64\t%h0, %A1";
5581 return "vld2.<V_sz_elem>\t%h0, %A1";
5584 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5585 (const_string "neon_load1_2reg<q>")
5586 (const_string "neon_load2_2reg<q>")))]
;; Expander named vec_load_lanesoi<mode> for the VQ2 modes: OImode
;; register operand 0 is set from an unspec over OImode structure
;; operand 1, with a VQ2-mode UNSPEC_VSTRUCTDUMMY marker.
5589 (define_expand "vec_load_lanesoi<mode>"
5590 [(set (match_operand:OI 0 "s_register_operand")
5591 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5592 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VLD2 into an OImode register for the VQ2 modes.  The register list is
;; printed with %h0 and the address with %A1.
5596 (define_insn "neon_vld2<mode>"
5597 [(set (match_operand:OI 0 "s_register_operand" "=w")
5598 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5599 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5602 "vld2.<V_sz_elem>\t%h0, %A1"
5603 [(set_attr "type" "neon_load2_2reg_q")])
5605 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5606 ;; here on big endian targets.
;;
;; Single-lane VLD2 into a TImode register (VD_LANE modes).  Operand 2 is
;; the previous register contents, tied to the output with "0".  The lane
;; is converted with NEON_ENDIAN_LANE_N and the two DImode registers
;; `regno' and `regno + 2' are printed with that lane index.
5607 (define_insn "neon_vld2_lane<mode>"
5608 [(set (match_operand:TI 0 "s_register_operand" "=w")
5609 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5610 (match_operand:TI 2 "s_register_operand" "0")
5611 (match_operand:SI 3 "immediate_operand" "i")
5612 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5616 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5617 int regno = REGNO (operands[0]);
5619 ops[0] = gen_rtx_REG (DImode, regno);
5620 ops[1] = gen_rtx_REG (DImode, regno + 2);
5621 ops[2] = operands[1];
5622 ops[3] = GEN_INT (lane);
5623 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5626 [(set_attr "type" "neon_load2_one_lane<q>")]
5629 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5630 ;; here on big endian targets.
;;
;; Single-lane VLD2 into an OImode register (VQ_HS modes).  The converted
;; lane is compared against half the unit count (the body of that `if' is
;; not visible here; presumably it adjusts `lane' and `regno' for the
;; upper halves -- confirm).  The two DImode registers printed are
;; `regno' and `regno + 4'.
5631 (define_insn "neon_vld2_lane<mode>"
5632 [(set (match_operand:OI 0 "s_register_operand" "=w")
5633 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5634 (match_operand:OI 2 "s_register_operand" "0")
5635 (match_operand:SI 3 "immediate_operand" "i")
5636 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5640 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5641 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5642 int regno = REGNO (operands[0]);
5644 if (lane >= max / 2)
5649 ops[0] = gen_rtx_REG (DImode, regno);
5650 ops[1] = gen_rtx_REG (DImode, regno + 4);
5651 ops[2] = operands[1];
5652 ops[3] = GEN_INT (lane);
5653 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5656 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load-to-all-lanes VLD2 into a TImode register for the VDX modes.  When
;; the mode has more than one unit the all-lanes vld2 form is printed with
;; the %e0[] / %f0[] register list; for single-unit modes a plain vld1
;; with %h0 is used instead.  The "type" attribute mirrors that choice
;; through <V_mode_nunits>.
5659 (define_insn "neon_vld2_dup<mode>"
5660 [(set (match_operand:TI 0 "s_register_operand" "=w")
5661 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5662 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5666 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5667 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5669 return "vld1.<V_sz_elem>\t%h0, %A1";
5672 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5673 (const_string "neon_load2_all_lanes<q>")
5674 (const_string "neon_load1_1reg<q>")))]
;; Expander named vec_store_lanesti<mode> for the VDX modes: TImode
;; structure operand 0 is set from an unspec over TImode register
;; operand 1, with a VDX-mode UNSPEC_VSTRUCTDUMMY marker.
5677 (define_expand "vec_store_lanesti<mode>"
5678 [(set (match_operand:TI 0 "neon_struct_operand")
5679 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5680 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure store from a TImode register for the VDX modes.
;; For 64-bit elements the instruction printed is vst1.64; otherwise it is
;; vst2.<V_sz_elem>.
;; NOTE(review): for the non-64-bit case the "type" attribute is
;; neon_store2_one_lane<q>, although this is a whole-structure store and
;; the matching load pattern (neon_vld2<mode>) uses neon_load2_2reg<q>.
;; Confirm whether a 2reg store type was intended; the attribute does not
;; change the emitted instruction.
5684 (define_insn "neon_vst2<mode>"
5685 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5686 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5687 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5691 if (<V_sz_elem> == 64)
5692 return "vst1.64\t%h1, %A0";
5694 return "vst2.<V_sz_elem>\t%h1, %A0";
5697 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5698 (const_string "neon_store1_2reg<q>")
5699 (const_string "neon_store2_one_lane<q>")))]
;; Expander named vec_store_lanesoi<mode> for the VQ2 modes: OImode
;; structure operand 0 is set from an unspec over OImode register
;; operand 1, with a VQ2-mode UNSPEC_VSTRUCTDUMMY marker.
5702 (define_expand "vec_store_lanesoi<mode>"
5703 [(set (match_operand:OI 0 "neon_struct_operand")
5704 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5705 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VST2 from an OImode register for the VQ2 modes.  The register list is
;; printed with %h1 and the address with %A0.
5709 (define_insn "neon_vst2<mode>"
5710 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5711 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5712 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5715 "vst2.<V_sz_elem>\t%h1, %A0"
5716 [(set_attr "type" "neon_store2_4reg<q>")]
5719 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5720 ;; here on big endian targets.
;;
;; Single-lane VST2 from a TImode register (VD_LANE modes).  The lane in
;; operand 2 is converted with NEON_ENDIAN_LANE_N, and the two DImode
;; registers `regno' and `regno + 2' of operand 1 are printed with that
;; lane index.
5721 (define_insn "neon_vst2_lane<mode>"
5722 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5723 (unspec:<V_two_elem>
5724 [(match_operand:TI 1 "s_register_operand" "w")
5725 (match_operand:SI 2 "immediate_operand" "i")
5726 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5730 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5731 int regno = REGNO (operands[1]);
5733 ops[0] = operands[0];
5734 ops[1] = gen_rtx_REG (DImode, regno);
5735 ops[2] = gen_rtx_REG (DImode, regno + 2);
5736 ops[3] = GEN_INT (lane);
5737 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5740 [(set_attr "type" "neon_store2_one_lane<q>")]
5743 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5744 ;; here on big endian targets.
;;
;; Single-lane VST2 from an OImode register (VQ_HS modes).  The converted
;; lane is compared against half the unit count (the body of that `if' is
;; not visible here; presumably it adjusts `lane' and `regno' for the
;; upper halves -- confirm).  The two DImode registers printed are
;; `regno' and `regno + 4'.
5745 (define_insn "neon_vst2_lane<mode>"
5746 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5747 (unspec:<V_two_elem>
5748 [(match_operand:OI 1 "s_register_operand" "w")
5749 (match_operand:SI 2 "immediate_operand" "i")
5750 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5754 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5755 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5756 int regno = REGNO (operands[1]);
5758 if (lane >= max / 2)
5763 ops[0] = operands[0];
5764 ops[1] = gen_rtx_REG (DImode, regno);
5765 ops[2] = gen_rtx_REG (DImode, regno + 4);
5766 ops[3] = GEN_INT (lane);
5767 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5770 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander named vec_load_lanesei<mode> for the VDX modes: EImode
;; register operand 0 is set from an unspec over EImode structure
;; operand 1, with a VDX-mode UNSPEC_VSTRUCTDUMMY marker.
5773 (define_expand "vec_load_lanesei<mode>"
5774 [(set (match_operand:EI 0 "s_register_operand")
5775 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5776 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-register structure load into an EImode register for the VDX
;; modes.  For 64-bit elements the instruction printed is vld1.64;
;; otherwise it is vld3.<V_sz_elem>.  The "type" attribute follows the
;; same 64-bit test.
5780 (define_insn "neon_vld3<mode>"
5781 [(set (match_operand:EI 0 "s_register_operand" "=w")
5782 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5783 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5787 if (<V_sz_elem> == 64)
5788 return "vld1.64\t%h0, %A1";
5790 return "vld3.<V_sz_elem>\t%h0, %A1";
5793 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5794 (const_string "neon_load1_3reg<q>")
5795 (const_string "neon_load3_3reg<q>")))]
;; Expander named vec_load_lanesci<mode> for the VQ2 modes.  It simply
;; forwards its CImode register and structure operands to the
;; neon_vld3<mode> expander.
5798 (define_expand "vec_load_lanesci<mode>"
5799 [(match_operand:CI 0 "s_register_operand")
5800 (match_operand:CI 1 "neon_struct_operand")
5801 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5804 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; CImode VLD3 for the VQ2 modes, expanded as two EImode loads.  The first
;; (neon_vld3qa) reads from offset 0 of operand 1; the second
;; (neon_vld3qb) reads from the address advanced by GET_MODE_SIZE (EImode)
;; and takes operands[0] both as its output and as its tied input.
5808 (define_expand "neon_vld3<mode>"
5809 [(match_operand:CI 0 "s_register_operand")
5810 (match_operand:CI 1 "neon_struct_operand")
5811 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5816 mem = adjust_address (operands[1], EImode, 0);
5817 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5818 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5819 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of the CImode VLD3: loads an EImode chunk of memory into the
;; DImode registers at `regno', `regno + 4' and `regno + 8' of operand 0.
5823 (define_insn "neon_vld3qa<mode>"
5824 [(set (match_operand:CI 0 "s_register_operand" "=w")
5825 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5826 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5830 int regno = REGNO (operands[0]);
5832 ops[0] = gen_rtx_REG (DImode, regno);
5833 ops[1] = gen_rtx_REG (DImode, regno + 4);
5834 ops[2] = gen_rtx_REG (DImode, regno + 8);
5835 ops[3] = operands[1];
5836 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5839 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the CImode VLD3: loads an EImode chunk of memory into
;; the DImode registers at `regno + 2', `regno + 6' and `regno + 10' of
;; operand 0.  Operand 2 is the previous register contents, tied to the
;; output with "0", so the registers written by neon_vld3qa are kept.
5842 (define_insn "neon_vld3qb<mode>"
5843 [(set (match_operand:CI 0 "s_register_operand" "=w")
5844 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5845 (match_operand:CI 2 "s_register_operand" "0")
5846 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5850 int regno = REGNO (operands[0]);
5852 ops[0] = gen_rtx_REG (DImode, regno + 2);
5853 ops[1] = gen_rtx_REG (DImode, regno + 6);
5854 ops[2] = gen_rtx_REG (DImode, regno + 10);
5855 ops[3] = operands[1];
5856 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5859 [(set_attr "type" "neon_load3_3reg<q>")]
5862 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5863 ;; here on big endian targets.
;;
;; Single-lane VLD3 into an EImode register (VD_LANE modes).  Operand 2 is
;; the previous register contents, tied to the output with "0".  The lane
;; is converted with NEON_ENDIAN_LANE_N and the DImode registers `regno',
;; `regno + 2' and `regno + 4' are printed with that lane index.
;; NOTE(review): the address is printed with plain %3 here rather than the
;; %A modifier used by most other patterns in this group -- presumably
;; deliberate; confirm.
5864 (define_insn "neon_vld3_lane<mode>"
5865 [(set (match_operand:EI 0 "s_register_operand" "=w")
5866 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5867 (match_operand:EI 2 "s_register_operand" "0")
5868 (match_operand:SI 3 "immediate_operand" "i")
5869 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5873 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5874 int regno = REGNO (operands[0]);
5876 ops[0] = gen_rtx_REG (DImode, regno);
5877 ops[1] = gen_rtx_REG (DImode, regno + 2);
5878 ops[2] = gen_rtx_REG (DImode, regno + 4);
5879 ops[3] = operands[1];
5880 ops[4] = GEN_INT (lane);
5881 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5885 [(set_attr "type" "neon_load3_one_lane<q>")]
5888 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5889 ;; here on big endian targets.
;;
;; Single-lane VLD3 into a CImode register (VQ_HS modes).  The converted
;; lane is compared against half the unit count (the body of that `if' is
;; not visible here; presumably it adjusts `lane' and `regno' for the
;; upper halves -- confirm).  The DImode registers printed are `regno',
;; `regno + 4' and `regno + 8'.
;; NOTE(review): the address is printed with plain %3 rather than %A3 --
;; presumably deliberate; confirm.
5890 (define_insn "neon_vld3_lane<mode>"
5891 [(set (match_operand:CI 0 "s_register_operand" "=w")
5892 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5893 (match_operand:CI 2 "s_register_operand" "0")
5894 (match_operand:SI 3 "immediate_operand" "i")
5895 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5899 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5900 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5901 int regno = REGNO (operands[0]);
5903 if (lane >= max / 2)
5908 ops[0] = gen_rtx_REG (DImode, regno);
5909 ops[1] = gen_rtx_REG (DImode, regno + 4);
5910 ops[2] = gen_rtx_REG (DImode, regno + 8);
5911 ops[3] = operands[1];
5912 ops[4] = GEN_INT (lane);
5913 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5917 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load-to-all-lanes VLD3 into an EImode register for the VDX modes.  When
;; the mode has more than one unit, the all-lanes vld3 form is printed for
;; the DImode registers `regno', `regno + 2' and `regno + 4'; for
;; single-unit modes a plain vld1 with %h0 is used instead.  The "type"
;; attribute mirrors that choice through <V_mode_nunits>.
;; NOTE(review): the all-lanes form prints the address with plain %3
;; while the vld1 fallback uses %A1 -- presumably deliberate; confirm.
5920 (define_insn "neon_vld3_dup<mode>"
5921 [(set (match_operand:EI 0 "s_register_operand" "=w")
5922 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5923 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5927 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5929 int regno = REGNO (operands[0]);
5931 ops[0] = gen_rtx_REG (DImode, regno);
5932 ops[1] = gen_rtx_REG (DImode, regno + 2);
5933 ops[2] = gen_rtx_REG (DImode, regno + 4);
5934 ops[3] = operands[1];
5935 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5939 return "vld1.<V_sz_elem>\t%h0, %A1";
5942 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5943 (const_string "neon_load3_all_lanes<q>")
5944 (const_string "neon_load1_1reg<q>")))])
;; Expander named vec_store_lanesei<mode> for the VDX modes: EImode
;; structure operand 0 is set from an unspec over EImode register
;; operand 1, with a VDX-mode UNSPEC_VSTRUCTDUMMY marker.
5946 (define_expand "vec_store_lanesei<mode>"
5947 [(set (match_operand:EI 0 "neon_struct_operand")
5948 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5949 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-register structure store from an EImode register for the VDX
;; modes.  For 64-bit elements the instruction printed is vst1.64;
;; otherwise it is vst3.<V_sz_elem>.
;; NOTE(review): for the non-64-bit case the "type" attribute is
;; neon_store3_one_lane<q>, although this is a whole-structure store and
;; the matching load pattern (neon_vld3<mode>) uses neon_load3_3reg<q>.
;; Confirm whether a 3reg store type was intended; the attribute does not
;; change the emitted instruction.
5953 (define_insn "neon_vst3<mode>"
5954 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5955 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5956 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5960 if (<V_sz_elem> == 64)
5961 return "vst1.64\t%h1, %A0";
5963 return "vst3.<V_sz_elem>\t%h1, %A0";
5966 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5967 (const_string "neon_store1_3reg<q>")
5968 (const_string "neon_store3_one_lane<q>")))])
;; Expander named vec_store_lanesci<mode> for the VQ2 modes.  It simply
;; forwards its CImode structure and register operands to the
;; neon_vst3<mode> expander.
5970 (define_expand "vec_store_lanesci<mode>"
5971 [(match_operand:CI 0 "neon_struct_operand")
5972 (match_operand:CI 1 "s_register_operand")
5973 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5976 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; CImode VST3 for the VQ2 modes, expanded as two EImode stores of the
;; same source register: neon_vst3qa writes at offset 0 of operand 0 and
;; neon_vst3qb writes at the address advanced by GET_MODE_SIZE (EImode).
5980 (define_expand "neon_vst3<mode>"
5981 [(match_operand:CI 0 "neon_struct_operand")
5982 (match_operand:CI 1 "s_register_operand")
5983 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5988 mem = adjust_address (operands[0], EImode, 0);
5989 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5990 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5991 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of the CImode VST3: stores the DImode registers at `regno',
;; `regno + 4' and `regno + 8' of operand 1 to the EImode memory
;; operand 0.
5995 (define_insn "neon_vst3qa<mode>"
5996 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5997 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5998 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6002 int regno = REGNO (operands[1]);
6004 ops[0] = operands[0];
6005 ops[1] = gen_rtx_REG (DImode, regno);
6006 ops[2] = gen_rtx_REG (DImode, regno + 4);
6007 ops[3] = gen_rtx_REG (DImode, regno + 8);
6008 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6011 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the CImode VST3: stores the DImode registers at
;; `regno + 2', `regno + 6' and `regno + 10' of operand 1 to the EImode
;; memory operand 0.
6014 (define_insn "neon_vst3qb<mode>"
6015 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6016 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6017 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6021 int regno = REGNO (operands[1]);
6023 ops[0] = operands[0];
6024 ops[1] = gen_rtx_REG (DImode, regno + 2);
6025 ops[2] = gen_rtx_REG (DImode, regno + 6);
6026 ops[3] = gen_rtx_REG (DImode, regno + 10);
6027 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6030 [(set_attr "type" "neon_store3_3reg<q>")]
6033 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6034 ;; here on big endian targets.
;;
;; Single-lane VST3 from an EImode register (VD_LANE modes).  The lane in
;; operand 2 is converted with NEON_ENDIAN_LANE_N, and the DImode
;; registers `regno', `regno + 2' and `regno + 4' of operand 1 are printed
;; with that lane index.
;; NOTE(review): the address is printed with plain %0 rather than %A0 --
;; presumably deliberate; confirm.
6035 (define_insn "neon_vst3_lane<mode>"
6036 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6037 (unspec:<V_three_elem>
6038 [(match_operand:EI 1 "s_register_operand" "w")
6039 (match_operand:SI 2 "immediate_operand" "i")
6040 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6044 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6045 int regno = REGNO (operands[1]);
6047 ops[0] = operands[0];
6048 ops[1] = gen_rtx_REG (DImode, regno);
6049 ops[2] = gen_rtx_REG (DImode, regno + 2);
6050 ops[3] = gen_rtx_REG (DImode, regno + 4);
6051 ops[4] = GEN_INT (lane);
6052 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6056 [(set_attr "type" "neon_store3_one_lane<q>")]
6059 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6060 ;; here on big endian targets.
;;
;; Single-lane VST3 from a CImode register (VQ_HS modes).  The converted
;; lane is compared against half the unit count (the body of that `if' is
;; not visible here; presumably it adjusts `lane' and `regno' for the
;; upper halves -- confirm).  The DImode registers printed are `regno',
;; `regno + 4' and `regno + 8'.
;; NOTE(review): the address is printed with plain %0 rather than %A0 --
;; presumably deliberate; confirm.
6061 (define_insn "neon_vst3_lane<mode>"
6062 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6063 (unspec:<V_three_elem>
6064 [(match_operand:CI 1 "s_register_operand" "w")
6065 (match_operand:SI 2 "immediate_operand" "i")
6066 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6070 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6071 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6072 int regno = REGNO (operands[1]);
6074 if (lane >= max / 2)
6079 ops[0] = operands[0];
6080 ops[1] = gen_rtx_REG (DImode, regno);
6081 ops[2] = gen_rtx_REG (DImode, regno + 4);
6082 ops[3] = gen_rtx_REG (DImode, regno + 8);
6083 ops[4] = GEN_INT (lane);
6084 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6088 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander named vec_load_lanesoi<mode> for the VDX modes: OImode
;; register operand 0 is set from an unspec over OImode structure
;; operand 1, with a VDX-mode UNSPEC_VSTRUCTDUMMY marker.
6091 (define_expand "vec_load_lanesoi<mode>"
6092 [(set (match_operand:OI 0 "s_register_operand")
6093 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6094 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-register structure load into an OImode register for the VDX
;; modes.  For 64-bit elements the instruction printed is vld1.64;
;; otherwise it is vld4.<V_sz_elem>.  The "type" attribute follows the
;; same 64-bit test.
6098 (define_insn "neon_vld4<mode>"
6099 [(set (match_operand:OI 0 "s_register_operand" "=w")
6100 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6101 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6105 if (<V_sz_elem> == 64)
6106 return "vld1.64\t%h0, %A1";
6108 return "vld4.<V_sz_elem>\t%h0, %A1";
6111 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6112 (const_string "neon_load1_4reg<q>")
6113 (const_string "neon_load4_4reg<q>")))]
;; Expander named vec_load_lanesxi<mode> for the VQ2 modes.  It simply
;; forwards its XImode register and structure operands to the
;; neon_vld4<mode> expander.
6116 (define_expand "vec_load_lanesxi<mode>"
6117 [(match_operand:XI 0 "s_register_operand")
6118 (match_operand:XI 1 "neon_struct_operand")
6119 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6122 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; XImode VLD4 for the VQ2 modes, expanded as two OImode loads.  The first
;; (neon_vld4qa) reads from offset 0 of operand 1; the second
;; (neon_vld4qb) reads from the address advanced by GET_MODE_SIZE (OImode)
;; and takes operands[0] both as its output and as its tied input.
6126 (define_expand "neon_vld4<mode>"
6127 [(match_operand:XI 0 "s_register_operand")
6128 (match_operand:XI 1 "neon_struct_operand")
6129 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6134 mem = adjust_address (operands[1], OImode, 0);
6135 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6136 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6137 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of the XImode VLD4: loads an OImode chunk of memory into the
;; DImode registers at `regno', `regno + 4', `regno + 8' and `regno + 12'
;; of operand 0.
6141 (define_insn "neon_vld4qa<mode>"
6142 [(set (match_operand:XI 0 "s_register_operand" "=w")
6143 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6144 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6148 int regno = REGNO (operands[0]);
6150 ops[0] = gen_rtx_REG (DImode, regno);
6151 ops[1] = gen_rtx_REG (DImode, regno + 4);
6152 ops[2] = gen_rtx_REG (DImode, regno + 8);
6153 ops[3] = gen_rtx_REG (DImode, regno + 12);
6154 ops[4] = operands[1];
6155 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6158 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the XImode VLD4: loads an OImode chunk of memory into
;; the DImode registers at `regno + 2', `regno + 6', `regno + 10' and
;; `regno + 14' of operand 0.  Operand 2 is the previous register
;; contents, tied to the output with "0", so the registers written by
;; neon_vld4qa are kept.
6161 (define_insn "neon_vld4qb<mode>"
6162 [(set (match_operand:XI 0 "s_register_operand" "=w")
6163 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6164 (match_operand:XI 2 "s_register_operand" "0")
6165 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6169 int regno = REGNO (operands[0]);
6171 ops[0] = gen_rtx_REG (DImode, regno + 2);
6172 ops[1] = gen_rtx_REG (DImode, regno + 6);
6173 ops[2] = gen_rtx_REG (DImode, regno + 10);
6174 ops[3] = gen_rtx_REG (DImode, regno + 14);
6175 ops[4] = operands[1];
6176 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6179 [(set_attr "type" "neon_load4_4reg<q>")]
6182 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6183 ;; here on big endian targets.
;;
;; Single-lane VLD4 into an OImode register (VD_LANE modes).  Operand 2 is
;; the previous register contents, tied to the output with "0".  The lane
;; is converted with NEON_ENDIAN_LANE_N and the DImode registers `regno',
;; `regno + 2', `regno + 4' and `regno + 6' are printed with that lane
;; index.
6184 (define_insn "neon_vld4_lane<mode>"
6185 [(set (match_operand:OI 0 "s_register_operand" "=w")
6186 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6187 (match_operand:OI 2 "s_register_operand" "0")
6188 (match_operand:SI 3 "immediate_operand" "i")
6189 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6193 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6194 int regno = REGNO (operands[0]);
6196 ops[0] = gen_rtx_REG (DImode, regno);
6197 ops[1] = gen_rtx_REG (DImode, regno + 2);
6198 ops[2] = gen_rtx_REG (DImode, regno + 4);
6199 ops[3] = gen_rtx_REG (DImode, regno + 6);
6200 ops[4] = operands[1];
6201 ops[5] = GEN_INT (lane);
6202 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6206 [(set_attr "type" "neon_load4_one_lane<q>")]
6209 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6210 ;; here on big endian targets.
;;
;; Single-lane VLD4 into an XImode register (VQ_HS modes).  The converted
;; lane is compared against half the unit count (the body of that `if' is
;; not visible here; presumably it adjusts `lane' and `regno' for the
;; upper halves -- confirm).  The DImode registers printed are `regno',
;; `regno + 4', `regno + 8' and `regno + 12'.
6211 (define_insn "neon_vld4_lane<mode>"
6212 [(set (match_operand:XI 0 "s_register_operand" "=w")
6213 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6214 (match_operand:XI 2 "s_register_operand" "0")
6215 (match_operand:SI 3 "immediate_operand" "i")
6216 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6220 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6221 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6222 int regno = REGNO (operands[0]);
6224 if (lane >= max / 2)
6229 ops[0] = gen_rtx_REG (DImode, regno);
6230 ops[1] = gen_rtx_REG (DImode, regno + 4);
6231 ops[2] = gen_rtx_REG (DImode, regno + 8);
6232 ops[3] = gen_rtx_REG (DImode, regno + 12);
6233 ops[4] = operands[1];
6234 ops[5] = GEN_INT (lane);
6235 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6239 [(set_attr "type" "neon_load4_one_lane<q>")]
;; VLD4 to all lanes of an OImode register group (VDX modes).
;; When the mode has more than one element the asm is vld4 with the
;; "[]" all-lanes syntax on DImode registers regno, +2, +4 and +6;
;; the remaining case is printed as a plain vld1.  The "type" attribute
;; draws the same distinction by comparing <V_mode_nunits> with 1.
6242 (define_insn "neon_vld4_dup<mode>"
6243 [(set (match_operand:OI 0 "s_register_operand" "=w")
6244 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6245 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6249 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6251 int regno = REGNO (operands[0]);
6253 ops[0] = gen_rtx_REG (DImode, regno);
6254 ops[1] = gen_rtx_REG (DImode, regno + 2);
6255 ops[2] = gen_rtx_REG (DImode, regno + 4);
6256 ops[3] = gen_rtx_REG (DImode, regno + 6);
6257 ops[4] = operands[1];
6258 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6263 return "vld1.<V_sz_elem>\t%h0, %A1";
6266 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6267 (const_string "neon_load4_all_lanes<q>")
6268 (const_string "neon_load1_1reg<q>")))]
;; Expander for vec_store_lanesoi<mode> (VDX modes): sets the OImode
;; structure memory operand 0 from an unspec wrapping the OImode
;; register operand 1.
6271 (define_expand "vec_store_lanesoi<mode>"
6272 [(set (match_operand:OI 0 "neon_struct_operand")
6273 (unspec:OI [(match_operand:OI 1 "s_register_operand")
6274 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VST4 of an OImode register group (VDX modes) to structure memory.
;; When <V_sz_elem> is 64 the store is printed as vst1.64 instead of
;; vst4; the "type" attribute switches between neon_store1_4reg<q> and
;; neon_store4_4reg<q> on the same element-size test.
6278 (define_insn "neon_vst4<mode>"
6279 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6280 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6281 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6285 if (<V_sz_elem> == 64)
6286 return "vst1.64\t%h1, %A0";
6288 return "vst4.<V_sz_elem>\t%h1, %A0";
6291 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6292 (const_string "neon_store1_4reg<q>")
6293 (const_string "neon_store4_4reg<q>")))]
;; Expander for vec_store_lanesxi<mode> (VQ2 modes): forwards the XImode
;; memory (operand 0) and XImode register (operand 1) unchanged to
;; gen_neon_vst4<mode>.
6296 (define_expand "vec_store_lanesxi<mode>"
6297 [(match_operand:XI 0 "neon_struct_operand")
6298 (match_operand:XI 1 "s_register_operand")
6299 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6302 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; XImode VST4 expander (VQ2 modes).  The store is emitted as two
;; OImode pieces: neon_vst4qa on the memory at offset 0, then
;; neon_vst4qb on that memory advanced by GET_MODE_SIZE (OImode).
;; Both insns receive the whole XImode register operand 1.
6306 (define_expand "neon_vst4<mode>"
6307 [(match_operand:XI 0 "neon_struct_operand")
6308 (match_operand:XI 1 "s_register_operand")
6309 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6314 mem = adjust_address (operands[0], OImode, 0);
6315 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6316 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6317 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; One OImode-sized piece of an XImode VST4 (VQ2 modes): stores the
;; DImode registers regno, regno+4, regno+8 and regno+12 of the XImode
;; operand 1 to the OImode memory operand 0.
6321 (define_insn "neon_vst4qa<mode>"
6322 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6323 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6324 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6328 int regno = REGNO (operands[1]);
6330 ops[0] = operands[0];
6331 ops[1] = gen_rtx_REG (DImode, regno);
6332 ops[2] = gen_rtx_REG (DImode, regno + 4);
6333 ops[3] = gen_rtx_REG (DImode, regno + 8);
6334 ops[4] = gen_rtx_REG (DImode, regno + 12);
6335 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6338 [(set_attr "type" "neon_store4_4reg<q>")]
;; One OImode-sized piece of an XImode VST4 (VQ2 modes): stores the
;; DImode registers regno+2, regno+6, regno+10 and regno+14 of the
;; XImode operand 1 to the OImode memory operand 0.
6341 (define_insn "neon_vst4qb<mode>"
6342 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6343 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6344 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6348 int regno = REGNO (operands[1]);
6350 ops[0] = operands[0];
6351 ops[1] = gen_rtx_REG (DImode, regno + 2);
6352 ops[2] = gen_rtx_REG (DImode, regno + 6);
6353 ops[3] = gen_rtx_REG (DImode, regno + 10);
6354 ops[4] = gen_rtx_REG (DImode, regno + 14);
6355 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6358 [(set_attr "type" "neon_store4_4reg<q>")]
;; VST4 single-lane store from an OImode register group (VD_LANE modes)
;; to a <V_four_elem> memory operand.  Operand 2 is the lane number,
;; remapped through NEON_ENDIAN_LANE_N before being printed.  The asm
;; names the DImode registers regno, regno+2, regno+4 and regno+6.
6361 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6362 ;; here on big endian targets.
6363 (define_insn "neon_vst4_lane<mode>"
6364 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6365 (unspec:<V_four_elem>
6366 [(match_operand:OI 1 "s_register_operand" "w")
6367 (match_operand:SI 2 "immediate_operand" "i")
6368 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6372 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6373 int regno = REGNO (operands[1]);
6375 ops[0] = operands[0];
6376 ops[1] = gen_rtx_REG (DImode, regno);
6377 ops[2] = gen_rtx_REG (DImode, regno + 2);
6378 ops[3] = gen_rtx_REG (DImode, regno + 4);
6379 ops[4] = gen_rtx_REG (DImode, regno + 6);
6380 ops[5] = GEN_INT (lane);
6381 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6385 [(set_attr "type" "neon_store4_one_lane<q>")]
;; VST4 single-lane store from an XImode register group (VQ_HS modes)
;; to a <V_four_elem> memory operand.  Operand 2 is the lane number,
;; remapped through NEON_ENDIAN_LANE_N.  The DImode registers named in
;; the asm are spaced four apart (regno, +4, +8, +12); lane numbers at
;; or above half of GET_MODE_NUNITS are special-cased by the
;; `lane >= max / 2' test.
;; The "type" attribute is the one-lane store class, matching the
;; OImode variant of this pattern and the XImode neon_vld4_lane, since
;; the emitted instruction stores a single lane rather than four whole
;; registers.
6388 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6389 ;; here on big endian targets.
6390 (define_insn "neon_vst4_lane<mode>"
6391 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6392 (unspec:<V_four_elem>
6393 [(match_operand:XI 1 "s_register_operand" "w")
6394 (match_operand:SI 2 "immediate_operand" "i")
6395 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6399 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6400 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6401 int regno = REGNO (operands[1]);
6403 if (lane >= max / 2)
6408 ops[0] = operands[0];
6409 ops[1] = gen_rtx_REG (DImode, regno);
6410 ops[2] = gen_rtx_REG (DImode, regno + 4);
6411 ops[3] = gen_rtx_REG (DImode, regno + 8);
6412 ops[4] = gen_rtx_REG (DImode, regno + 12);
6413 ops[5] = GEN_INT (lane);
6414 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6418 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Widen one half of a VU-mode vector to <V_unpack> with vmovl.  The
;; half is the one selected by the vect_par_constant_low parallel in
;; operand 2 and is printed with the %e modifier on operand 1.
;; Enabled only for TARGET_NEON on little-endian.
6421 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6422 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6423 (SE:<V_unpack> (vec_select:<V_HALF>
6424 (match_operand:VU 1 "register_operand" "w")
6425 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6426 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6427 "vmovl.<US><V_sz_elem> %q0, %e1"
6428 [(set_attr "type" "neon_shift_imm_long")]
;; Widen one half of a VU-mode vector to <V_unpack> with vmovl.  The
;; half is the one selected by the vect_par_constant_high parallel in
;; operand 2 and is printed with the %f modifier on operand 1.
;; Enabled only for TARGET_NEON on little-endian.
6431 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6432 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6433 (SE:<V_unpack> (vec_select:<V_HALF>
6434 (match_operand:VU 1 "register_operand" "w")
6435 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6436 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6437 "vmovl.<US><V_sz_elem> %q0, %f1"
6438 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for vec_unpack<US>_hi_<mode> on VU modes (little-endian
;; only).  Builds a PARALLEL holding the element indices
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_unpack<US>_hi_<mode> with it.
6441 (define_expand "vec_unpack<US>_hi_<mode>"
6442 [(match_operand:<V_unpack> 0 "register_operand" "")
6443 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6444 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6446 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6449 for (i = 0; i < (<V_mode_nunits>/2); i++)
6450 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6452 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6453 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for vec_unpack<US>_lo_<mode> on VU modes (little-endian
;; only).  Builds a PARALLEL holding the element indices
;; 0 .. <V_mode_nunits>/2-1 and emits neon_vec_unpack<US>_lo_<mode>
;; with it.
6460 (define_expand "vec_unpack<US>_lo_<mode>"
6461 [(match_operand:<V_unpack> 0 "register_operand" "")
6462 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6463 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6465 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6468 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6469 RTVEC_ELT (v, i) = GEN_INT (i);
6470 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6471 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply (vmull) of matching halves of two VU-mode vectors,
;; producing <V_unpack>.  Operand 2 is a vect_par_constant_low parallel
;; and the inputs are printed with the %e modifier (operands 1 and 3).
;; Enabled only for TARGET_NEON on little-endian.
6478 (define_insn "neon_vec_<US>mult_lo_<mode>"
6479 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6480 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6481 (match_operand:VU 1 "register_operand" "w")
6482 (match_operand:VU 2 "vect_par_constant_low" "")))
6483 (SE:<V_unpack> (vec_select:<V_HALF>
6484 (match_operand:VU 3 "register_operand" "w")
6486 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6487 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6488 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for vec_widen_<US>mult_lo_<mode> on VU modes (little-endian
;; only).  Builds a PARALLEL holding the element indices
;; 0 .. <V_mode_nunits>/2-1 and emits neon_vec_<US>mult_lo_<mode>.
6491 (define_expand "vec_widen_<US>mult_lo_<mode>"
6492 [(match_operand:<V_unpack> 0 "register_operand" "")
6493 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6494 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6495 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6497 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6500 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6501 RTVEC_ELT (v, i) = GEN_INT (i);
6502 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6504 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply (vmull) of matching halves of two VU-mode vectors,
;; producing <V_unpack>.  Operand 2 is a vect_par_constant_high parallel
;; and the inputs are printed with the %f modifier (operands 1 and 3).
;; Enabled only for TARGET_NEON on little-endian.
6512 (define_insn "neon_vec_<US>mult_hi_<mode>"
6513 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6514 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6515 (match_operand:VU 1 "register_operand" "w")
6516 (match_operand:VU 2 "vect_par_constant_high" "")))
6517 (SE:<V_unpack> (vec_select:<V_HALF>
6518 (match_operand:VU 3 "register_operand" "w")
6520 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6521 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6522 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for vec_widen_<US>mult_hi_<mode> on VU modes (little-endian
;; only).  Builds a PARALLEL holding the element indices
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_<US>mult_hi_<mode>.
6525 (define_expand "vec_widen_<US>mult_hi_<mode>"
6526 [(match_operand:<V_unpack> 0 "register_operand" "")
6527 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6528 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6529 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6531 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6534 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6535 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6536 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6538 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left (vshll) of a VW-mode register by the constant in
;; operand 2 (const_neon_scalar_shift_amount_operand), producing
;; <V_widen>.
6547 (define_insn "neon_vec_<US>shiftl_<mode>"
6548 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6549 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6550 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6553 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6555 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for vec_widen_<US>shiftl_lo_<mode> on VU modes
;; (little-endian only).  Takes the <V_HALF>mode subreg of operand 1 at
;; byte offset 0 and hands it to neon_vec_<US>shiftl_<V_half>.
6558 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6559 [(match_operand:<V_unpack> 0 "register_operand" "")
6560 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6561 (match_operand:SI 2 "immediate_operand" "i")]
6562 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6564 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6565 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for vec_widen_<US>shiftl_hi_<mode> on VU modes
;; (little-endian only).  Takes the <V_HALF>mode subreg of operand 1 at
;; byte offset GET_MODE_SIZE (<V_HALF>mode) and hands it to
;; neon_vec_<US>shiftl_<V_half>.
6571 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6572 [(match_operand:<V_unpack> 0 "register_operand" "")
6573 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6574 (match_operand:SI 2 "immediate_operand" "i")]
6575 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6577 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6578 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6579 GET_MODE_SIZE (<V_HALF>mode)),
6585 ;; Vectorize for non-neon-quad case
;; Widen a whole VDI-mode register to <V_widen> with a single vmovl.
6586 (define_insn "neon_unpack<US>_<mode>"
6587 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6588 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6590 "vmovl.<US><V_sz_elem> %q0, %P1"
6591 [(set_attr "type" "neon_move")]
;; Expander for vec_unpack<US>_lo_<mode> on VDI modes.  Widens all of
;; operand 1 into a fresh <V_widen>mode temporary with
;; neon_unpack<US>_<mode>, then copies out a half of it with
;; gen_neon_vget_low<V_widen_l>.
6594 (define_expand "vec_unpack<US>_lo_<mode>"
6595 [(match_operand:<V_double_width> 0 "register_operand" "")
6596 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6599 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6600 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6601 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for vec_unpack<US>_hi_<mode> on VDI modes.  Widens all of
;; operand 1 into a fresh <V_widen>mode temporary with
;; neon_unpack<US>_<mode>, then copies out a half of it with
;; gen_neon_vget_high<V_widen_l>.
6607 (define_expand "vec_unpack<US>_hi_<mode>"
6608 [(match_operand:<V_double_width> 0 "register_operand" "")
6609 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6612 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6613 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6614 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply (vmull) of two whole VDI-mode registers, producing
;; a <V_widen> result.
6620 (define_insn "neon_vec_<US>mult_<mode>"
6621 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6622 (mult:<V_widen> (SE:<V_widen>
6623 (match_operand:VDI 1 "register_operand" "w"))
6625 (match_operand:VDI 2 "register_operand" "w"))))]
6627 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6628 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for vec_widen_<US>mult_hi_<mode> on VDI modes.  Multiplies
;; the full operands into a fresh <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then copies out a half of it with
;; gen_neon_vget_high<V_widen_l>.
6631 (define_expand "vec_widen_<US>mult_hi_<mode>"
6632 [(match_operand:<V_double_width> 0 "register_operand" "")
6633 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6634 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6637 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6638 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6639 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for vec_widen_<US>mult_lo_<mode> on VDI modes.  Multiplies
;; the full operands into a fresh <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then copies out a half of it with
;; gen_neon_vget_low<V_widen_l>.
6646 (define_expand "vec_widen_<US>mult_lo_<mode>"
6647 [(match_operand:<V_double_width> 0 "register_operand" "")
6648 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6649 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6652 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6653 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6654 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for vec_widen_<US>shiftl_hi_<mode> on VDI modes.  Shifts
;; the full operand into a fresh <V_widen>mode temporary with
;; neon_vec_<US>shiftl_<mode>, then copies out a half of it with
;; gen_neon_vget_high<V_widen_l>.
6661 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6662 [(match_operand:<V_double_width> 0 "register_operand" "")
6663 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6664 (match_operand:SI 2 "immediate_operand" "i")]
6667 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6668 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6669 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for vec_widen_<US>shiftl_lo_<mode> on VDI modes.  Shifts
;; the full operand into a fresh <V_widen>mode temporary with
;; neon_vec_<US>shiftl_<mode>, then copies out a half of it with
;; gen_neon_vget_low<V_widen_l>.
6675 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6676 [(match_operand:<V_double_width> 0 "register_operand" "")
6677 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6678 (match_operand:SI 2 "immediate_operand" "i")]
6681 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6682 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6683 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6689 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6690 ; because the ordering of vector elements in Q registers is different from what
6691 ; the semantics of the instructions require.
;; Truncate two VN-mode registers and concatenate the results into one
;; <V_narrow_pack> register.  Emitted as two vmovn instructions, one
;; writing %e0 and one writing %f0, hence length 8 and type "multiple".
;; The output is earlyclobber ("=&w"), so it is never allocated on top
;; of an input that the second vmovn still has to read.
6693 (define_insn "vec_pack_trunc_<mode>"
6694 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6695 (vec_concat:<V_narrow_pack>
6696 (truncate:<V_narrow>
6697 (match_operand:VN 1 "register_operand" "w"))
6698 (truncate:<V_narrow>
6699 (match_operand:VN 2 "register_operand" "w"))))]
6700 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6701 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6702 [(set_attr "type" "multiple")
6703 (set_attr "length" "8")]
6706 ;; For the non-quad case.
;; Truncate a single VN-mode register to <V_narrow> with one vmovn.
;; Enabled only for TARGET_NEON on little-endian.
6707 (define_insn "neon_vec_pack_trunc_<mode>"
6708 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6709 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6710 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6711 "vmovn.i<V_sz_elem>\t%P0, %q1"
6712 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for vec_pack_trunc_<mode> on VSHFT modes (little-endian
;; only).  Assembles operands 1 and 2 into a fresh <V_DOUBLE>mode
;; temporary via move_lo_quad / move_hi_quad, then narrows that
;; temporary into operand 0 with neon_vec_pack_trunc_<V_double>.
6715 (define_expand "vec_pack_trunc_<mode>"
6716 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6717 (match_operand:VSHFT 1 "register_operand" "")
6718 (match_operand:VSHFT 2 "register_operand")]
6719 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6721 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6723 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6724 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6725 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Matches abs (op1 - op2) on VF modes and emits a single vabd.
;; Only enabled under flag_unsafe_math_optimizations.
6729 (define_insn "neon_vabd<mode>_2"
6730 [(set (match_operand:VF 0 "s_register_operand" "=w")
6731 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6732 (match_operand:VF 2 "s_register_operand" "w"))))]
6733 "TARGET_NEON && flag_unsafe_math_optimizations"
6734 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6735 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Matches abs of an unspec over operands 1 and 2 on VF modes and emits
;; a single vabd.  Only enabled under flag_unsafe_math_optimizations.
6738 (define_insn "neon_vabd<mode>_3"
6739 [(set (match_operand:VF 0 "s_register_operand" "=w")
6740 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6741 (match_operand:VF 2 "s_register_operand" "w")]
6743 "TARGET_NEON && flag_unsafe_math_optimizations"
6744 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6745 [(set_attr "type" "neon_fp_abd_s<q>")]
6748 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; sign_extend SI -> DI when, after reload, the destination is a VFP
;; register: duplicate the source into the V2SI view of the destination
;; register (operand 2), then arithmetic-shift the DI value right by 32.
6751 [(set (match_operand:DI 0 "s_register_operand" "")
6752 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6753 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6754 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6755 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6757 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; sign_extend HI -> DI when, after reload, the destination is a VFP
;; register: duplicate the source into the V4HI view of the destination
;; register (operand 2), then arithmetic-shift the DI value right by 48.
6761 [(set (match_operand:DI 0 "s_register_operand" "")
6762 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6763 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6764 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6765 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6767 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; sign_extend QI -> DI when, after reload, the destination is a VFP
;; register: duplicate the source into the V8QI view of the destination
;; register (operand 2), then arithmetic-shift the DI value right by 56.
6771 [(set (match_operand:DI 0 "s_register_operand" "")
6772 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6773 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6774 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6775 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6777 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; zero_extend SI -> DI when, after reload, the destination is a VFP
;; register: duplicate the source into the V2SI view of the destination
;; register (operand 2), then logical-shift the DI value right by 32.
6781 [(set (match_operand:DI 0 "s_register_operand" "")
6782 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6783 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6784 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6785 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6787 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; zero_extend HI -> DI when, after reload, the destination is a VFP
;; register: duplicate the source into the V4HI view of the destination
;; register (operand 2), then logical-shift the DI value right by 48.
6791 [(set (match_operand:DI 0 "s_register_operand" "")
6792 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6793 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6794 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6795 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6797 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; zero_extend QI -> DI when, after reload, the destination is a VFP
;; register: duplicate the source into the V8QI view of the destination
;; register (operand 2), then logical-shift the DI value right by 56.
6801 [(set (match_operand:DI 0 "s_register_operand" "")
6802 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6803 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6804 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6805 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6807 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));