1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute may take one of the values "vadd", "vmin" or "vmax"; an insn
;; that does not set it explicitly gets the default, "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
248 /* This pattern is not permitted to fail during expansion: if both arguments
249 are non-registers (e.g. memory := constant, which can be created by the
250 auto-vectorizer), force operand 1 into a register. */
251 if (!s_register_operand (operands[0], <MODE>mode)
252 && !s_register_operand (operands[1], <MODE>mode))
253 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned store for the modes of the VDX iterator.  Matches the same RTL
;; shape as the movmisalign<mode> expander: operand 1 (a register operand)
;; wrapped in UNSPEC_MISALIGNED_ACCESS is stored to operand 0, which must
;; satisfy neon_permissive_struct_operand (constraint "Um").  A single
;; vst1.<V_sz_elem> is emitted.  Only enabled when TARGET_NEON is set, the
;; target is not big-endian and unaligned_access is set.
256 (define_insn "*movmisalign<mode>_neon_store"
257 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
258 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
259 UNSPEC_MISALIGNED_ACCESS))]
260 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
261 "vst1.<V_sz_elem>\t{%P1}, %A0"
262 [(set_attr "type" "neon_store1_1reg<q>")])
264 (define_insn "*movmisalign<mode>_neon_load"
265 [(set (match_operand:VDX 0 "s_register_operand" "=w")
266 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
268 UNSPEC_MISALIGNED_ACCESS))]
269 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
270 "vld1.<V_sz_elem>\t{%P0}, %A1"
271 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store for the modes of the VQX iterator.  Same RTL shape and
;; enabling condition as the VDX store pattern, but the source register is
;; printed with the %q operand modifier instead of %P.  Operand 1 (a register
;; operand) wrapped in UNSPEC_MISALIGNED_ACCESS is stored to operand 0
;; (constraint "Um") with a single vst1.<V_sz_elem>.
273 (define_insn "*movmisalign<mode>_neon_store"
274 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
275 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
276 UNSPEC_MISALIGNED_ACCESS))]
277 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
278 "vst1.<V_sz_elem>\t{%q1}, %A0"
279 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load for the modes of the VQX iterator.  Operand 1 (which must
;; satisfy neon_permissive_struct_operand) wrapped in UNSPEC_MISALIGNED_ACCESS
;; is loaded into register operand 0 with a single vld1.<V_sz_elem>.  Only
;; enabled when TARGET_NEON is set, the target is not big-endian and
;; unaligned_access is set.
;; The "type" attribute is a load type, matching the VDX load pattern; this
;; insn emits vld1, so it must not be classified as a store.
281 (define_insn "*movmisalign<mode>_neon_load"
282 [(set (match_operand:VQX 0 "s_register_operand" "=w")
283 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
285 UNSPEC_MISALIGNED_ACCESS))]
286 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
287 "vld1.<V_sz_elem>\t{%q0}, %A1"
288 [(set_attr "type" "neon_load1_1reg<q>")])
290 (define_insn "vec_set<mode>_internal"
291 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
294 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
295 (match_operand:VD 3 "s_register_operand" "0,0")
296 (match_operand:SI 2 "immediate_operand" "i,i")))]
299 int elt = ffs ((int) INTVAL (operands[2])) - 1;
300 if (BYTES_BIG_ENDIAN)
301 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
302 operands[2] = GEN_INT (elt);
304 if (which_alternative == 0)
305 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
307 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
309 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
311 (define_insn "vec_set<mode>_internal"
312 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
315 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
316 (match_operand:VQ 3 "s_register_operand" "0,0")
317 (match_operand:SI 2 "immediate_operand" "i,i")))]
320 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
321 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
322 int elt = elem % half_elts;
323 int hi = (elem / half_elts) * 2;
324 int regno = REGNO (operands[0]);
326 if (BYTES_BIG_ENDIAN)
327 elt = half_elts - 1 - elt;
329 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
330 operands[2] = GEN_INT (elt);
332 if (which_alternative == 0)
333 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
335 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
337 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
340 (define_insn "vec_setv2di_internal"
341 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
344 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
345 (match_operand:V2DI 3 "s_register_operand" "0,0")
346 (match_operand:SI 2 "immediate_operand" "i,i")))]
349 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
350 int regno = REGNO (operands[0]) + 2 * elem;
352 operands[0] = gen_rtx_REG (DImode, regno);
354 if (which_alternative == 0)
355 return "vld1.64\t%P0, %A1";
357 return "vmov\t%P0, %Q1, %R1";
359 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
362 (define_expand "vec_set<mode>"
363 [(match_operand:VDQ 0 "s_register_operand" "")
364 (match_operand:<V_elem> 1 "s_register_operand" "")
365 (match_operand:SI 2 "immediate_operand" "")]
368 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
369 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
370 GEN_INT (elem), operands[0]));
374 (define_insn "vec_extract<mode>"
375 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
377 (match_operand:VD 1 "s_register_operand" "w,w")
378 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
381 if (BYTES_BIG_ENDIAN)
383 int elt = INTVAL (operands[2]);
384 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
385 operands[2] = GEN_INT (elt);
388 if (which_alternative == 0)
389 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
391 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
393 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
396 (define_insn "vec_extract<mode>"
397 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
399 (match_operand:VQ 1 "s_register_operand" "w,w")
400 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
403 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
404 int elt = INTVAL (operands[2]) % half_elts;
405 int hi = (INTVAL (operands[2]) / half_elts) * 2;
406 int regno = REGNO (operands[1]);
408 if (BYTES_BIG_ENDIAN)
409 elt = half_elts - 1 - elt;
411 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
412 operands[2] = GEN_INT (elt);
414 if (which_alternative == 0)
415 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
417 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
419 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
422 (define_insn "vec_extractv2di"
423 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
425 (match_operand:V2DI 1 "s_register_operand" "w,w")
426 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
429 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
431 operands[1] = gen_rtx_REG (DImode, regno);
433 if (which_alternative == 0)
434 return "vst1.64\t{%P1}, %A0 @ v2di";
436 return "vmov\t%Q0, %R0, %P1 @ v2di";
438 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
441 (define_expand "vec_init<mode>"
442 [(match_operand:VDQ 0 "s_register_operand" "")
443 (match_operand 1 "" "")]
446 neon_expand_vector_init (operands[0], operands[1]);
450 ;; Doubleword and quadword arithmetic.
452 ;; NOTE: some other instructions also support 64-bit integer
453 ;; element size, which we could potentially use for "long long" operations.
455 (define_insn "*add<mode>3_neon"
456 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
457 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
458 (match_operand:VDQ 2 "s_register_operand" "w")))]
459 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
460 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
462 (if_then_else (match_test "<Is_float_mode>")
463 (const_string "neon_fp_addsub_s<q>")
464 (const_string "neon_add<q>")))]
467 (define_insn "adddi3_neon"
468 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
469 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
470 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
471 (clobber (reg:CC CC_REGNUM))]
474 switch (which_alternative)
476 case 0: /* fall through */
477 case 3: return "vadd.i64\t%P0, %P1, %P2";
483 default: gcc_unreachable ();
486 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
487 multiple,multiple,multiple")
488 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
489 (set_attr "length" "*,8,8,*,8,8,8")
490 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
493 (define_insn "*sub<mode>3_neon"
494 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
495 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
496 (match_operand:VDQ 2 "s_register_operand" "w")))]
497 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
498 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
500 (if_then_else (match_test "<Is_float_mode>")
501 (const_string "neon_fp_addsub_s<q>")
502 (const_string "neon_sub<q>")))]
505 (define_insn "subdi3_neon"
506 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
507 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
508 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
509 (clobber (reg:CC CC_REGNUM))]
512 switch (which_alternative)
514 case 0: /* fall through */
515 case 4: return "vsub.i64\t%P0, %P1, %P2";
516 case 1: /* fall through */
517 case 2: /* fall through */
518 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
519 default: gcc_unreachable ();
522 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
523 (set_attr "conds" "*,clob,clob,clob,*")
524 (set_attr "length" "*,8,8,8,*")
525 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
528 (define_insn "*mul<mode>3_neon"
529 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
530 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
531 (match_operand:VDQW 2 "s_register_operand" "w")))]
532 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
533 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
535 (if_then_else (match_test "<Is_float_mode>")
536 (const_string "neon_fp_mul_s<q>")
537 (const_string "neon_mul_<V_elem_ch><q>")))]
540 (define_insn "mul<mode>3add<mode>_neon"
541 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
542 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
543 (match_operand:VDQW 3 "s_register_operand" "w"))
544 (match_operand:VDQW 1 "s_register_operand" "0")))]
545 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
546 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
548 (if_then_else (match_test "<Is_float_mode>")
549 (const_string "neon_fp_mla_s<q>")
550 (const_string "neon_mla_<V_elem_ch><q>")))]
553 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
554 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
555 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
556 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
557 (match_operand:VDQW 3 "s_register_operand" "w"))))]
558 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
559 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
561 (if_then_else (match_test "<Is_float_mode>")
562 (const_string "neon_fp_mla_s<q>")
563 (const_string "neon_mla_<V_elem_ch><q>")))]
566 ;; Fused multiply-accumulate
567 ;; We define each insn twice here:
568 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
569 ;; phase can use it when converting to FMA.
570 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
571 (define_insn "fma<VCVTF:mode>4"
572 [(set (match_operand:VCVTF 0 "register_operand" "=w")
573 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
574 (match_operand:VCVTF 2 "register_operand" "w")
575 (match_operand:VCVTF 3 "register_operand" "0")))]
576 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
577 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
578 [(set_attr "type" "neon_fp_mla_s<q>")]
581 (define_insn "fma<VCVTF:mode>4_intrinsic"
582 [(set (match_operand:VCVTF 0 "register_operand" "=w")
583 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
584 (match_operand:VCVTF 2 "register_operand" "w")
585 (match_operand:VCVTF 3 "register_operand" "0")))]
586 "TARGET_NEON && TARGET_FMA"
587 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
588 [(set_attr "type" "neon_fp_mla_s<q>")]
591 (define_insn "*fmsub<VCVTF:mode>4"
592 [(set (match_operand:VCVTF 0 "register_operand" "=w")
593 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
594 (match_operand:VCVTF 2 "register_operand" "w")
595 (match_operand:VCVTF 3 "register_operand" "0")))]
596 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
597 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
598 [(set_attr "type" "neon_fp_mla_s<q>")]
601 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
602 [(set (match_operand:VCVTF 0 "register_operand" "=w")
603 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
604 (match_operand:VCVTF 2 "register_operand" "w")
605 (match_operand:VCVTF 3 "register_operand" "0")))]
606 "TARGET_NEON && TARGET_FMA"
607 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
608 [(set_attr "type" "neon_fp_mla_s<q>")]
611 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
612 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
613 (unspec:VCVTF [(match_operand:VCVTF 1
614 "s_register_operand" "w")]
616 "TARGET_NEON && TARGET_FPU_ARMV8"
617 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
618 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
621 (define_insn "ior<mode>3"
622 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
623 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
624 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
627 switch (which_alternative)
629 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
630 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
631 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
632 default: gcc_unreachable ();
635 [(set_attr "type" "neon_logic<q>")]
638 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
639 ;; vorr. We support the pseudo-instruction vand instead, because that
640 ;; corresponds to the canonical form the middle-end expects to use for
641 ;; immediate bitwise-ANDs.
643 (define_insn "and<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
649 switch (which_alternative)
651 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vand", &operands[2],
653 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
657 [(set_attr "type" "neon_logic<q>")]
660 (define_insn "orn<mode>3_neon"
661 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
662 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
663 (match_operand:VDQ 1 "s_register_operand" "w")))]
665 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
666 [(set_attr "type" "neon_logic<q>")]
669 ;; TODO: investigate whether we should disable
670 ;; this and bicdi3_neon for the A8 in line with the other
672 (define_insn_and_split "orndi3_neon"
673 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
674 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
675 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
683 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
684 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
685 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
690 operands[3] = gen_highpart (SImode, operands[0]);
691 operands[0] = gen_lowpart (SImode, operands[0]);
692 operands[4] = gen_highpart (SImode, operands[2]);
693 operands[2] = gen_lowpart (SImode, operands[2]);
694 operands[5] = gen_highpart (SImode, operands[1]);
695 operands[1] = gen_lowpart (SImode, operands[1]);
699 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
700 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
704 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
705 (set_attr "length" "*,16,8,8")
706 (set_attr "arch" "any,a,t2,t2")]
709 (define_insn "bic<mode>3_neon"
710 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
711 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
712 (match_operand:VDQ 1 "s_register_operand" "w")))]
714 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
715 [(set_attr "type" "neon_logic<q>")]
718 ;; Compare to *anddi_notdi_di.
719 (define_insn "bicdi3_neon"
720 [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r")
721 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
722 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
728 [(set_attr "type" "neon_logic,multiple,multiple")
729 (set_attr "length" "*,8,8")]
732 (define_insn "xor<mode>3"
733 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
734 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
735 (match_operand:VDQ 2 "s_register_operand" "w")))]
737 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
738 [(set_attr "type" "neon_logic<q>")]
741 (define_insn "one_cmpl<mode>2"
742 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
743 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
745 "vmvn\t%<V_reg>0, %<V_reg>1"
746 [(set_attr "type" "neon_move<q>")]
749 (define_insn "abs<mode>2"
750 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
751 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
753 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
755 (if_then_else (match_test "<Is_float_mode>")
756 (const_string "neon_fp_abs_s<q>")
757 (const_string "neon_abs<q>")))]
760 (define_insn "neg<mode>2"
761 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
762 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
764 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
766 (if_then_else (match_test "<Is_float_mode>")
767 (const_string "neon_fp_neg_s<q>")
768 (const_string "neon_neg<q>")))]
771 (define_insn "negdi2_neon"
772 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
773 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
774 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
775 (clobber (reg:CC CC_REGNUM))]
778 [(set_attr "length" "8")
779 (set_attr "type" "multiple")]
782 ; Split negdi2_neon for vfp registers
784 [(set (match_operand:DI 0 "s_register_operand" "")
785 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
786 (clobber (match_scratch:DI 2 ""))
787 (clobber (reg:CC CC_REGNUM))]
788 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
789 [(set (match_dup 2) (const_int 0))
790 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
791 (clobber (reg:CC CC_REGNUM))])]
793 if (!REG_P (operands[2]))
794 operands[2] = operands[0];
798 ; Split negdi2_neon for core registers
800 [(set (match_operand:DI 0 "s_register_operand" "")
801 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
802 (clobber (match_scratch:DI 2 ""))
803 (clobber (reg:CC CC_REGNUM))]
804 "TARGET_32BIT && reload_completed
805 && arm_general_register_operand (operands[0], DImode)"
806 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
807 (clobber (reg:CC CC_REGNUM))])]
811 (define_insn "*umin<mode>3_neon"
812 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
813 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
814 (match_operand:VDQIW 2 "s_register_operand" "w")))]
816 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
817 [(set_attr "type" "neon_minmax<q>")]
820 (define_insn "*umax<mode>3_neon"
821 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
822 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
823 (match_operand:VDQIW 2 "s_register_operand" "w")))]
825 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
826 [(set_attr "type" "neon_minmax<q>")]
829 (define_insn "*smin<mode>3_neon"
830 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
831 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
832 (match_operand:VDQW 2 "s_register_operand" "w")))]
834 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
836 (if_then_else (match_test "<Is_float_mode>")
837 (const_string "neon_fp_minmax_s<q>")
838 (const_string "neon_minmax<q>")))]
841 (define_insn "*smax<mode>3_neon"
842 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
843 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
844 (match_operand:VDQW 2 "s_register_operand" "w")))]
846 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 (if_then_else (match_test "<Is_float_mode>")
849 (const_string "neon_fp_minmax_s<q>")
850 (const_string "neon_minmax<q>")))]
853 ; TODO: V2DI shifts are currently disabled because there are bugs in the
854 ; generic vectorizer code. It ends up creating a V2DI constructor with
857 (define_insn "vashl<mode>3"
858 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
859 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
860 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
863 switch (which_alternative)
865 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
866 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
868 VALID_NEON_QREG_MODE (<MODE>mode),
870 default: gcc_unreachable ();
873 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
876 (define_insn "vashr<mode>3_imm"
877 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
878 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
879 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
882 return neon_output_shift_immediate ("vshr", 's', &operands[2],
883 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
886 [(set_attr "type" "neon_shift_imm<q>")]
889 (define_insn "vlshr<mode>3_imm"
890 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
891 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
892 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
895 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
896 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
899 [(set_attr "type" "neon_shift_imm<q>")]
902 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
903 ; amount, with signed operands. This is essentially the same as vashl<mode>3
904 ; above, but using an unspec in case GCC tries anything tricky with negative
907 (define_insn "ashl<mode>3_signed"
908 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
909 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
910 (match_operand:VDQI 2 "s_register_operand" "w")]
911 UNSPEC_ASHIFT_SIGNED))]
913 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
914 [(set_attr "type" "neon_shift_reg<q>")]
917 ; Used for implementing logical shift-right, which is a left-shift by a negative
918 ; amount, with unsigned operands.
920 (define_insn "ashl<mode>3_unsigned"
921 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
922 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
923 (match_operand:VDQI 2 "s_register_operand" "w")]
924 UNSPEC_ASHIFT_UNSIGNED))]
926 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
927 [(set_attr "type" "neon_shift_reg<q>")]
930 (define_expand "vashr<mode>3"
931 [(set (match_operand:VDQIW 0 "s_register_operand" "")
932 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
933 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
936 if (s_register_operand (operands[2], <MODE>mode))
938 rtx neg = gen_reg_rtx (<MODE>mode);
939 emit_insn (gen_neg<mode>2 (neg, operands[2]));
940 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
943 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
947 (define_expand "vlshr<mode>3"
948 [(set (match_operand:VDQIW 0 "s_register_operand" "")
949 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
950 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
953 if (s_register_operand (operands[2], <MODE>mode))
955 rtx neg = gen_reg_rtx (<MODE>mode);
956 emit_insn (gen_neg<mode>2 (neg, operands[2]));
957 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
960 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
966 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
967 ;; leaving the upper half uninitialized. This is OK since the shift
968 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
969 ;; data flow analysis however, we pretend the full register is set
971 (define_insn "neon_load_count"
972 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
973 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
977 vld1.32\t{%P0[0]}, %A1
979 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
982 (define_insn "ashldi3_neon_noclobber"
983 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
984 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
985 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
986 "TARGET_NEON && reload_completed
987 && (!CONST_INT_P (operands[2])
988 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
990 vshl.u64\t%P0, %P1, %2
991 vshl.u64\t%P0, %P1, %P2"
992 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift by an SImode count, split once reload has completed.
;; The alternatives keep the value either in NEON registers (w) or in core
;; registers (r); the split code tests IS_VFP_REGNUM on the destination to
;; choose between the two strategies.
;;
;; NEON destination: a constant count below 1 becomes a plain DImode move and
;; a constant count above 63 is replaced by 63.  The code also contains a
;; path that copies the count into the D-register scratch (operand 5) with
;; neon_load_count and then uses that scratch as the count.  The shift itself
;; is emitted through ashldi3_neon_noclobber.
;;
;; Core-register destination: a constant count of 1 uses arm_ashldi3_1bit;
;; the other emission calls arm_emit_coreregs_64bit_shift with the two SImode
;; scratches (operands 3 and 4).
995 (define_insn_and_split "ashldi3_neon"
996 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
997 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
998 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
999 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1000 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1001 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1002 (clobber (reg:CC_C CC_REGNUM))]
1005 "TARGET_NEON && reload_completed"
1009 if (IS_VFP_REGNUM (REGNO (operands[0])))
1011 if (CONST_INT_P (operands[2]))
1013 if (INTVAL (operands[2]) < 1)
1015 emit_insn (gen_movdi (operands[0], operands[1]));
1018 else if (INTVAL (operands[2]) > 63)
1019 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1023 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1024 operands[2] = operands[5];
1027 /* Ditch the unnecessary clobbers. */
1028 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1033 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
1034 /* This clobbers CC. */
1035 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1037 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1038 operands[2], operands[3], operands[4]);
1042 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1043 (set_attr "opt" "*,*,speed,speed,*,*")
1044 (set_attr "type" "multiple")]
1047 ; The shift amount needs to be negated for right-shifts
; Emits vshl.s64 on D registers, with the count taken from the D register
; in operand 2.  Modelled as UNSPEC_ASHIFT_SIGNED and matched only after
; reload.
1048 (define_insn "signed_shift_di3_neon"
1049 [(set (match_operand:DI 0 "s_register_operand" "=w")
1050 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1051 (match_operand:DI 2 "s_register_operand" " w")]
1052 UNSPEC_ASHIFT_SIGNED))]
1053 "TARGET_NEON && reload_completed"
1054 "vshl.s64\t%P0, %P1, %P2"
1055 [(set_attr "type" "neon_shift_reg")]
1058 ; The shift amount needs to be negated for right-shifts
; Emits vshl.u64 on D registers, with the count taken from the D register
; in operand 2.  Modelled as UNSPEC_ASHIFT_UNSIGNED and matched only after
; reload.
1059 (define_insn "unsigned_shift_di3_neon"
1060 [(set (match_operand:DI 0 "s_register_operand" "=w")
1061 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1062 (match_operand:DI 2 "s_register_operand" " w")]
1063 UNSPEC_ASHIFT_UNSIGNED))]
1064 "TARGET_NEON && reload_completed"
1065 "vshl.u64\t%P0, %P1, %P2"
1066 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic right shift of a D register by a constant, emitted as
;; vshr.s64.  Matched only after reload; the insn condition accepts counts
;; in the range 1..64.
1069 (define_insn "ashrdi3_neon_imm_noclobber"
1070 [(set (match_operand:DI 0 "s_register_operand" "=w")
1071 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1072 (match_operand:DI 2 "const_int_operand" " i")))]
1073 "TARGET_NEON && reload_completed
1074 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1075 "vshr.s64\t%P0, %P1, %2"
1076 [(set_attr "type" "neon_shift_imm")]
;; DImode logical right shift of a D register by a constant, emitted as
;; vshr.u64.  Matched only after reload; the insn condition accepts counts
;; in the range 1..64.
1079 (define_insn "lshrdi3_neon_imm_noclobber"
1080 [(set (match_operand:DI 0 "s_register_operand" "=w")
1081 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1082 (match_operand:DI 2 "const_int_operand" " i")))]
1083 "TARGET_NEON && reload_completed
1084 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1085 "vshr.u64\t%P0, %P1, %2"
1086 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (one pattern per code in the rshifts iterator) by an
;; SImode count, split once reload has completed.  As with the left-shift
;; splitter, IS_VFP_REGNUM on the destination selects the strategy.
;;
;; NEON destination, constant count: a count below 1 becomes a plain DImode
;; move, a count above 64 is replaced by 64, and the shift is emitted via
;; <shift>di3_neon_imm_noclobber.
;; NEON destination, register count: the count is negated into the SImode
;; scratch (operand 3), copied into the D-register scratch (operand 5) with
;; neon_load_count, and <shifttype>_shift_di3_neon is emitted.
;;
;; Core-register destination: a constant count of 1 uses
;; arm_<shift>di3_1bit; the other emission calls
;; arm_emit_coreregs_64bit_shift with scratches 3 and 4.
1091 (define_insn_and_split "<shift>di3_neon"
1092 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1093 (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1094 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1095 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1096 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1097 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1098 (clobber (reg:CC CC_REGNUM))]
1101 "TARGET_NEON && reload_completed"
1105 if (IS_VFP_REGNUM (REGNO (operands[0])))
1107 if (CONST_INT_P (operands[2]))
1109 if (INTVAL (operands[2]) < 1)
1111 emit_insn (gen_movdi (operands[0], operands[1]));
1114 else if (INTVAL (operands[2]) > 64)
1115 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1117 /* Ditch the unnecessary clobbers. */
1118 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1124 /* We must use a negative left-shift. */
1125 emit_insn (gen_negsi2 (operands[3], operands[2]));
1126 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1127 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1133 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
1134 /* This clobbers CC. */
1135 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1137 /* This clobbers CC (ASHIFTRT by register only). */
1138 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1139 operands[2], operands[3], operands[4]);
1144 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1145 (set_attr "opt" "*,*,speed,speed,*,*")
1146 (set_attr "type" "multiple")]
1149 ;; Widening operations
;; Adds the sign-extended VW operand 1 to the <V_widen> accumulator in
;; operand 2.  Note that the vaddw template names the wide operand (%q2)
;; before the narrow one (%P1).
1151 (define_insn "widen_ssum<mode>3"
1152 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1153 (plus:<V_widen> (sign_extend:<V_widen>
1154 (match_operand:VW 1 "s_register_operand" "%w"))
1155 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1157 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1158 [(set_attr "type" "neon_add_widen")]
;; Adds the zero-extended VW operand 1 to the <V_widen> accumulator in
;; operand 2.  Note that the vaddw template names the wide operand (%q2)
;; before the narrow one (%P1).
1161 (define_insn "widen_usum<mode>3"
1162 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1163 (plus:<V_widen> (zero_extend:<V_widen>
1164 (match_operand:VW 1 "s_register_operand" "%w"))
1165 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1167 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1168 [(set_attr "type" "neon_add_widen")]
1171 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1172 ;; shift-count granularity. That's good enough for the middle-end's current
1175 ;; Note that it's not safe to perform such an operation in big-endian mode,
1176 ;; due to element-ordering issues.
;; Whole-vector shift by a constant multiple of 8 bits, available only when
;; !BYTES_BIG_ENDIAN.  A count equal to the vector width is expanded as a
;; plain move of operand 1 into operand 0.  In the VEXT path both operands
;; are viewed as byte vectors (V16QImode for 128-bit modes, V8QImode
;; otherwise) and a VEXT of operand 1 followed by an all-zero vector is
;; emitted, with the byte offset num_bits / BITS_PER_UNIT.
1178 (define_expand "vec_shr_<mode>"
1179 [(match_operand:VDQ 0 "s_register_operand" "")
1180 (match_operand:VDQ 1 "s_register_operand" "")
1181 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1182 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1185 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1186 const int width = GET_MODE_BITSIZE (<MODE>mode);
1187 const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1188 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1189 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1191 if (num_bits == width)
1193 emit_move_insn (operands[0], operands[1]);
1197 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1198 operands[0] = gen_lowpart (bvecmode, operands[0]);
1199 operands[1] = gen_lowpart (bvecmode, operands[1]);
1201 emit_insn (gen_ext (operands[0], operands[1], zero_reg,
1202 GEN_INT (num_bits / BITS_PER_UNIT)));
;; Counterpart of vec_shr_<mode>, also limited to !BYTES_BIG_ENDIAN and to
;; constant counts that are multiples of 8.  One path stores an all-zero
;; vector into operand 0.  In the VEXT path the count is first rewritten as
;; width - num_bits, both operands are viewed as byte vectors (V16QImode for
;; 128-bit modes, V8QImode otherwise), and VEXT is emitted with the all-zero
;; vector as its first source and operand 1 as its second.
1206 (define_expand "vec_shl_<mode>"
1207 [(match_operand:VDQ 0 "s_register_operand" "")
1208 (match_operand:VDQ 1 "s_register_operand" "")
1209 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1210 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1213 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1214 const int width = GET_MODE_BITSIZE (<MODE>mode);
1215 const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1216 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1217 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1221 emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
1225 num_bits = width - num_bits;
1227 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1228 operands[0] = gen_lowpart (bvecmode, operands[0]);
1229 operands[1] = gen_lowpart (bvecmode, operands[1]);
1231 emit_insn (gen_ext (operands[0], zero_reg, operands[1],
1232 GEN_INT (num_bits / BITS_PER_UNIT)));
1236 ;; Helpers for quad-word reduction operations
1238 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1239 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1240 ; N/2-element vector.
; V4SI instance: combines elements 0-1 of operand 1 with elements 2-3 of the
; same register and writes a V2SI result.  The template applies the
; operation to %e1 and %f1.
1242 (define_insn "quad_halves_<code>v4si"
1243 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1245 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1246 (parallel [(const_int 0) (const_int 1)]))
1247 (vec_select:V2SI (match_dup 1)
1248 (parallel [(const_int 2) (const_int 3)]))))]
1250 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1251 [(set_attr "vqh_mnem" "<VQH_mnem>")
1252 (set_attr "type" "neon_reduc_<VQH_type>_q")]
; V4SF instance: combines elements 0-1 of operand 1 with elements 2-3 of the
; same register and writes a V2SF result.  Enabled only when
; flag_unsafe_math_optimizations is set.
1255 (define_insn "quad_halves_<code>v4sf"
1256 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1258 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1259 (parallel [(const_int 0) (const_int 1)]))
1260 (vec_select:V2SF (match_dup 1)
1261 (parallel [(const_int 2) (const_int 3)]))))]
1262 "TARGET_NEON && flag_unsafe_math_optimizations"
1263 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1264 [(set_attr "vqh_mnem" "<VQH_mnem>")
1265 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
; V8HI instance: combines elements 0-3 of operand 1 with elements 4-7 of the
; same register and writes a V4HI result.  The template applies the
; operation to %e1 and %f1.
; Operand 0 is only written by this pattern (it appears solely as the SET
; destination), so it uses the "=" output modifier like the V4SI and V4SF
; variants rather than the read-write "+" modifier.
1268 (define_insn "quad_halves_<code>v8hi"
1269 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1271 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1272 (parallel [(const_int 0) (const_int 1)
1273 (const_int 2) (const_int 3)]))
1274 (vec_select:V4HI (match_dup 1)
1275 (parallel [(const_int 4) (const_int 5)
1276 (const_int 6) (const_int 7)]))))]
1278 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1279 [(set_attr "vqh_mnem" "<VQH_mnem>")
1280 (set_attr "type" "neon_reduc_<VQH_type>_q")]
; V16QI instance: combines elements 0-7 of operand 1 with elements 8-15 of
; the same register and writes a V8QI result.  The template applies the
; operation to %e1 and %f1.
; Operand 0 is only written by this pattern (it appears solely as the SET
; destination), so it uses the "=" output modifier like the V4SI and V4SF
; variants rather than the read-write "+" modifier.
1283 (define_insn "quad_halves_<code>v16qi"
1284 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1286 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1287 (parallel [(const_int 0) (const_int 1)
1288 (const_int 2) (const_int 3)
1289 (const_int 4) (const_int 5)
1290 (const_int 6) (const_int 7)]))
1291 (vec_select:V8QI (match_dup 1)
1292 (parallel [(const_int 8) (const_int 9)
1293 (const_int 10) (const_int 11)
1294 (const_int 12) (const_int 13)
1295 (const_int 14) (const_int 15)]))))]
1297 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1298 [(set_attr "vqh_mnem" "<VQH_mnem>")
1299 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expands to a move into the <V_HALF>mode subreg of the 128-bit operand 0
;; that starts at byte offset GET_MODE_SIZE (<V_HALF>mode).
1302 (define_expand "move_hi_quad_<mode>"
1303 [(match_operand:ANY128 0 "s_register_operand" "")
1304 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1307 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1308 GET_MODE_SIZE (<V_HALF>mode)),
;; Expands to a move into a <V_HALF>mode subreg of the 128-bit operand 0.
1313 (define_expand "move_lo_quad_<mode>"
1314 [(match_operand:ANY128 0 "s_register_operand" "")
1315 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1318 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1324 ;; Reduction operations
;; Sum reduction for the VD modes: delegates to neon_pairwise_reduce, passing
;; the neon_vpadd_internal<mode> generator.  Floating-point modes are allowed
;; only when flag_unsafe_math_optimizations is set.
1326 (define_expand "reduc_splus_<mode>"
1327 [(match_operand:VD 0 "s_register_operand" "")
1328 (match_operand:VD 1 "s_register_operand" "")]
1329 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1331 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1332 &gen_neon_vpadd_internal<mode>);
;; Sum reduction for the VQ modes, disabled for BYTES_BIG_ENDIAN.  The two
;; halves of operand 1 are first combined by quad_halves_plus<mode> into a
;; <V_HALF>mode pseudo, that pseudo is reduced with reduc_splus_<V_half>, and
;; the result is handed to move_lo_quad_<mode> to be written into operand 0.
;; Floating-point modes are allowed only when flag_unsafe_math_optimizations
;; is set.
1336 (define_expand "reduc_splus_<mode>"
1337 [(match_operand:VQ 0 "s_register_operand" "")
1338 (match_operand:VQ 1 "s_register_operand" "")]
1339 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1340 && !BYTES_BIG_ENDIAN"
1342 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1343 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1345 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1346 emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
1347 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; V2DI sum reduction as a single vadd.i64 that adds %e1 and %f1 of operand 1
;; and writes %e0 of operand 0.  Disabled for BYTES_BIG_ENDIAN.
1352 (define_insn "reduc_splus_v2di"
1353 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1354 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1356 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1357 "vadd.i64\t%e0, %e1, %f1"
1358 [(set_attr "type" "neon_add_q")]
1361 ;; NEON does not distinguish between signed and unsigned addition except on
1362 ;; widening operations.
;; Unsigned sum reduction for the VDQI modes: simply forwards to
;; reduc_splus_<mode>.  The condition admits D-register modes always and
;; other modes only when !BYTES_BIG_ENDIAN.
1363 (define_expand "reduc_uplus_<mode>"
1364 [(match_operand:VDQI 0 "s_register_operand" "")
1365 (match_operand:VDQI 1 "s_register_operand" "")]
1366 "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
1368 emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
;; Signed-minimum reduction for the VD modes: delegates to
;; neon_pairwise_reduce, passing the neon_vpsmin<mode> generator.
;; Floating-point modes are allowed only when flag_unsafe_math_optimizations
;; is set.
1372 (define_expand "reduc_smin_<mode>"
1373 [(match_operand:VD 0 "s_register_operand" "")
1374 (match_operand:VD 1 "s_register_operand" "")]
1375 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1377 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1378 &gen_neon_vpsmin<mode>);
;; Signed-minimum reduction for the VQ modes, disabled for BYTES_BIG_ENDIAN.
;; The halves of operand 1 are combined by quad_halves_smin<mode>, the
;; <V_HALF>mode result is reduced with reduc_smin_<V_half>, and the outcome
;; is handed to move_lo_quad_<mode> to be written into operand 0.
1382 (define_expand "reduc_smin_<mode>"
1383 [(match_operand:VQ 0 "s_register_operand" "")
1384 (match_operand:VQ 1 "s_register_operand" "")]
1385 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1386 && !BYTES_BIG_ENDIAN"
1388 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1389 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1391 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1392 emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
1393 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Signed-maximum reduction for the VD modes: delegates to
;; neon_pairwise_reduce, passing the neon_vpsmax<mode> generator.
;; Floating-point modes are allowed only when flag_unsafe_math_optimizations
;; is set.
1398 (define_expand "reduc_smax_<mode>"
1399 [(match_operand:VD 0 "s_register_operand" "")
1400 (match_operand:VD 1 "s_register_operand" "")]
1401 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1403 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1404 &gen_neon_vpsmax<mode>);
;; Signed-maximum reduction for the VQ modes, disabled for BYTES_BIG_ENDIAN.
;; The halves of operand 1 are combined by quad_halves_smax<mode>, the
;; <V_HALF>mode result is reduced with reduc_smax_<V_half>, and the outcome
;; is handed to move_lo_quad_<mode> to be written into operand 0.
1408 (define_expand "reduc_smax_<mode>"
1409 [(match_operand:VQ 0 "s_register_operand" "")
1410 (match_operand:VQ 1 "s_register_operand" "")]
1411 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1412 && !BYTES_BIG_ENDIAN"
1414 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1415 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1417 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1418 emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
1419 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Unsigned-minimum reduction for the VDI modes: delegates to
;; neon_pairwise_reduce, passing the neon_vpumin<mode> generator.
1424 (define_expand "reduc_umin_<mode>"
1425 [(match_operand:VDI 0 "s_register_operand" "")
1426 (match_operand:VDI 1 "s_register_operand" "")]
1429 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1430 &gen_neon_vpumin<mode>);
;; Unsigned-minimum reduction for the VQI modes, disabled for
;; BYTES_BIG_ENDIAN.  The halves of operand 1 are combined by
;; quad_halves_umin<mode>, the <V_HALF>mode result is reduced with
;; reduc_umin_<V_half>, and the outcome is handed to move_lo_quad_<mode> to
;; be written into operand 0.
1434 (define_expand "reduc_umin_<mode>"
1435 [(match_operand:VQI 0 "s_register_operand" "")
1436 (match_operand:VQI 1 "s_register_operand" "")]
1437 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1439 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1440 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1442 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1443 emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
1444 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Unsigned-maximum reduction for the VDI modes: delegates to
;; neon_pairwise_reduce, passing the neon_vpumax<mode> generator.
1449 (define_expand "reduc_umax_<mode>"
1450 [(match_operand:VDI 0 "s_register_operand" "")
1451 (match_operand:VDI 1 "s_register_operand" "")]
1454 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1455 &gen_neon_vpumax<mode>);
;; Unsigned-maximum reduction for the VQI modes, disabled for
;; BYTES_BIG_ENDIAN.  The halves of operand 1 are combined by
;; quad_halves_umax<mode>, the <V_HALF>mode result is reduced with
;; reduc_umax_<V_half>, and the outcome is handed to move_lo_quad_<mode> to
;; be written into operand 0.
1459 (define_expand "reduc_umax_<mode>"
1460 [(match_operand:VQI 0 "s_register_operand" "")
1461 (match_operand:VQI 1 "s_register_operand" "")]
1462 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1464 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1465 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1467 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1468 emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
1469 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; vpadd on D registers for the VD modes, modelled as an unspec of operands 1
;; and 2.  The scheduling type is chosen by <Is_float_mode>.
1474 (define_insn "neon_vpadd_internal<mode>"
1475 [(set (match_operand:VD 0 "s_register_operand" "=w")
1476 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1477 (match_operand:VD 2 "s_register_operand" "w")]
1480 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1481 ;; Assume this schedules like vadd.
1483 (if_then_else (match_test "<Is_float_mode>")
1484 (const_string "neon_fp_reduc_add_s<q>")
1485 (const_string "neon_reduc_add<q>")))]
;; vpmin with the signed element suffix (<V_s_elem>) on D registers for the
;; VD modes, modelled as an unspec of operands 1 and 2.  The scheduling type
;; is chosen by <Is_float_mode>.
1488 (define_insn "neon_vpsmin<mode>"
1489 [(set (match_operand:VD 0 "s_register_operand" "=w")
1490 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1491 (match_operand:VD 2 "s_register_operand" "w")]
1494 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1496 (if_then_else (match_test "<Is_float_mode>")
1497 (const_string "neon_fp_reduc_minmax_s<q>")
1498 (const_string "neon_reduc_minmax<q>")))]
;; vpmax with the signed element suffix (<V_s_elem>) on D registers for the
;; VD modes, modelled as an unspec of operands 1 and 2.  The scheduling type
;; is chosen by <Is_float_mode>.
1501 (define_insn "neon_vpsmax<mode>"
1502 [(set (match_operand:VD 0 "s_register_operand" "=w")
1503 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1504 (match_operand:VD 2 "s_register_operand" "w")]
1507 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1509 (if_then_else (match_test "<Is_float_mode>")
1510 (const_string "neon_fp_reduc_minmax_s<q>")
1511 (const_string "neon_reduc_minmax<q>")))]
;; vpmin with the unsigned element suffix (<V_u_elem>) on D registers for the
;; integer VDI modes, modelled as an unspec of operands 1 and 2.
1514 (define_insn "neon_vpumin<mode>"
1515 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1516 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1517 (match_operand:VDI 2 "s_register_operand" "w")]
1520 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1521 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vpmax with the unsigned element suffix (<V_u_elem>) on D registers for the
;; integer VDI modes, modelled as an unspec of operands 1 and 2.
1524 (define_insn "neon_vpumax<mode>"
1525 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1526 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1527 (match_operand:VDI 2 "s_register_operand" "w")]
1530 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1531 [(set_attr "type" "neon_reduc_minmax<q>")]
1534 ;; Saturating arithmetic
1536 ; NOTE: Neon supports many more saturating variants of instructions than the
1537 ; following, but these are all GCC currently understands.
1538 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1539 ; yet either, although these patterns may be used by intrinsics when they're
; Signed saturating addition (ss_plus) on D registers, emitted as vqadd with
; the signed element suffix.
1542 (define_insn "*ss_add<mode>_neon"
1543 [(set (match_operand:VD 0 "s_register_operand" "=w")
1544 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1545 (match_operand:VD 2 "s_register_operand" "w")))]
1547 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1548 [(set_attr "type" "neon_qadd<q>")]
; Unsigned saturating addition (us_plus) on D registers, emitted as vqadd
; with the unsigned element suffix.
1551 (define_insn "*us_add<mode>_neon"
1552 [(set (match_operand:VD 0 "s_register_operand" "=w")
1553 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1554 (match_operand:VD 2 "s_register_operand" "w")))]
1556 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1557 [(set_attr "type" "neon_qadd<q>")]
; Signed saturating subtraction (ss_minus) on D registers, emitted as vqsub
; with the signed element suffix.
1560 (define_insn "*ss_sub<mode>_neon"
1561 [(set (match_operand:VD 0 "s_register_operand" "=w")
1562 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1563 (match_operand:VD 2 "s_register_operand" "w")))]
1565 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1566 [(set_attr "type" "neon_qsub<q>")]
; Unsigned saturating subtraction (us_minus) on D registers, emitted as vqsub
; with the unsigned element suffix.
1569 (define_insn "*us_sub<mode>_neon"
1570 [(set (match_operand:VD 0 "s_register_operand" "=w")
1571 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1572 (match_operand:VD 2 "s_register_operand" "w")))]
1574 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1575 [(set_attr "type" "neon_qsub<q>")]
1578 ;; Conditional instructions. These are comparisons with conditional moves for
1579 ;; vectors. They perform the assignment:
1581 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1583 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select for the VDQW modes; floating-point modes are
;; allowed only when flag_unsafe_math_optimizations is set.  A comparison
;; mask is built in a <V_cmp_result>mode pseudo using vcge, vcgt or vceq (or
;; vclt/vcle when comparing against the zero vector), in some cases with
;; operands 4 and 5 exchanged.  Some comparison codes instead compute the
;; inverse mask and set swap_bsl_operands.  A final neon_vbsl picks between
;; operands 1 and 2 according to the mask, starting from operand 2 when
;; swap_bsl_operands is set and from operand 1 otherwise.
1586 (define_expand "vcond<mode><mode>"
1587 [(set (match_operand:VDQW 0 "s_register_operand" "")
1589 (match_operator 3 "comparison_operator"
1590 [(match_operand:VDQW 4 "s_register_operand" "")
1591 (match_operand:VDQW 5 "nonmemory_operand" "")])
1592 (match_operand:VDQW 1 "s_register_operand" "")
1593 (match_operand:VDQW 2 "s_register_operand" "")))]
1594 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1596 HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
1598 rtx magic_rtx = GEN_INT (magic_word);
1600 int use_zero_form = 0;
1601 int swap_bsl_operands = 0;
1602 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1603 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1605 rtx (*base_comparison) (rtx, rtx, rtx, rtx);
1606 rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
1608 switch (GET_CODE (operands[3]))
1615 if (operands[5] == CONST0_RTX (<MODE>mode))
1622 if (!REG_P (operands[5]))
1623 operands[5] = force_reg (<MODE>mode, operands[5]);
1626 switch (GET_CODE (operands[3]))
1636 base_comparison = gen_neon_vcge<mode>;
1637 complimentary_comparison = gen_neon_vcgt<mode>;
1645 base_comparison = gen_neon_vcgt<mode>;
1646 complimentary_comparison = gen_neon_vcge<mode>;
1651 base_comparison = gen_neon_vceq<mode>;
1652 complimentary_comparison = gen_neon_vceq<mode>;
1658 switch (GET_CODE (operands[3]))
1665 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1666 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1672 Note that there also exist direct comparison against 0 forms,
1673 so catch those as a special case. */
1677 switch (GET_CODE (operands[3]))
1680 base_comparison = gen_neon_vclt<mode>;
1683 base_comparison = gen_neon_vcle<mode>;
1686 /* Do nothing, other zero form cases already have the correct
1693 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1695 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1702 /* Vector compare returns false for lanes which are unordered, so if we use
1703 the inverse of the comparison we actually want to emit, then
1704 swap the operands to BSL, we will end up with the correct result.
1705 Note that a NE NaN and NaN NE b are true for all a, b.
1707 Our transformations are:
1712 a NE b -> !(a EQ b) */
1715 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1717 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1719 swap_bsl_operands = 1;
1722 /* We check (a > b || b > a). Combining these comparisons gives us
1723 true iff (a != b && a ORDERED b), swapping the operands to BSL
1724 will then give us (a == b || a UNORDERED b) as intended. */
1726 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
1727 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
1728 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1729 swap_bsl_operands = 1;
1732 /* Operands are ORDERED iff (a > b || b >= a).
1733 Swapping the operands to BSL will give the UNORDERED case. */
1734 swap_bsl_operands = 1;
1737 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
1738 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
1739 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1745 if (swap_bsl_operands)
1746 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1749 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Conditional select for the integer VDQIW modes, with the comparison
;; matched by arm_comparison_operator.  A mask is built in a
;; <V_cmp_result>mode pseudo by one of vcge, vcgt, vceq, vcle or vclt, in
;; some cases with operands 4 and 5 exchanged, and neon_vbsl then picks
;; between operands 1 and 2 (emitted in either order).
;; NOTE(review): operand 5 uses the s_register_operand predicate, so the
;; CONST0_RTX test below looks unreachable from this expander -- confirm.
1754 (define_expand "vcondu<mode><mode>"
1755 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1757 (match_operator 3 "arm_comparison_operator"
1758 [(match_operand:VDQIW 4 "s_register_operand" "")
1759 (match_operand:VDQIW 5 "s_register_operand" "")])
1760 (match_operand:VDQIW 1 "s_register_operand" "")
1761 (match_operand:VDQIW 2 "s_register_operand" "")))]
1765 int inverse = 0, immediate_zero = 0;
1767 mask = gen_reg_rtx (<V_cmp_result>mode);
1769 if (operands[5] == CONST0_RTX (<MODE>mode))
1771 else if (!REG_P (operands[5]))
1772 operands[5] = force_reg (<MODE>mode, operands[5]);
1774 switch (GET_CODE (operands[3]))
1777 emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
1782 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
1787 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1793 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
1796 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
1802 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
1805 emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
1810 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1820 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1823 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1829 ;; Patterns for builtins.
1831 ; good for plain vadd, vaddq.
; Builtin expander for vector addition over the VDQX modes.  Integer modes,
; and floating-point modes under flag_unsafe_math_optimizations, go through
; add<mode>3; the other emission uses neon_vadd<mode>_unspec.  Operand 3 (an
; SImode immediate) is not referenced by the emissions shown.
1833 (define_expand "neon_vadd<mode>"
1834 [(match_operand:VDQX 0 "s_register_operand" "=w")
1835 (match_operand:VDQX 1 "s_register_operand" "w")
1836 (match_operand:VDQX 2 "s_register_operand" "w")
1837 (match_operand:SI 3 "immediate_operand" "i")]
1840 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1841 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1843 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1848 ; Note that NEON operations don't support the full IEEE 754 standard: in
1849 ; particular, denormal values are flushed to zero. This means that GCC cannot
1850 ; use those instructions for autovectorization, etc. unless
1851 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1852 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1853 ; header) must work in either case: if -funsafe-math-optimizations is given,
1854 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1855 ; expand to unspecs (which may potentially limit the extent to which they might
1856 ; be optimized by generic code).
1858 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Emits vadd on operands 1 and 2, kept as an unspec rather than a plus.
; The scheduling type is chosen by <Is_float_mode>.
1860 (define_insn "neon_vadd<mode>_unspec"
1861 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
1862 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
1863 (match_operand:VDQX 2 "s_register_operand" "w")]
1866 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1868 (if_then_else (match_test "<Is_float_mode>")
1869 (const_string "neon_fp_addsub_s<q>")
1870 (const_string "neon_add<q>")))]
1873 ; operand 3 represents in bits:
1874 ; bit 0: signed (vs unsigned).
1875 ; bit 1: rounding (vs none).
; Builtin pattern emitting vaddl: two VDI D-register inputs and a <V_widen>
; Q-register result.  The element-type prefix is printed from operand 3 via
; the %T3 modifier.
1877 (define_insn "neon_vaddl<mode>"
1878 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1879 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1880 (match_operand:VDI 2 "s_register_operand" "w")
1881 (match_operand:SI 3 "immediate_operand" "i")]
1884 "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
1885 [(set_attr "type" "neon_add_long")]
; Builtin pattern emitting vaddw: a <V_widen> Q-register operand 1, a VDI
; D-register operand 2 and a <V_widen> Q-register result.  The element-type
; prefix is printed from operand 3 via the %T3 modifier.
1888 (define_insn "neon_vaddw<mode>"
1889 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1890 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1891 (match_operand:VDI 2 "s_register_operand" "w")
1892 (match_operand:SI 3 "immediate_operand" "i")]
1895 "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
1896 [(set_attr "type" "neon_add_widen")]
; Builtin pattern emitting v[r]hadd for the VDQIW modes.  Operand 3 drives
; both the %O3 mnemonic infix and the %T3 element-type prefix; presumably
; %O3 prints the rounding "r" -- confirm against the operand printer.
1901 (define_insn "neon_vhadd<mode>"
1902 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1903 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1904 (match_operand:VDQIW 2 "s_register_operand" "w")
1905 (match_operand:SI 3 "immediate_operand" "i")]
1908 "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1909 [(set_attr "type" "neon_add_halve_q")]
; Builtin pattern emitting vqadd for the VDQIX modes.  The element-type
; prefix is printed from operand 3 via the %T3 modifier.
1912 (define_insn "neon_vqadd<mode>"
1913 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1914 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1915 (match_operand:VDQIX 2 "s_register_operand" "w")
1916 (match_operand:SI 3 "immediate_operand" "i")]
1919 "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1920 [(set_attr "type" "neon_qadd<q>")]
; Builtin pattern emitting v[r]addhn: two VN Q-register inputs and a
; <V_narrow> D-register result.  Operand 3 drives the %O3 mnemonic infix;
; presumably it prints the rounding "r" -- confirm against the operand
; printer.
1923 (define_insn "neon_vaddhn<mode>"
1924 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1925 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1926 (match_operand:VN 2 "s_register_operand" "w")
1927 (match_operand:SI 3 "immediate_operand" "i")]
1930 "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
1931 [(set_attr "type" "neon_add_halve_narrow_q")]
1934 ;; We cannot replace this unspec with mul<mode>3 because of the odd
1935 ;; polynomial multiplication case that can be specified by operand 3.
;; Builtin pattern emitting vmul for the VDQW modes.  The element-type
;; prefix is printed from operand 3 via the %F3 modifier, and the scheduling
;; type is chosen by <Is_float_mode>.
1936 (define_insn "neon_vmul<mode>"
1937 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1938 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
1939 (match_operand:VDQW 2 "s_register_operand" "w")
1940 (match_operand:SI 3 "immediate_operand" "i")]
1943 "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1945 (if_then_else (match_test "<Is_float_mode>")
1946 (const_string "neon_fp_mul_s<q>")
1947 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Builtin expander for multiply-accumulate over the VDQW modes.  Integer
;; modes, and floating-point modes under flag_unsafe_math_optimizations, go
;; through mul<mode>3add<mode>_neon; the other emission uses
;; neon_vmla<mode>_unspec.  Both receive operands 0-3; operand 4 is not
;; forwarded.
1950 (define_expand "neon_vmla<mode>"
1951 [(match_operand:VDQW 0 "s_register_operand" "=w")
1952 (match_operand:VDQW 1 "s_register_operand" "0")
1953 (match_operand:VDQW 2 "s_register_operand" "w")
1954 (match_operand:VDQW 3 "s_register_operand" "w")
1955 (match_operand:SI 4 "immediate_operand" "i")]
1958 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1959 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1960 operands[2], operands[3]));
1962 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1963 operands[2], operands[3]));
;; Builtin expander for fused multiply-add over the VCVTF modes; requires
;; TARGET_FMA.  Emits fma<mode>4_intrinsic with operand 0 as the destination
;; and operands 2 and 3 as its leading inputs.
1967 (define_expand "neon_vfma<VCVTF:mode>"
1968 [(match_operand:VCVTF 0 "s_register_operand")
1969 (match_operand:VCVTF 1 "s_register_operand")
1970 (match_operand:VCVTF 2 "s_register_operand")
1971 (match_operand:VCVTF 3 "s_register_operand")
1972 (match_operand:SI 4 "immediate_operand")]
1973 "TARGET_NEON && TARGET_FMA"
1975 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-subtract over the VCVTF modes;
;; requires TARGET_FMA.  Emits fmsub<mode>4_intrinsic with operand 0 as the
;; destination and operands 2 and 3 as its leading inputs.
1980 (define_expand "neon_vfms<VCVTF:mode>"
1981 [(match_operand:VCVTF 0 "s_register_operand")
1982 (match_operand:VCVTF 1 "s_register_operand")
1983 (match_operand:VCVTF 2 "s_register_operand")
1984 (match_operand:VCVTF 3 "s_register_operand")
1985 (match_operand:SI 4 "immediate_operand")]
1986 "TARGET_NEON && TARGET_FMA"
1988 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1993 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Emits vmla on operands 2 and 3.  Operand 1 is tied to the destination by
; the "0" constraint, so it does not appear in the template.  The
; scheduling type is chosen by <Is_float_mode>.
1995 (define_insn "neon_vmla<mode>_unspec"
1996 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1997 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1998 (match_operand:VDQW 2 "s_register_operand" "w")
1999 (match_operand:VDQW 3 "s_register_operand" "w")]
2002 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2004 (if_then_else (match_test "<Is_float_mode>")
2005 (const_string "neon_fp_mla_s<q>")
2006 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Builtin pattern emitting vmlal: VW D-register operands 2 and 3 and a
;; <V_widen> Q-register result.  Operand 1 is tied to the destination by the
;; "0" constraint.  The element-type prefix is printed from operand 4 via
;; the %T4 modifier.
2009 (define_insn "neon_vmlal<mode>"
2010 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2011 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2012 (match_operand:VW 2 "s_register_operand" "w")
2013 (match_operand:VW 3 "s_register_operand" "w")
2014 (match_operand:SI 4 "immediate_operand" "i")]
2017 "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2018 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin expander for multiply-subtract over the VDQW modes.  Integer
;; modes, and floating-point modes under flag_unsafe_math_optimizations, go
;; through mul<mode>3neg<mode>add<mode>_neon; the other emission uses
;; neon_vmls<mode>_unspec.  Both receive operands 0-3; operand 4 is not
;; forwarded.
2021 (define_expand "neon_vmls<mode>"
2022 [(match_operand:VDQW 0 "s_register_operand" "=w")
2023 (match_operand:VDQW 1 "s_register_operand" "0")
2024 (match_operand:VDQW 2 "s_register_operand" "w")
2025 (match_operand:VDQW 3 "s_register_operand" "w")
2026 (match_operand:SI 4 "immediate_operand" "i")]
2029 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2030 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2031 operands[1], operands[2], operands[3]));
2033 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2034 operands[2], operands[3]));
2038 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Emits vmls on operands 2 and 3.  Operand 1 is tied to the destination by
; the "0" constraint, so it does not appear in the template.  The
; scheduling type is chosen by <Is_float_mode>.
2040 (define_insn "neon_vmls<mode>_unspec"
2041 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2042 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2043 (match_operand:VDQW 2 "s_register_operand" "w")
2044 (match_operand:VDQW 3 "s_register_operand" "w")]
2047 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2049 (if_then_else (match_test "<Is_float_mode>")
2050 (const_string "neon_fp_mla_s<q>")
2051 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Builtin pattern emitting vmlsl: VW D-register operands 2 and 3 and a
;; <V_widen> Q-register result.  Operand 1 is tied to the destination by the
;; "0" constraint.  The element-type prefix is printed from operand 4 via
;; the %T4 modifier.
2054 (define_insn "neon_vmlsl<mode>"
2055 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2056 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2057 (match_operand:VW 2 "s_register_operand" "w")
2058 (match_operand:VW 3 "s_register_operand" "w")
2059 (match_operand:SI 4 "immediate_operand" "i")]
2062 "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2063 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin pattern emitting vq[r]dmulh for the VMDQI modes with the signed
;; element suffix.  Operand 3 drives the %O3 mnemonic infix; presumably it
;; prints the rounding "r" -- confirm against the operand printer.
2066 (define_insn "neon_vqdmulh<mode>"
2067 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2068 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2069 (match_operand:VMDQI 2 "s_register_operand" "w")
2070 (match_operand:SI 3 "immediate_operand" "i")]
2073 "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2074 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; Builtin pattern emitting vqdmlal with the signed element suffix: VMDI
;; D-register operands 2 and 3 and a <V_widen> Q-register result.  Operand 1
;; is tied to the destination by the "0" constraint; operand 4 does not
;; appear in the template.
2077 (define_insn "neon_vqdmlal<mode>"
2078 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2079 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2080 (match_operand:VMDI 2 "s_register_operand" "w")
2081 (match_operand:VMDI 3 "s_register_operand" "w")
2082 (match_operand:SI 4 "immediate_operand" "i")]
2085 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2086 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: operand 1 (<V_widen> mode) is tied to the destination and
;; operands 2 and 3 are VMDI inputs.  The SI immediate operand 4 is part of
;; the pattern but is not referenced by the output template.
2089 (define_insn "neon_vqdmlsl<mode>"
2090 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2091 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2092 (match_operand:VMDI 2 "s_register_operand" "w")
2093 (match_operand:VMDI 3 "s_register_operand" "w")
2094 (match_operand:SI 4 "immediate_operand" "i")]
2097 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2098 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: multiplies two VW-mode registers into a <V_widen> destination.
;; Operand 3 is an SI immediate consumed by the %T3 modifier in the mnemonic.
2101 (define_insn "neon_vmull<mode>"
2102 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2103 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2104 (match_operand:VW 2 "s_register_operand" "w")
2105 (match_operand:SI 3 "immediate_operand" "i")]
2108 "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2109 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI-mode registers produce a <V_widen> destination.  The SI
;; immediate operand 3 is not referenced by the output template.
2112 (define_insn "neon_vqdmull<mode>"
2113 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2114 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2115 (match_operand:VMDI 2 "s_register_operand" "w")
2116 (match_operand:SI 3 "immediate_operand" "i")]
2119 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2120 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for the vsub intrinsic on VDQX modes.  When the mode is not a
;; floating-point mode, or flag_unsafe_math_optimizations is set, it emits
;; the generic sub<mode>3 pattern; neon_vsub<mode>_unspec is the other form
;; it can emit.  Operand 3 is an SI immediate.
2123 (define_expand "neon_vsub<mode>"
2124 [(match_operand:VDQX 0 "s_register_operand" "=w")
2125 (match_operand:VDQX 1 "s_register_operand" "w")
2126 (match_operand:VDQX 2 "s_register_operand" "w")
2127 (match_operand:SI 3 "immediate_operand" "i")]
2130 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2131 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2133 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2138 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vector subtraction for VDQX modes, emitted as vsub on
;; operands 1 and 2.  The "type" value is chosen on <Is_float_mode>.
2140 (define_insn "neon_vsub<mode>_unspec"
2141 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
2142 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
2143 (match_operand:VDQX 2 "s_register_operand" "w")]
2146 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2148 (if_then_else (match_test "<Is_float_mode>")
2149 (const_string "neon_fp_addsub_s<q>")
2150 (const_string "neon_sub<q>")))]
;; vsubl: subtracts two VDI-mode registers into a <V_widen> destination.
;; Operand 3 is an SI immediate consumed by the %T3 modifier in the mnemonic.
2153 (define_insn "neon_vsubl<mode>"
2154 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2155 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2156 (match_operand:VDI 2 "s_register_operand" "w")
2157 (match_operand:SI 3 "immediate_operand" "i")]
2160 "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2161 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 is already in the <V_widen> mode, operand 2 is the
;; VDI-mode input.  Operand 3 is an SI immediate consumed by %T3.
2164 (define_insn "neon_vsubw<mode>"
2165 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2166 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2167 (match_operand:VDI 2 "s_register_operand" "w")
2168 (match_operand:SI 3 "immediate_operand" "i")]
2171 "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
2172 [(set_attr "type" "neon_sub_widen")]
;; vqsub on two VDQIX-mode registers.  Operand 3 is an SI immediate consumed
;; by the %T3 modifier in the mnemonic.
2175 (define_insn "neon_vqsub<mode>"
2176 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2177 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2178 (match_operand:VDQIX 2 "s_register_operand" "w")
2179 (match_operand:SI 3 "immediate_operand" "i")]
2182 "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2183 [(set_attr "type" "neon_qsub<q>")]
;; vhsub on two VDQIW-mode registers.  Operand 3 is an SI immediate consumed
;; by the %T3 modifier in the mnemonic.
2186 (define_insn "neon_vhsub<mode>"
2187 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2188 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2189 (match_operand:VDQIW 2 "s_register_operand" "w")
2190 (match_operand:SI 3 "immediate_operand" "i")]
2193 "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2194 [(set_attr "type" "neon_sub_halve<q>")]
;; vsubhn: two VN-mode registers produce a <V_narrow> destination.  Operand 3
;; is an SI immediate used through the %O3 modifier inside the mnemonic.
2197 (define_insn "neon_vsubhn<mode>"
2198 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2199 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2200 (match_operand:VN 2 "s_register_operand" "w")
2201 (match_operand:SI 3 "immediate_operand" "i")]
2204 "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
2205 [(set_attr "type" "neon_sub_halve_narrow_q")]
;; vceq with two alternatives: operand 2 is either a register ("w") or zero
;; ("Dz"), in which case the instruction is printed with a literal #0.
;; The "type" value is the FP compare type for float modes; otherwise it
;; depends on whether operand 2 matches zero_operand.
2208 (define_insn "neon_vceq<mode>"
2209 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2210 (unspec:<V_cmp_result>
2211 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2212 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2213 (match_operand:SI 3 "immediate_operand" "i,i")]
2217 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2218 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2220 (if_then_else (match_test "<Is_float_mode>")
2221 (const_string "neon_fp_compare_s<q>")
2222 (if_then_else (match_operand 2 "zero_operand")
2223 (const_string "neon_compare_zero<q>")
2224 (const_string "neon_compare<q>"))))]
;; vcge with two alternatives: operand 2 is either a register ("w") or zero
;; ("Dz"), in which case the instruction is printed with a literal #0.
;; Operand 3 is an SI immediate consumed by the %T3 modifier.  The "type"
;; value is selected the same way as for the other compare patterns here.
2227 (define_insn "neon_vcge<mode>"
2228 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2229 (unspec:<V_cmp_result>
2230 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2231 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2232 (match_operand:SI 3 "immediate_operand" "i,i")]
2236 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2237 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2239 (if_then_else (match_test "<Is_float_mode>")
2240 (const_string "neon_fp_compare_s<q>")
2241 (if_then_else (match_operand 2 "zero_operand")
2242 (const_string "neon_compare_zero<q>")
2243 (const_string "neon_compare<q>"))))]
;; Register-register vcge on VDQIW (integer) modes only; there is no
;; compare-with-zero alternative.  Operand 3 is an SI immediate consumed by
;; the %T3 modifier in the mnemonic.
2246 (define_insn "neon_vcgeu<mode>"
2247 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2248 (unspec:<V_cmp_result>
2249 [(match_operand:VDQIW 1 "s_register_operand" "w")
2250 (match_operand:VDQIW 2 "s_register_operand" "w")
2251 (match_operand:SI 3 "immediate_operand" "i")]
2254 "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2255 [(set_attr "type" "neon_compare<q>")]
;; vcgt with two alternatives: operand 2 is either a register ("w") or zero
;; ("Dz"), in which case the instruction is printed with a literal #0.
;; Operand 3 is an SI immediate consumed by the %T3 modifier.
2258 (define_insn "neon_vcgt<mode>"
2259 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2260 (unspec:<V_cmp_result>
2261 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2262 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2263 (match_operand:SI 3 "immediate_operand" "i,i")]
2267 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2268 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2270 (if_then_else (match_test "<Is_float_mode>")
2271 (const_string "neon_fp_compare_s<q>")
2272 (if_then_else (match_operand 2 "zero_operand")
2273 (const_string "neon_compare_zero<q>")
2274 (const_string "neon_compare<q>"))))]
;; Register-register vcgt on VDQIW (integer) modes only; there is no
;; compare-with-zero alternative.  Operand 3 is an SI immediate consumed by
;; the %T3 modifier in the mnemonic.
2277 (define_insn "neon_vcgtu<mode>"
2278 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2279 (unspec:<V_cmp_result>
2280 [(match_operand:VDQIW 1 "s_register_operand" "w")
2281 (match_operand:VDQIW 2 "s_register_operand" "w")
2282 (match_operand:SI 3 "immediate_operand" "i")]
2285 "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2286 [(set_attr "type" "neon_compare<q>")]
2289 ;; VCLE and VCLT only support comparisons with immediate zero (register
2290 ;; variants are VCGE and VCGT with operands reversed).
;; vcle against zero: operand 2 must satisfy zero_operand ("Dz") and the
;; instruction is always printed with a literal #0.  Operand 3 is an SI
;; immediate consumed by the %T3 modifier.
2292 (define_insn "neon_vcle<mode>"
2293 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2294 (unspec:<V_cmp_result>
2295 [(match_operand:VDQW 1 "s_register_operand" "w")
2296 (match_operand:VDQW 2 "zero_operand" "Dz")
2297 (match_operand:SI 3 "immediate_operand" "i")]
2300 "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2302 (if_then_else (match_test "<Is_float_mode>")
2303 (const_string "neon_fp_compare_s<q>")
2304 (if_then_else (match_operand 2 "zero_operand")
2305 (const_string "neon_compare_zero<q>")
2306 (const_string "neon_compare<q>"))))]
;; vclt against zero: operand 2 must satisfy zero_operand ("Dz") and the
;; instruction is always printed with a literal #0.  Operand 3 is an SI
;; immediate consumed by the %T3 modifier.
2309 (define_insn "neon_vclt<mode>"
2310 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2311 (unspec:<V_cmp_result>
2312 [(match_operand:VDQW 1 "s_register_operand" "w")
2313 (match_operand:VDQW 2 "zero_operand" "Dz")
2314 (match_operand:SI 3 "immediate_operand" "i")]
2317 "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2319 (if_then_else (match_test "<Is_float_mode>")
2320 (const_string "neon_fp_compare_s<q>")
2321 (if_then_else (match_operand 2 "zero_operand")
2322 (const_string "neon_compare_zero<q>")
2323 (const_string "neon_compare<q>"))))]
;; vacge on two VCVTF-mode registers, producing a <V_cmp_result> value.  The
;; SI immediate operand 3 is not referenced by the output template.
2326 (define_insn "neon_vcage<mode>"
2327 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2328 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2329 (match_operand:VCVTF 2 "s_register_operand" "w")
2330 (match_operand:SI 3 "immediate_operand" "i")]
2333 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2334 [(set_attr "type" "neon_fp_compare_s<q>")]
;; vacgt on two VCVTF-mode registers, producing a <V_cmp_result> value.  The
;; SI immediate operand 3 is not referenced by the output template.
2337 (define_insn "neon_vcagt<mode>"
2338 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2339 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2340 (match_operand:VCVTF 2 "s_register_operand" "w")
2341 (match_operand:SI 3 "immediate_operand" "i")]
2344 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2345 [(set_attr "type" "neon_fp_compare_s<q>")]
;; vtst on two VDQIW-mode registers; the result has the same mode as the
;; inputs.  The SI immediate operand 3 is not referenced by the template.
2348 (define_insn "neon_vtst<mode>"
2349 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2350 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2351 (match_operand:VDQIW 2 "s_register_operand" "w")
2352 (match_operand:SI 3 "immediate_operand" "i")]
2355 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2356 [(set_attr "type" "neon_tst<q>")]
;; vabd on two VDQW-mode registers.  Operand 3 is an SI immediate consumed
;; by the %T3 modifier; the "type" value is chosen on <Is_float_mode>.
2359 (define_insn "neon_vabd<mode>"
2360 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2361 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2362 (match_operand:VDQW 2 "s_register_operand" "w")
2363 (match_operand:SI 3 "immediate_operand" "i")]
2366 "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2368 (if_then_else (match_test "<Is_float_mode>")
2369 (const_string "neon_fp_abd_s<q>")
2370 (const_string "neon_abd<q>")))]
;; vabdl: two VW-mode registers produce a <V_widen> destination.  Operand 3
;; is an SI immediate consumed by the %T3 modifier in the mnemonic.
2373 (define_insn "neon_vabdl<mode>"
2374 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2375 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2376 (match_operand:VW 2 "s_register_operand" "w")
2377 (match_operand:SI 3 "immediate_operand" "i")]
2380 "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2381 [(set_attr "type" "neon_abd_long")]
;; vaba: the RTL is a plus of an unspec over operands 2, 3 and the SI
;; immediate operand 4, with operand 1 as the addend.  Operand 1 is tied to
;; the destination ("0" constraint); %T4 is used in the mnemonic.
2384 (define_insn "neon_vaba<mode>"
2385 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2386 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2387 (match_operand:VDQIW 3 "s_register_operand" "w")
2388 (match_operand:SI 4 "immediate_operand" "i")]
2390 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2392 "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2393 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal: like the vaba pattern shape (plus of an unspec and a tied
;; accumulator), but the accumulator and result are in the <V_widen> mode
;; while operands 2 and 3 are VW-mode registers.
2396 (define_insn "neon_vabal<mode>"
2397 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2398 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2399 (match_operand:VW 3 "s_register_operand" "w")
2400 (match_operand:SI 4 "immediate_operand" "i")]
2402 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2404 "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2405 [(set_attr "type" "neon_arith_acc<q>")]
;; vmax on two VDQW-mode registers.  Operand 3 is an SI immediate consumed
;; by the %T3 modifier; the "type" value is chosen on <Is_float_mode>.
2408 (define_insn "neon_vmax<mode>"
2409 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2410 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2411 (match_operand:VDQW 2 "s_register_operand" "w")
2412 (match_operand:SI 3 "immediate_operand" "i")]
2415 "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2417 (if_then_else (match_test "<Is_float_mode>")
2418 (const_string "neon_fp_minmax_s<q>")
2419 (const_string "neon_minmax<q>")))]
;; vmin on two VDQW-mode registers.  Operand 3 is an SI immediate consumed
;; by the %T3 modifier; the "type" value is chosen on <Is_float_mode>.
2422 (define_insn "neon_vmin<mode>"
2423 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2424 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2425 (match_operand:VDQW 2 "s_register_operand" "w")
2426 (match_operand:SI 3 "immediate_operand" "i")]
2429 "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2431 (if_then_else (match_test "<Is_float_mode>")
2432 (const_string "neon_fp_minmax_s<q>")
2433 (const_string "neon_minmax<q>")))]
;; Expander for the vpadd intrinsic on VD modes.  It forwards to
;; neon_vpadd_internal<mode>; operand 3 is an SI immediate.
2436 (define_expand "neon_vpadd<mode>"
2437 [(match_operand:VD 0 "s_register_operand" "=w")
2438 (match_operand:VD 1 "s_register_operand" "w")
2439 (match_operand:VD 2 "s_register_operand" "w")
2440 (match_operand:SI 3 "immediate_operand" "i")]
2443 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: a single VDQIW-mode input produces a <V_double_width> result.
;; Operand 2 is an SI immediate consumed by the %T2 modifier.
2448 (define_insn "neon_vpaddl<mode>"
2449 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2450 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
2451 (match_operand:SI 2 "immediate_operand" "i")]
2454 "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2455 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: operand 1 (<V_double_width> mode) is tied to the destination and
;; operand 2 is the VDQIW-mode input.  Operand 3 is an SI immediate consumed
;; by the %T3 modifier.
2458 (define_insn "neon_vpadal<mode>"
2459 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2460 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2461 (match_operand:VDQIW 2 "s_register_operand" "w")
2462 (match_operand:SI 3 "immediate_operand" "i")]
2465 "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2466 [(set_attr "type" "neon_reduc_add_acc")]
;; vpmax on two VD-mode registers.  Operand 3 is an SI immediate consumed by
;; the %T3 modifier; the "type" value is chosen on <Is_float_mode>.
2469 (define_insn "neon_vpmax<mode>"
2470 [(set (match_operand:VD 0 "s_register_operand" "=w")
2471 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2472 (match_operand:VD 2 "s_register_operand" "w")
2473 (match_operand:SI 3 "immediate_operand" "i")]
2476 "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2478 (if_then_else (match_test "<Is_float_mode>")
2479 (const_string "neon_fp_reduc_minmax_s<q>")
2480 (const_string "neon_reduc_minmax<q>")))]
;; vpmin on two VD-mode registers.  Operand 3 is an SI immediate consumed by
;; the %T3 modifier; the "type" value is chosen on <Is_float_mode>.
2483 (define_insn "neon_vpmin<mode>"
2484 [(set (match_operand:VD 0 "s_register_operand" "=w")
2485 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2486 (match_operand:VD 2 "s_register_operand" "w")
2487 (match_operand:SI 3 "immediate_operand" "i")]
2490 "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2492 (if_then_else (match_test "<Is_float_mode>")
2493 (const_string "neon_fp_reduc_minmax_s<q>")
2494 (const_string "neon_reduc_minmax<q>")))]
;; vrecps on two VCVTF-mode registers.  The SI immediate operand 3 is not
;; referenced by the output template.
2497 (define_insn "neon_vrecps<mode>"
2498 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2499 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2500 (match_operand:VCVTF 2 "s_register_operand" "w")
2501 (match_operand:SI 3 "immediate_operand" "i")]
2504 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2505 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts on two VCVTF-mode registers.  The SI immediate operand 3 is not
;; referenced by the output template.
2508 (define_insn "neon_vrsqrts<mode>"
2509 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2510 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2511 (match_operand:VCVTF 2 "s_register_operand" "w")
2512 (match_operand:SI 3 "immediate_operand" "i")]
2515 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2516 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for the vabs intrinsic: emits the abs<mode>2 pattern on
;; operands 0 and 1.  The SI immediate operand 2 is not passed on.
2519 (define_expand "neon_vabs<mode>"
2520 [(match_operand:VDQW 0 "s_register_operand" "")
2521 (match_operand:VDQW 1 "s_register_operand" "")
2522 (match_operand:SI 2 "immediate_operand" "")]
2525 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs on a VDQIW-mode register.  The SI immediate operand 2 is not
;; referenced by the output template.
2529 (define_insn "neon_vqabs<mode>"
2530 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2531 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2532 (match_operand:SI 2 "immediate_operand" "i")]
2535 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2536 [(set_attr "type" "neon_qabs<q>")]
;; Expander for the vneg intrinsic: emits the neg<mode>2 pattern on
;; operands 0 and 1.  The SI immediate operand 2 is not passed on.
2539 (define_expand "neon_vneg<mode>"
2540 [(match_operand:VDQW 0 "s_register_operand" "")
2541 (match_operand:VDQW 1 "s_register_operand" "")
2542 (match_operand:SI 2 "immediate_operand" "")]
2545 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; vqneg on a VDQIW-mode register.  The SI immediate operand 2 is not
;; referenced by the output template.
2549 (define_insn "neon_vqneg<mode>"
2550 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2551 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2552 (match_operand:SI 2 "immediate_operand" "i")]
2555 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2556 [(set_attr "type" "neon_qneg<q>")]
;; vcls on a VDQIW-mode register.  The SI immediate operand 2 is not
;; referenced by the output template.
2559 (define_insn "neon_vcls<mode>"
2560 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2561 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2562 (match_operand:SI 2 "immediate_operand" "i")]
2565 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2566 [(set_attr "type" "neon_cls<q>")]
;; clz<mode>2: the clz RTL code applied to a VDQIW-mode register, emitted as
;; a vclz instruction.
2569 (define_insn "clz<mode>2"
2570 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2571 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2573 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2574 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vclz intrinsic: emits the clz<mode>2 pattern on
;; operands 0 and 1.  The SI immediate operand 2 is not passed on.
2577 (define_expand "neon_vclz<mode>"
2578 [(match_operand:VDQIW 0 "s_register_operand" "")
2579 (match_operand:VDQIW 1 "s_register_operand" "")
2580 (match_operand:SI 2 "immediate_operand" "")]
2583 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; popcount<mode>2: the popcount RTL code applied to a VE-mode register,
;; emitted as a vcnt instruction.
2587 (define_insn "popcount<mode>2"
2588 [(set (match_operand:VE 0 "s_register_operand" "=w")
2589 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2591 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2592 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vcnt intrinsic: emits the popcount<mode>2 pattern on
;; operands 0 and 1.  The SI immediate operand 2 is not passed on.
2595 (define_expand "neon_vcnt<mode>"
2596 [(match_operand:VE 0 "s_register_operand" "=w")
2597 (match_operand:VE 1 "s_register_operand" "w")
2598 (match_operand:SI 2 "immediate_operand" "i")]
2601 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; vrecpe on a V32-mode register.  The SI immediate operand 2 is not
;; referenced by the output template.
2605 (define_insn "neon_vrecpe<mode>"
2606 [(set (match_operand:V32 0 "s_register_operand" "=w")
2607 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2608 (match_operand:SI 2 "immediate_operand" "i")]
2611 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2612 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrsqrte on a V32-mode register.  The SI immediate operand 2 is not
;; referenced by the output template.
2615 (define_insn "neon_vrsqrte<mode>"
2616 [(set (match_operand:V32 0 "s_register_operand" "=w")
2617 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2618 (match_operand:SI 2 "immediate_operand" "i")]
2621 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2622 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for the vmvn intrinsic: emits the one_cmpl<mode>2 pattern on
;; operands 0 and 1.  The SI immediate operand 2 is not passed on.
2625 (define_expand "neon_vmvn<mode>"
2626 [(match_operand:VDQIW 0 "s_register_operand" "")
2627 (match_operand:VDQIW 1 "s_register_operand" "")
2628 (match_operand:SI 2 "immediate_operand" "")]
2631 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Extracts lane operand 2 of a VD-mode register into a core SI register
;; with "vmov.s".  When BYTES_BIG_ENDIAN, the lane index is mirrored
;; (GET_MODE_NUNITS - 1 - elt) before the instruction is printed.
2635 (define_insn "neon_vget_lane<mode>_sext_internal"
2636 [(set (match_operand:SI 0 "s_register_operand" "=r")
2638 (vec_select:<V_elem>
2639 (match_operand:VD 1 "s_register_operand" "w")
2640 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2643 if (BYTES_BIG_ENDIAN)
2645 int elt = INTVAL (operands[2]);
2646 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2647 operands[2] = GEN_INT (elt);
2649 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2651 [(set_attr "type" "neon_to_gp")]
;; Extracts lane operand 2 of a VD-mode register into a core SI register
;; with "vmov.u".  When BYTES_BIG_ENDIAN, the lane index is mirrored
;; (GET_MODE_NUNITS - 1 - elt) before the instruction is printed.
2654 (define_insn "neon_vget_lane<mode>_zext_internal"
2655 [(set (match_operand:SI 0 "s_register_operand" "=r")
2657 (vec_select:<V_elem>
2658 (match_operand:VD 1 "s_register_operand" "w")
2659 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2662 if (BYTES_BIG_ENDIAN)
2664 int elt = INTVAL (operands[2]);
2665 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2666 operands[2] = GEN_INT (elt);
2668 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2670 [(set_attr "type" "neon_to_gp")]
;; VQ-mode variant of the "vmov.s" lane extract.  The lane is split into a
;; <V_HALF>-mode register (regno + 2 * (elt / halfelts)) and an index within
;; that half (elt % halfelts, mirrored when BYTES_BIG_ENDIAN); the
;; instruction is printed from that rebuilt operand array.
2673 (define_insn "neon_vget_lane<mode>_sext_internal"
2674 [(set (match_operand:SI 0 "s_register_operand" "=r")
2676 (vec_select:<V_elem>
2677 (match_operand:VQ 1 "s_register_operand" "w")
2678 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2682 int regno = REGNO (operands[1]);
2683 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2684 unsigned int elt = INTVAL (operands[2]);
2685 unsigned int elt_adj = elt % halfelts;
2687 if (BYTES_BIG_ENDIAN)
2688 elt_adj = halfelts - 1 - elt_adj;
2690 ops[0] = operands[0];
2691 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2692 ops[2] = GEN_INT (elt_adj);
2693 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2697 [(set_attr "type" "neon_to_gp_q")]
;; VQ-mode variant of the "vmov.u" lane extract.  The lane is split into a
;; <V_HALF>-mode register (regno + 2 * (elt / halfelts)) and an index within
;; that half (elt % halfelts, mirrored when BYTES_BIG_ENDIAN); the
;; instruction is printed from that rebuilt operand array.
2700 (define_insn "neon_vget_lane<mode>_zext_internal"
2701 [(set (match_operand:SI 0 "s_register_operand" "=r")
2703 (vec_select:<V_elem>
2704 (match_operand:VQ 1 "s_register_operand" "w")
2705 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2709 int regno = REGNO (operands[1]);
2710 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2711 unsigned int elt = INTVAL (operands[2]);
2712 unsigned int elt_adj = elt % halfelts;
2714 if (BYTES_BIG_ENDIAN)
2715 elt_adj = halfelts - 1 - elt_adj;
2717 ops[0] = operands[0];
2718 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2719 ops[2] = GEN_INT (elt_adj);
2720 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2724 [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane intrinsic on VDQW modes.  Operand 2 is the
;; lane number (bounds-checked with neon_lane_bounds, and XORed with
;; reg_nelts - 1 when BYTES_BIG_ENDIAN); operand 3 is the info word, read
;; into "magic".  vec_extract<mode> is used when (magic & 3) == 3 or the
;; element size is 32 bits; otherwise bit 0 of magic selects between the
;; _sext_internal and _zext_internal patterns.
2727 (define_expand "neon_vget_lane<mode>"
2728 [(match_operand:<V_ext> 0 "s_register_operand" "")
2729 (match_operand:VDQW 1 "s_register_operand" "")
2730 (match_operand:SI 2 "immediate_operand" "")
2731 (match_operand:SI 3 "immediate_operand" "")]
2734 HOST_WIDE_INT magic = INTVAL (operands[3]);
2737 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2739 if (BYTES_BIG_ENDIAN)
2741 /* The intrinsics are defined in terms of a model where the
2742 element ordering in memory is vldm order, whereas the generic
2743 RTL is defined in terms of a model where the element ordering
2744 in memory is array order. Convert the lane number to conform
2746 unsigned int elt = INTVAL (operands[2]);
2747 unsigned int reg_nelts
2748 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2749 elt ^= reg_nelts - 1;
2750 operands[2] = GEN_INT (elt);
2753 if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2754 insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
2757 if ((magic & 1) != 0)
2758 insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
2761 insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
2768 ; Operand 3 (info word) is ignored because it does nothing useful with 64-bit elements.
;; DImode vget_lane: bounds-checks the lane number (operand 2) with
;; neon_lane_bounds (operands[2], 0, 1) and then simply moves operand 1 to
;; operand 0.  Operand 3 is not used by the expansion code.
2771 (define_expand "neon_vget_lanedi"
2772 [(match_operand:DI 0 "s_register_operand" "=r")
2773 (match_operand:DI 1 "s_register_operand" "w")
2774 (match_operand:SI 2 "immediate_operand" "i")
2775 (match_operand:SI 3 "immediate_operand" "i")]
2778 neon_lane_bounds (operands[2], 0, 1);
2779 emit_move_insn (operands[0], operands[1]);
;; V2DI vget_lane: switches on the lane number in operand 2 and moves either
;; the DImode low part or the DImode high part of operand 1 into operand 0.
;; neon_lane_bounds (operands[2], 0, 1) is also called from within the
;; switch.  Operand 3 is not used by the expansion code.
2783 (define_expand "neon_vget_lanev2di"
2784 [(match_operand:DI 0 "s_register_operand" "")
2785 (match_operand:V2DI 1 "s_register_operand" "")
2786 (match_operand:SI 2 "immediate_operand" "")
2787 (match_operand:SI 3 "immediate_operand" "")]
2790 switch (INTVAL (operands[2]))
2793 emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
2796 emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
2799 neon_lane_bounds (operands[2], 0, 1);
;; Expander for the vset_lane intrinsic on VDQ modes.  Operand 1 is the new
;; element, operand 2 the input vector and operand 3 the lane number, which
;; is bounds-checked and, when BYTES_BIG_ENDIAN, XORed with reg_nelts - 1
;; (reg_nelts = 64 / element bit size).  The lane is passed to
;; vec_set<mode>_internal as the one-hot mask 1 << elt.
2805 (define_expand "neon_vset_lane<mode>"
2806 [(match_operand:VDQ 0 "s_register_operand" "=w")
2807 (match_operand:<V_elem> 1 "s_register_operand" "r")
2808 (match_operand:VDQ 2 "s_register_operand" "0")
2809 (match_operand:SI 3 "immediate_operand" "i")]
2812 unsigned int elt = INTVAL (operands[3]);
2813 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
2815 if (BYTES_BIG_ENDIAN)
2817 unsigned int reg_nelts
2818 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2819 elt ^= reg_nelts - 1;
2822 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2823 GEN_INT (1 << elt), operands[2]));
2827 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; DImode vset_lane: bounds-checks the lane number (operand 3) with
;; neon_lane_bounds (operands[3], 0, 1) and then moves operand 1 to
;; operand 0.  Operand 2 is not used by the expansion code.
2829 (define_expand "neon_vset_lanedi"
2830 [(match_operand:DI 0 "s_register_operand" "=w")
2831 (match_operand:DI 1 "s_register_operand" "r")
2832 (match_operand:DI 2 "s_register_operand" "0")
2833 (match_operand:SI 3 "immediate_operand" "i")]
2836 neon_lane_bounds (operands[3], 0, 1);
2837 emit_move_insn (operands[0], operands[1]);
;; Expander for the vcreate intrinsic: takes the <MODE>mode low part of the
;; DImode operand 1 (via gen_lowpart) and moves it into operand 0.
2841 (define_expand "neon_vcreate<mode>"
2842 [(match_operand:VDX 0 "s_register_operand" "")
2843 (match_operand:DI 1 "general_operand" "")]
2846 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2847 emit_move_insn (operands[0], src);
;; vec_duplicate of a core-register ("r") scalar into a VX-mode vector,
;; emitted as vdup.
2851 (define_insn "neon_vdup_n<mode>"
2852 [(set (match_operand:VX 0 "s_register_operand" "=w")
2853 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2855 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2856 [(set_attr "type" "neon_from_gp<q>")]
;; V32-mode vec_duplicate with two alternatives for the scalar source:
;; constraint "r" (printed as %1) or constraint "t" (printed as %y1).  Each
;; alternative has its own "type" value.
2859 (define_insn "neon_vdup_n<mode>"
2860 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2861 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2864 vdup.<V_sz_elem>\t%<V_reg>0, %1
2865 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2866 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode vdup_n: expands to a plain move of operand 1 into operand 0.
2869 (define_expand "neon_vdup_ndi"
2870 [(match_operand:DI 0 "s_register_operand" "=w")
2871 (match_operand:DI 1 "s_register_operand" "r")]
2874 emit_move_insn (operands[0], operands[1]);
;; vec_duplicate of a DImode value into V2DI, emitted as two vmov
;; instructions that write %e0 and then %f0.  The first alternative takes
;; the source from core registers ("r", printed as %Q1, %R1), the second
;; from a "w" register (%P1).  Length is 8 and the type is "multiple".
2879 (define_insn "neon_vdup_nv2di"
2880 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2881 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2884 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2885 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2886 [(set_attr "length" "8")
2887 (set_attr "type" "multiple")]
;; Duplicates lane operand 2 of a <V_double_vector_mode> register across a
;; VDQW-mode destination with vdup.  When BYTES_BIG_ENDIAN, the lane index
;; is mirrored within the source vector before printing.  Two templates are
;; returned, differing only in the destination modifier (%P0 versus %q0).
2890 (define_insn "neon_vdup_lane<mode>_internal"
2891 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2893 (vec_select:<V_elem>
2894 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2895 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2898 if (BYTES_BIG_ENDIAN)
2900 int elt = INTVAL (operands[2]);
2901 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2902 operands[2] = GEN_INT (elt);
2905 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2907 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2909 [(set_attr "type" "neon_dup<q>")]
;; Expander for the vdup_lane intrinsic.  The lane number (operand 2) is
;; bounds-checked against the element count of <V_double_vector_mode> and,
;; when BYTES_BIG_ENDIAN, XORed with reg_nelts - 1 before the
;; neon_vdup_lane<mode>_internal pattern is emitted.
2912 (define_expand "neon_vdup_lane<mode>"
2913 [(match_operand:VDQW 0 "s_register_operand" "=w")
2914 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2915 (match_operand:SI 2 "immediate_operand" "i")]
2918 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
2919 if (BYTES_BIG_ENDIAN)
2921 unsigned int elt = INTVAL (operands[2]);
2922 unsigned int reg_nelts
2923 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
2924 elt ^= reg_nelts - 1;
2925 operands[2] = GEN_INT (elt);
2927 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2932 ; Scalar index is ignored, since only zero is valid here.
;; DImode vdup_lane: bounds-checks operand 2 with
;; neon_lane_bounds (operands[2], 0, 1), then moves operand 1 to operand 0.
2933 (define_expand "neon_vdup_lanedi"
2934 [(match_operand:DI 0 "s_register_operand" "=w")
2935 (match_operand:DI 1 "s_register_operand" "w")
2936 (match_operand:SI 2 "immediate_operand" "i")]
2939 neon_lane_bounds (operands[2], 0, 1);
2940 emit_move_insn (operands[0], operands[1]);
2944 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI vdup_lane from a DImode source: bounds-checks operand 2 with
;; neon_lane_bounds (operands[2], 0, 1), then emits neon_vdup_nv2di on
;; operands 0 and 1.
2945 (define_expand "neon_vdup_lanev2di"
2946 [(match_operand:V2DI 0 "s_register_operand" "=w")
2947 (match_operand:DI 1 "s_register_operand" "w")
2948 (match_operand:SI 2 "immediate_operand" "i")]
2951 neon_lane_bounds (operands[2], 0, 1);
2952 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2956 ; Disabled before reload because we don't want combine doing something silly,
2957 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap: two sets exchange operands 0 and 1 (both "+w"), emitted
;; as a single vswp.  The insn condition requires TARGET_NEON and
;; reload_completed.
2958 (define_insn "*neon_vswp<mode>"
2959 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2960 (match_operand:VDQX 1 "s_register_operand" "+w"))
2961 (set (match_dup 1) (match_dup 0))]
2962 "TARGET_NEON && reload_completed"
2963 "vswp\t%<V_reg>0, %<V_reg>1"
2964 [(set_attr "type" "neon_permute<q>")]
2967 ;; In this insn, operand 1 should be low, and operand 2 the high part of the destination vector.
2969 ;; FIXME: A different implementation of this builtin could make it much
2970 ;; more likely that we wouldn't actually need to output anything (we could make
2971 ;; it so that the reg allocator puts things in the right places magically
2972 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vec_concat of two VDX-mode registers (operands 1 and 2) into a
;; <V_DOUBLE>-mode destination.  The split condition adds
;; "&& reload_completed", and the split code calls neon_split_vcombine on
;; the operands.  The type is "multiple".
2974 (define_insn_and_split "neon_vcombine<mode>"
2975 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2976 (vec_concat:<V_DOUBLE>
2977 (match_operand:VDX 1 "s_register_operand" "w")
2978 (match_operand:VDX 2 "s_register_operand" "w")))]
2981 "&& reload_completed"
2984 neon_split_vcombine (operands);
2987 [(set_attr "type" "multiple")]
;; Expander for vget_high: moves into operand 0 the <V_HALF>-mode subreg of
;; operand 1 located at byte offset GET_MODE_SIZE (<V_HALF>mode).
2990 (define_expand "neon_vget_high<mode>"
2991 [(match_operand:<V_HALF> 0 "s_register_operand")
2992 (match_operand:VQX 1 "s_register_operand")]
2995 emit_move_insn (operands[0],
2996 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
2997 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for vget_low: moves into operand 0 a <V_HALF>-mode subreg of
;; operand 1 built with simplify_gen_subreg.
3001 (define_expand "neon_vget_low<mode>"
3002 [(match_operand:<V_HALF> 0 "s_register_operand")
3003 (match_operand:VQX 1 "s_register_operand")]
3006 emit_move_insn (operands[0],
3007 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; float RTL code from a VCVTI-mode register to <V_CVTTO>, emitted as
;; vcvt.f32.s32.  Only enabled when TARGET_NEON and !flag_rounding_math.
3012 (define_insn "float<mode><V_cvtto>2"
3013 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3014 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3015 "TARGET_NEON && !flag_rounding_math"
3016 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3017 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; unsigned_float RTL code from a VCVTI-mode register to <V_CVTTO>, emitted
;; as vcvt.f32.u32.  Only enabled when TARGET_NEON and !flag_rounding_math.
3020 (define_insn "floatuns<mode><V_cvtto>2"
3021 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3022 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3023 "TARGET_NEON && !flag_rounding_math"
3024 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3025 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; fix RTL code from a VCVTF-mode register to <V_CVTTO>, emitted as
;; vcvt.s32.f32.
3028 (define_insn "fix_trunc<mode><V_cvtto>2"
3029 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3030 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3032 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3033 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; unsigned_fix RTL code from a VCVTF-mode register to <V_CVTTO>, emitted as
;; vcvt.u32.f32.
3036 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3037 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3038 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3040 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3041 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic vcvt from a VCVTF-mode register to <V_CVTTO>.  Operand 2 is an
;; SI immediate; the %T2 modifier supplies the prefix of the "32"
;; destination type in the mnemonic.
3044 (define_insn "neon_vcvt<mode>"
3045 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3046 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3047 (match_operand:SI 2 "immediate_operand" "i")]
3050 "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
3051 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic vcvt from a VCVTI-mode register to <V_CVTTO>.  Operand 2 is an
;; SI immediate; the %T2 modifier supplies the prefix of the "32" source
;; type in the mnemonic.
3054 (define_insn "neon_vcvt<mode>"
3055 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3056 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3057 (match_operand:SI 2 "immediate_operand" "i")]
3060 "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
3061 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Converts a V4HF register to V4SF with vcvt.f32.f16.  Requires both
;; TARGET_NEON and TARGET_FP16.
3064 (define_insn "neon_vcvtv4sfv4hf"
3065 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3066 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3068 "TARGET_NEON && TARGET_FP16"
3069 "vcvt.f32.f16\t%q0, %P1"
3070 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Converts a V4SF register to V4HF with vcvt.f16.f32.  Requires both
;; TARGET_NEON and TARGET_FP16.
3073 (define_insn "neon_vcvtv4hfv4sf"
3074 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3075 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3077 "TARGET_NEON && TARGET_FP16"
3078 "vcvt.f16.f32\t%P0, %q1"
3079 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt with a trailing immediate, from a VCVTF-mode register to
;; <V_CVTTO>.  Operand 2 is checked with neon_const_bounds (operands[2], 1,
;; 33) and printed as the last instruction operand; operand 3 is consumed
;; by the %T3 modifier.
3082 (define_insn "neon_vcvt_n<mode>"
3083 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3084 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3085 (match_operand:SI 2 "immediate_operand" "i")
3086 (match_operand:SI 3 "immediate_operand" "i")]
3090 neon_const_bounds (operands[2], 1, 33);
3091 return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3093 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt with a trailing immediate, from a VCVTI-mode register to
;; <V_CVTTO>.  Operand 2 is checked with neon_const_bounds (operands[2], 1,
;; 33) and printed as the last instruction operand; operand 3 is consumed
;; by the %T3 modifier.
3096 (define_insn "neon_vcvt_n<mode>"
3097 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3098 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3099 (match_operand:SI 2 "immediate_operand" "i")
3100 (match_operand:SI 3 "immediate_operand" "i")]
3104 neon_const_bounds (operands[2], 1, 33);
3105 return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
3107 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; vmovn: a VN-mode register produces a <V_narrow> destination.  The SI
;; immediate operand 2 is not referenced by the output template.
3110 (define_insn "neon_vmovn<mode>"
3111 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3112 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3113 (match_operand:SI 2 "immediate_operand" "i")]
3116 "vmovn.<V_if_elem>\t%P0, %q1"
3117 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing move (vqmovn) from a VN-mode vector to <V_narrow>.
;; Operand 2 feeds the %T2 modifier that forms the element-type suffix.
3120 (define_insn "neon_vqmovn<mode>"
3121 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3122 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3123 (match_operand:SI 2 "immediate_operand" "i")]
3126 "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
3127 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating narrowing move emitted as vqmovun with the <V_s_elem> type
;; suffix.  Operand 2 is an SI immediate that the output template does not
;; reference.
3130 (define_insn "neon_vqmovun<mode>"
3131 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3132 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3133 (match_operand:SI 2 "immediate_operand" "i")]
3136 "vqmovun.<V_s_elem>\t%P0, %q1"
3137 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move (vmovl) from a VW-mode vector to <V_widen>.  Operand 2
;; feeds the %T2 modifier that forms the element-type suffix.
3140 (define_insn "neon_vmovl<mode>"
3141 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3142 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3143 (match_operand:SI 2 "immediate_operand" "i")]
3146 "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
3147 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply a VMD-mode vector (operand 1) by one lane of operand 2.
;; Operand 3 is the lane index, checked against the element count of
;; <MODE>mode.  Operand 4 is not referenced by the output template.  The
;; "type" attribute distinguishes float from integer modes.
3150 (define_insn "neon_vmul_lane<mode>"
3151 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3152 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3153 (match_operand:VMD 2 "s_register_operand"
3154 "<scalar_mul_constraint>")
3155 (match_operand:SI 3 "immediate_operand" "i")
3156 (match_operand:SI 4 "immediate_operand" "i")]
3160 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3161 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3164 (if_then_else (match_test "<Is_float_mode>")
3165 (const_string "neon_fp_mul_s_scalar<q>")
3166 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply a VMQ-mode vector (operand 1) by one lane of operand 2, which
;; has the half-width mode <V_HALF>.  Operand 3 is the lane index and is
;; therefore checked against the element count of <V_HALF>mode, not of
;; <MODE>mode.  Operand 4 is not referenced by the output template.
3169 (define_insn "neon_vmul_lane<mode>"
3170 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3171 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3172 (match_operand:<V_HALF> 2 "s_register_operand"
3173 "<scalar_mul_constraint>")
3174 (match_operand:SI 3 "immediate_operand" "i")
3175 (match_operand:SI 4 "immediate_operand" "i")]
3179 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3180 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3183 (if_then_else (match_test "<Is_float_mode>")
3184 (const_string "neon_fp_mul_s_scalar<q>")
3185 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Widening multiply of a VMDI-mode vector (operand 1) by one lane of
;; operand 2, producing <V_widen>.  Operand 3 is the lane index (checked
;; against the element count of <MODE>mode); operand 4 feeds the %T4
;; modifier that forms the element-type suffix.
3188 (define_insn "neon_vmull_lane<mode>"
3189 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3190 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3191 (match_operand:VMDI 2 "s_register_operand"
3192 "<scalar_mul_constraint>")
3193 (match_operand:SI 3 "immediate_operand" "i")
3194 (match_operand:SI 4 "immediate_operand" "i")]
3195 UNSPEC_VMULL_LANE))]
3198 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3199 return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3201 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by one lane of operand 2: VMDI-mode inputs, <V_widen> result.
;; Operand 3 is the lane index (checked against the element count of
;; <MODE>mode).  Operand 4 is not referenced by the output template; the
;; type suffix is always <V_s_elem>.
3204 (define_insn "neon_vqdmull_lane<mode>"
3205 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3206 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3207 (match_operand:VMDI 2 "s_register_operand"
3208 "<scalar_mul_constraint>")
3209 (match_operand:SI 3 "immediate_operand" "i")
3210 (match_operand:SI 4 "immediate_operand" "i")]
3211 UNSPEC_VQDMULL_LANE))]
3214 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3215 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3217 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vqdmulh (or its %O4-selected variant) of a VMQI-mode vector (operand 1)
;; by one lane of operand 2.  Operand 2 has the half-width mode <V_HALF>,
;; so the lane index (operand 3) is bounded by the element count of
;; <V_HALF>mode, as in the VMQ neon_vmul_lane pattern; bounding it by
;; <MODE>mode would let out-of-range lanes through to the assembler.
;; Operand 4 feeds the %O4 and %T4 modifiers in the mnemonic.
3220 (define_insn "neon_vqdmulh_lane<mode>"
3221 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3222 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3223 (match_operand:<V_HALF> 2 "s_register_operand"
3224 "<scalar_mul_constraint>")
3225 (match_operand:SI 3 "immediate_operand" "i")
3226 (match_operand:SI 4 "immediate_operand" "i")]
3227 UNSPEC_VQDMULH_LANE))]
3230 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3231 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
3233 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vqdmulh (or its %O4-selected variant) of a VMDI-mode vector (operand 1)
;; by one lane of operand 2.  Operand 3 is the lane index, checked against
;; the element count of <MODE>mode; operand 4 feeds the %O4 and %T4
;; modifiers in the mnemonic.
;; NOTE(review): the "type" attribute ends in "_q" although this is the
;; VMDI variant -- confirm that this is intended.
3236 (define_insn "neon_vqdmulh_lane<mode>"
3237 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3238 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3239 (match_operand:VMDI 2 "s_register_operand"
3240 "<scalar_mul_constraint>")
3241 (match_operand:SI 3 "immediate_operand" "i")
3242 (match_operand:SI 4 "immediate_operand" "i")]
3243 UNSPEC_VQDMULH_LANE))]
3246 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3247 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
3249 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Multiply-accumulate by lane for VMD modes.  Operand 1 is the accumulator
;; and is tied to the output (constraint "0"); operand 2 is multiplied by
;; lane operand 4 of operand 3.  The lane index is checked against the
;; element count of <MODE>mode.  Operand 5 is not referenced by the output
;; template.
3252 (define_insn "neon_vmla_lane<mode>"
3253 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3254 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3255 (match_operand:VMD 2 "s_register_operand" "w")
3256 (match_operand:VMD 3 "s_register_operand"
3257 "<scalar_mul_constraint>")
3258 (match_operand:SI 4 "immediate_operand" "i")
3259 (match_operand:SI 5 "immediate_operand" "i")]
3263 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3264 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3267 (if_then_else (match_test "<Is_float_mode>")
3268 (const_string "neon_fp_mla_s_scalar<q>")
3269 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by lane for VMQ modes.  Operand 1 is the accumulator
;; and is tied to the output (constraint "0"); operand 2 is multiplied by
;; lane operand 4 of operand 3.  Operand 3 has the half-width mode
;; <V_HALF>, so the lane index is bounded by the element count of
;; <V_HALF>mode, as in the VMQ neon_vmul_lane pattern; bounding it by
;; <MODE>mode would let out-of-range lanes through to the assembler.
;; Operand 5 is not referenced by the output template.
3272 (define_insn "neon_vmla_lane<mode>"
3273 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3274 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3275 (match_operand:VMQ 2 "s_register_operand" "w")
3276 (match_operand:<V_HALF> 3 "s_register_operand"
3277 "<scalar_mul_constraint>")
3278 (match_operand:SI 4 "immediate_operand" "i")
3279 (match_operand:SI 5 "immediate_operand" "i")]
3283 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3284 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3287 (if_then_else (match_test "<Is_float_mode>")
3288 (const_string "neon_fp_mla_s_scalar<q>")
3289 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by lane.  Operand 1 is the <V_widen>
;; accumulator tied to the output; operand 2 (VMDI) is multiplied by lane
;; operand 4 of operand 3 (VMDI).  The lane index is checked against the
;; element count of <MODE>mode; operand 5 feeds the %T5 modifier that forms
;; the element-type suffix.
3292 (define_insn "neon_vmlal_lane<mode>"
3293 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3294 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3295 (match_operand:VMDI 2 "s_register_operand" "w")
3296 (match_operand:VMDI 3 "s_register_operand"
3297 "<scalar_mul_constraint>")
3298 (match_operand:SI 4 "immediate_operand" "i")
3299 (match_operand:SI 5 "immediate_operand" "i")]
3300 UNSPEC_VMLAL_LANE))]
3303 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3304 return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3306 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by lane.  Operand 1 is the <V_widen> accumulator tied to the
;; output; operand 2 (VMDI) is combined with lane operand 4 of operand 3
;; (VMDI).  The lane index is checked against the element count of
;; <MODE>mode.  Operand 5 is not referenced by the output template.
3309 (define_insn "neon_vqdmlal_lane<mode>"
3310 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3311 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3312 (match_operand:VMDI 2 "s_register_operand" "w")
3313 (match_operand:VMDI 3 "s_register_operand"
3314 "<scalar_mul_constraint>")
3315 (match_operand:SI 4 "immediate_operand" "i")
3316 (match_operand:SI 5 "immediate_operand" "i")]
3317 UNSPEC_VQDMLAL_LANE))]
3320 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3321 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3323 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by lane for VMD modes.  Operand 1 is the accumulator
;; and is tied to the output (constraint "0"); operand 2 is multiplied by
;; lane operand 4 of operand 3.  The lane index is checked against the
;; element count of <MODE>mode.  Operand 5 is not referenced by the output
;; template.  The "type" attribute reuses the mla scheduling classes.
3326 (define_insn "neon_vmls_lane<mode>"
3327 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3328 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3329 (match_operand:VMD 2 "s_register_operand" "w")
3330 (match_operand:VMD 3 "s_register_operand"
3331 "<scalar_mul_constraint>")
3332 (match_operand:SI 4 "immediate_operand" "i")
3333 (match_operand:SI 5 "immediate_operand" "i")]
3337 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3338 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3341 (if_then_else (match_test "<Is_float_mode>")
3342 (const_string "neon_fp_mla_s_scalar<q>")
3343 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by lane for VMQ modes.  Operand 1 is the accumulator
;; and is tied to the output (constraint "0"); operand 2 is multiplied by
;; lane operand 4 of operand 3.  Operand 3 has the half-width mode
;; <V_HALF>, so the lane index is bounded by the element count of
;; <V_HALF>mode, as in the VMQ neon_vmul_lane pattern; bounding it by
;; <MODE>mode would let out-of-range lanes through to the assembler.
;; Operand 5 is not referenced by the output template.
3346 (define_insn "neon_vmls_lane<mode>"
3347 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3348 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3349 (match_operand:VMQ 2 "s_register_operand" "w")
3350 (match_operand:<V_HALF> 3 "s_register_operand"
3351 "<scalar_mul_constraint>")
3352 (match_operand:SI 4 "immediate_operand" "i")
3353 (match_operand:SI 5 "immediate_operand" "i")]
3357 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3358 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3361 (if_then_else (match_test "<Is_float_mode>")
3362 (const_string "neon_fp_mla_s_scalar<q>")
3363 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by lane.  Operand 1 is the <V_widen>
;; accumulator tied to the output; operand 2 (VMDI) is multiplied by lane
;; operand 4 of operand 3 (VMDI).  The lane index is checked against the
;; element count of <MODE>mode; operand 5 feeds the %T5 modifier that forms
;; the element-type suffix.
3366 (define_insn "neon_vmlsl_lane<mode>"
3367 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3368 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3369 (match_operand:VMDI 2 "s_register_operand" "w")
3370 (match_operand:VMDI 3 "s_register_operand"
3371 "<scalar_mul_constraint>")
3372 (match_operand:SI 4 "immediate_operand" "i")
3373 (match_operand:SI 5 "immediate_operand" "i")]
3374 UNSPEC_VMLSL_LANE))]
3377 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3378 return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3380 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by lane.  Operand 1 is the <V_widen> accumulator tied to the
;; output; operand 2 (VMDI) is combined with lane operand 4 of operand 3
;; (VMDI).  The lane index is checked against the element count of
;; <MODE>mode.  Operand 5 is not referenced by the output template.
3383 (define_insn "neon_vqdmlsl_lane<mode>"
3384 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3385 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3386 (match_operand:VMDI 2 "s_register_operand" "w")
3387 (match_operand:VMDI 3 "s_register_operand"
3388 "<scalar_mul_constraint>")
3389 (match_operand:SI 4 "immediate_operand" "i")
3390 (match_operand:SI 5 "immediate_operand" "i")]
3391 UNSPEC_VQDMLSL_LANE))]
3394 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3395 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3397 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3400 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3401 ; core register into a temp register, then use a scalar taken from that. This
3402 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3403 ; or extracted from another vector. In the latter case it's currently better to
3404 ; use the "_lane" variant, and the former case can probably be implemented
3405 ; using vld1_lane, but that hasn't been done yet.
;; Multiply a VMD-mode vector by a scalar held in a register.  The scalar
;; (operand 2) is inserted into lane 0 of a fresh <MODE>mode temporary with
;; neon_vset_lane, and the multiply is emitted as neon_vmul_lane on lane 0.
;; The temporary is passed to neon_vset_lane as both destination and source
;; vector.  Operand 3 is not forwarded; const0_rtx is passed in its place.
3407 (define_expand "neon_vmul_n<mode>"
3408 [(match_operand:VMD 0 "s_register_operand" "")
3409 (match_operand:VMD 1 "s_register_operand" "")
3410 (match_operand:<V_elem> 2 "s_register_operand" "")
3411 (match_operand:SI 3 "immediate_operand" "")]
3414 rtx tmp = gen_reg_rtx (<MODE>mode);
3415 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3416 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3417 const0_rtx, const0_rtx));
;; Multiply a VMQ-mode vector by a scalar held in a register.  The scalar
;; (operand 2) is inserted into lane 0 of a fresh <V_HALF>mode temporary
;; with neon_vset_lane<V_half>, and the multiply is emitted as
;; neon_vmul_lane on lane 0.  Operand 3 is not forwarded; const0_rtx is
;; passed in its place.
3421 (define_expand "neon_vmul_n<mode>"
3422 [(match_operand:VMQ 0 "s_register_operand" "")
3423 (match_operand:VMQ 1 "s_register_operand" "")
3424 (match_operand:<V_elem> 2 "s_register_operand" "")
3425 (match_operand:SI 3 "immediate_operand" "")]
3428 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3429 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3430 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3431 const0_rtx, const0_rtx));
;; Widening multiply of a VMDI-mode vector by a scalar held in a register.
;; The scalar (operand 2) is inserted into lane 0 of a fresh <MODE>mode
;; temporary, then neon_vmull_lane is emitted on lane 0 with operand 3
;; forwarded as its last argument.
3435 (define_expand "neon_vmull_n<mode>"
3436 [(match_operand:<V_widen> 0 "s_register_operand" "")
3437 (match_operand:VMDI 1 "s_register_operand" "")
3438 (match_operand:<V_elem> 2 "s_register_operand" "")
3439 (match_operand:SI 3 "immediate_operand" "")]
3442 rtx tmp = gen_reg_rtx (<MODE>mode);
3443 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3444 emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1], tmp,
3445 const0_rtx, operands[3]));
;; vqdmull of a VMDI-mode vector by a scalar held in a register.  The scalar
;; (operand 2) is inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vqdmull_lane is emitted on lane 0.  Operand 3 is not forwarded;
;; const0_rtx is passed in its place.
3449 (define_expand "neon_vqdmull_n<mode>"
3450 [(match_operand:<V_widen> 0 "s_register_operand" "")
3451 (match_operand:VMDI 1 "s_register_operand" "")
3452 (match_operand:<V_elem> 2 "s_register_operand" "")
3453 (match_operand:SI 3 "immediate_operand" "")]
3456 rtx tmp = gen_reg_rtx (<MODE>mode);
3457 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3458 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3459 const0_rtx, const0_rtx));
;; vqdmulh of a VMDI-mode vector by a scalar held in a register.  The scalar
;; (operand 2) is inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vqdmulh_lane is emitted on lane 0 with operand 3 forwarded as its
;; last argument.
3463 (define_expand "neon_vqdmulh_n<mode>"
3464 [(match_operand:VMDI 0 "s_register_operand" "")
3465 (match_operand:VMDI 1 "s_register_operand" "")
3466 (match_operand:<V_elem> 2 "s_register_operand" "")
3467 (match_operand:SI 3 "immediate_operand" "")]
3470 rtx tmp = gen_reg_rtx (<MODE>mode);
3471 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3472 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3473 const0_rtx, operands[3]));
;; vqdmulh of a VMQI-mode vector by a scalar held in a register.  The scalar
;; (operand 2) is inserted into lane 0 of a fresh <V_HALF>mode temporary
;; with neon_vset_lane<V_half>, then neon_vqdmulh_lane is emitted on lane 0
;; with operand 3 forwarded as its last argument.
3477 (define_expand "neon_vqdmulh_n<mode>"
3478 [(match_operand:VMQI 0 "s_register_operand" "")
3479 (match_operand:VMQI 1 "s_register_operand" "")
3480 (match_operand:<V_elem> 2 "s_register_operand" "")
3481 (match_operand:SI 3 "immediate_operand" "")]
3484 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3485 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3486 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3487 const0_rtx, operands[3]));
;; Multiply-accumulate of VMD-mode vectors by a scalar held in a register.
;; The scalar (operand 3) is inserted into lane 0 of a fresh <MODE>mode
;; temporary, then neon_vmla_lane is emitted on lane 0 with operand 4
;; forwarded as its last argument.
3491 (define_expand "neon_vmla_n<mode>"
3492 [(match_operand:VMD 0 "s_register_operand" "")
3493 (match_operand:VMD 1 "s_register_operand" "")
3494 (match_operand:VMD 2 "s_register_operand" "")
3495 (match_operand:<V_elem> 3 "s_register_operand" "")
3496 (match_operand:SI 4 "immediate_operand" "")]
3499 rtx tmp = gen_reg_rtx (<MODE>mode);
3500 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3501 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3502 tmp, const0_rtx, operands[4]));
;; Multiply-accumulate of VMQ-mode vectors by a scalar held in a register.
;; The scalar (operand 3) is inserted into lane 0 of a fresh <V_HALF>mode
;; temporary with neon_vset_lane<V_half>, then neon_vmla_lane is emitted on
;; lane 0 with operand 4 forwarded as its last argument.
3506 (define_expand "neon_vmla_n<mode>"
3507 [(match_operand:VMQ 0 "s_register_operand" "")
3508 (match_operand:VMQ 1 "s_register_operand" "")
3509 (match_operand:VMQ 2 "s_register_operand" "")
3510 (match_operand:<V_elem> 3 "s_register_operand" "")
3511 (match_operand:SI 4 "immediate_operand" "")]
3514 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3515 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3516 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3517 tmp, const0_rtx, operands[4]));
;; Widening multiply-accumulate by a scalar held in a register.  The scalar
;; (operand 3) is inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vmlal_lane is emitted on lane 0 with operand 4 forwarded as its last
;; argument.
3521 (define_expand "neon_vmlal_n<mode>"
3522 [(match_operand:<V_widen> 0 "s_register_operand" "")
3523 (match_operand:<V_widen> 1 "s_register_operand" "")
3524 (match_operand:VMDI 2 "s_register_operand" "")
3525 (match_operand:<V_elem> 3 "s_register_operand" "")
3526 (match_operand:SI 4 "immediate_operand" "")]
3529 rtx tmp = gen_reg_rtx (<MODE>mode);
3530 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3531 emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1], operands[2],
3532 tmp, const0_rtx, operands[4]));
;; vqdmlal by a scalar held in a register.  The scalar (operand 3) is
;; inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vqdmlal_lane is emitted on lane 0 with operand 4 forwarded as its
;; last argument.
3536 (define_expand "neon_vqdmlal_n<mode>"
3537 [(match_operand:<V_widen> 0 "s_register_operand" "")
3538 (match_operand:<V_widen> 1 "s_register_operand" "")
3539 (match_operand:VMDI 2 "s_register_operand" "")
3540 (match_operand:<V_elem> 3 "s_register_operand" "")
3541 (match_operand:SI 4 "immediate_operand" "")]
3544 rtx tmp = gen_reg_rtx (<MODE>mode);
3545 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3546 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3547 tmp, const0_rtx, operands[4]));
;; Multiply-subtract of VMD-mode vectors by a scalar held in a register.
;; The scalar (operand 3) is inserted into lane 0 of a fresh <MODE>mode
;; temporary, then neon_vmls_lane is emitted on lane 0 with operand 4
;; forwarded as its last argument.
3551 (define_expand "neon_vmls_n<mode>"
3552 [(match_operand:VMD 0 "s_register_operand" "")
3553 (match_operand:VMD 1 "s_register_operand" "")
3554 (match_operand:VMD 2 "s_register_operand" "")
3555 (match_operand:<V_elem> 3 "s_register_operand" "")
3556 (match_operand:SI 4 "immediate_operand" "")]
3559 rtx tmp = gen_reg_rtx (<MODE>mode);
3560 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3561 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3562 tmp, const0_rtx, operands[4]));
;; Multiply-subtract of VMQ-mode vectors by a scalar held in a register.
;; The scalar (operand 3) is inserted into lane 0 of a fresh <V_HALF>mode
;; temporary with neon_vset_lane<V_half>, then neon_vmls_lane is emitted on
;; lane 0 with operand 4 forwarded as its last argument.
3566 (define_expand "neon_vmls_n<mode>"
3567 [(match_operand:VMQ 0 "s_register_operand" "")
3568 (match_operand:VMQ 1 "s_register_operand" "")
3569 (match_operand:VMQ 2 "s_register_operand" "")
3570 (match_operand:<V_elem> 3 "s_register_operand" "")
3571 (match_operand:SI 4 "immediate_operand" "")]
3574 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3575 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3576 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3577 tmp, const0_rtx, operands[4]));
;; Widening multiply-subtract by a scalar held in a register.  The scalar
;; (operand 3) is inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vmlsl_lane is emitted on lane 0 with operand 4 forwarded as its last
;; argument.
3581 (define_expand "neon_vmlsl_n<mode>"
3582 [(match_operand:<V_widen> 0 "s_register_operand" "")
3583 (match_operand:<V_widen> 1 "s_register_operand" "")
3584 (match_operand:VMDI 2 "s_register_operand" "")
3585 (match_operand:<V_elem> 3 "s_register_operand" "")
3586 (match_operand:SI 4 "immediate_operand" "")]
3589 rtx tmp = gen_reg_rtx (<MODE>mode);
3590 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3591 emit_insn (gen_neon_vmlsl_lane<mode> (operands[0], operands[1], operands[2],
3592 tmp, const0_rtx, operands[4]));
;; vqdmlsl by a scalar held in a register.  The scalar (operand 3) is
;; inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vqdmlsl_lane is emitted on lane 0 with operand 4 forwarded as its
;; last argument.
3596 (define_expand "neon_vqdmlsl_n<mode>"
3597 [(match_operand:<V_widen> 0 "s_register_operand" "")
3598 (match_operand:<V_widen> 1 "s_register_operand" "")
3599 (match_operand:VMDI 2 "s_register_operand" "")
3600 (match_operand:<V_elem> 3 "s_register_operand" "")
3601 (match_operand:SI 4 "immediate_operand" "")]
3604 rtx tmp = gen_reg_rtx (<MODE>mode);
3605 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3606 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3607 tmp, const0_rtx, operands[4]));
;; vext on two VDQX-mode vectors (operands 1 and 2).  Operand 3 is the
;; immediate printed as the last assembler operand; it is checked with
;; neon_const_bounds against the element count of <MODE>mode.
3611 (define_insn "neon_vext<mode>"
3612 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3613 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3614 (match_operand:VDQX 2 "s_register_operand" "w")
3615 (match_operand:SI 3 "immediate_operand" "i")]
3619 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3620 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3622 [(set_attr "type" "neon_ext<q>")]
;; vrev64 on a VDQ-mode vector.  Operand 2 is an SI immediate that the
;; output template does not reference.
3625 (define_insn "neon_vrev64<mode>"
3626 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3627 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
3628 (match_operand:SI 2 "immediate_operand" "i")]
3631 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3632 [(set_attr "type" "neon_rev<q>")]
;; vrev32 on a VX-mode vector.  Operand 2 is an SI immediate that the
;; output template does not reference.
3635 (define_insn "neon_vrev32<mode>"
3636 [(set (match_operand:VX 0 "s_register_operand" "=w")
3637 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
3638 (match_operand:SI 2 "immediate_operand" "i")]
3641 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3642 [(set_attr "type" "neon_rev<q>")]
;; vrev16 on a VE-mode vector.  Operand 2 is an SI immediate that the
;; output template does not reference.
3645 (define_insn "neon_vrev16<mode>"
3646 [(set (match_operand:VE 0 "s_register_operand" "=w")
3647 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
3648 (match_operand:SI 2 "immediate_operand" "i")]
3651 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3652 [(set_attr "type" "neon_rev<q>")]
3655 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3656 ; allocation. For an intrinsic of form:
3657 ; rD = vbsl_* (rS, rN, rM)
3658 ; We can use any of:
3659 ; vbsl rS, rN, rM (if D = S)
3660 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3661 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three register-allocation alternatives.  Each
;; alternative ties the output to a different input and picks the matching
;; mnemonic:
;;   alternative 0: operand 1 tied to operand 0 -> vbsl
;;   alternative 1: operand 3 tied to operand 0 -> vbit
;;   alternative 2: operand 2 tied to operand 0 -> vbif
3663 (define_insn "neon_vbsl<mode>_internal"
3664 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3665 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3666 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3667 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3671 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3672 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3673 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3674 [(set_attr "type" "neon_bsl<q>")]
;; Expander for the bitwise-select builtin.  Operand 1 (the selector)
;; arrives in mode <V_cmp_result> and is re-expressed in <MODE>mode with
;; gen_lowpart, so that all three inputs share a mode.
3677 (define_expand "neon_vbsl<mode>"
3678 [(set (match_operand:VDQX 0 "s_register_operand" "")
3679 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3680 (match_operand:VDQX 2 "s_register_operand" "")
3681 (match_operand:VDQX 3 "s_register_operand" "")]
3685 /* We can't alias operands together if they have different modes. */
3686 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift left by a per-element amount held in a register (operand 2).
;; Operand 3 feeds the %O3 and %T3 modifiers that form the mnemonic infix
;; and element-type suffix.
3689 (define_insn "neon_vshl<mode>"
3690 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3691 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3692 (match_operand:VDQIX 2 "s_register_operand" "w")
3693 (match_operand:SI 3 "immediate_operand" "i")]
3696 "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3697 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift left by a per-element amount held in a register
;; (operand 2).  Operand 3 feeds the %O3 and %T3 modifiers that form the
;; mnemonic infix and element-type suffix.
3700 (define_insn "neon_vqshl<mode>"
3701 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3702 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3703 (match_operand:VDQIX 2 "s_register_operand" "w")
3704 (match_operand:SI 3 "immediate_operand" "i")]
3707 "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3708 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift right by immediate.  Operand 2 is the shift amount, checked with
;; neon_const_bounds (operands[2], 1, element bits + 1).  Operand 3 feeds
;; the %O3 and %T3 modifiers in the mnemonic.
3711 (define_insn "neon_vshr_n<mode>"
3712 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3713 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3714 (match_operand:SI 2 "immediate_operand" "i")
3715 (match_operand:SI 3 "immediate_operand" "i")]
3719 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3720 return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3722 [(set_attr "type" "neon_shift_imm<q>")]
;; Narrowing shift right by immediate (VN source, <V_narrow> result).
;; Operand 2 is the shift amount, checked with
;; neon_const_bounds (operands[2], 1, element bits / 2 + 1).  Operand 3
;; feeds the %O3 modifier in the mnemonic.
3725 (define_insn "neon_vshrn_n<mode>"
3726 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3727 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3728 (match_operand:SI 2 "immediate_operand" "i")
3729 (match_operand:SI 3 "immediate_operand" "i")]
3733 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3734 return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
3736 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing shift right by immediate (VN source, <V_narrow>
;; result).  Operand 2 is the shift amount, checked with
;; neon_const_bounds (operands[2], 1, element bits / 2 + 1).  Operand 3
;; feeds the %O3 and %T3 modifiers in the mnemonic.
3739 (define_insn "neon_vqshrn_n<mode>"
3740 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3741 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3742 (match_operand:SI 2 "immediate_operand" "i")
3743 (match_operand:SI 3 "immediate_operand" "i")]
3747 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3748 return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3750 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating narrowing shift right by immediate, emitted with the vqshrun
;; mnemonic (VN source, <V_narrow> result).  Operand 2 is the shift amount,
;; checked with neon_const_bounds (operands[2], 1, element bits / 2 + 1).
;; Operand 3 feeds the %O3 and %T3 modifiers in the mnemonic.
3753 (define_insn "neon_vqshrun_n<mode>"
3754 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3755 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3756 (match_operand:SI 2 "immediate_operand" "i")
3757 (match_operand:SI 3 "immediate_operand" "i")]
3761 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3762 return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3764 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Shift left by immediate.  Operand 2 is the shift amount, checked with
;; neon_const_bounds (operands[2], 0, element bits).  Operand 3 is not
;; referenced by the output template.
3767 (define_insn "neon_vshl_n<mode>"
3768 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3769 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3770 (match_operand:SI 2 "immediate_operand" "i")
3771 (match_operand:SI 3 "immediate_operand" "i")]
3775 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3776 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3778 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift left by immediate.  Operand 2 is the shift amount,
;; checked with neon_const_bounds (operands[2], 0, element bits).  Operand 3
;; feeds the %T3 modifier that forms the element-type suffix.
3781 (define_insn "neon_vqshl_n<mode>"
3782 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3783 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3784 (match_operand:SI 2 "immediate_operand" "i")
3785 (match_operand:SI 3 "immediate_operand" "i")]
3789 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3790 return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3792 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating shift left by immediate, emitted with the vqshlu mnemonic.
;; Operand 2 is the shift amount, checked with
;; neon_const_bounds (operands[2], 0, element bits).  Operand 3 feeds the
;; %T3 modifier that forms the element-type suffix.
3795 (define_insn "neon_vqshlu_n<mode>"
3796 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3797 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3798 (match_operand:SI 2 "immediate_operand" "i")
3799 (match_operand:SI 3 "immediate_operand" "i")]
3803 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3804 return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3806 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate (VW source, <V_widen> result).
;; Operand 2 is the shift amount; operand 3 feeds the %T3 modifier that
;; forms the element-type suffix.
;; NOTE(review): the in-code comment says "0 < imm", but the lower bound
;; passed to neon_const_bounds is 0, the same value the other shift
;; patterns pass when a zero shift is allowed.  Presumably imm == 0 is
;; therefore accepted here -- confirm against neon_const_bounds and
;; reconcile the comment or the bound.
3809 (define_insn "neon_vshll_n<mode>"
3810 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3811 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3812 (match_operand:SI 2 "immediate_operand" "i")
3813 (match_operand:SI 3 "immediate_operand" "i")]
3817 /* The boundaries are: 0 < imm <= size. */
3818 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3819 return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
3821 [(set_attr "type" "neon_shift_imm_long")]
;; Shift right by immediate and accumulate.  Operand 1 is the accumulator,
;; tied to the output (constraint "0"); operand 2 is the vector shifted by
;; operand 3, which is checked with
;; neon_const_bounds (operands[3], 1, element bits + 1).  Operand 4 feeds
;; the %O4 and %T4 modifiers in the mnemonic.
3824 (define_insn "neon_vsra_n<mode>"
3825 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3826 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3827 (match_operand:VDQIX 2 "s_register_operand" "w")
3828 (match_operand:SI 3 "immediate_operand" "i")
3829 (match_operand:SI 4 "immediate_operand" "i")]
3833 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3834 return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3836 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate.  Operand 1 is tied to the output (constraint "0");
;; operand 2 is the source vector and operand 3 the shift amount, checked
;; with neon_const_bounds (operands[3], 1, element bits + 1).
3839 (define_insn "neon_vsri_n<mode>"
3840 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3841 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3842 (match_operand:VDQIX 2 "s_register_operand" "w")
3843 (match_operand:SI 3 "immediate_operand" "i")]
3847 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3848 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3850 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate.  Operand 1 is tied to the output (constraint "0");
;; operand 2 is the source vector and operand 3 the shift amount, checked
;; with neon_const_bounds (operands[3], 0, element bits).
3853 (define_insn "neon_vsli_n<mode>"
3854 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3855 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3856 (match_operand:VDQIX 2 "s_register_operand" "w")
3857 (match_operand:SI 3 "immediate_operand" "i")]
3861 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3862 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3864 [(set_attr "type" "neon_shift_reg<q>")]
;; Table lookup with a single V8QI table register (operand 1) and a V8QI
;; index vector (operand 2).
3867 (define_insn "neon_vtbl1v8qi"
3868 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3869 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3870 (match_operand:V8QI 2 "s_register_operand" "w")]
3873 "vtbl.8\t%P0, {%P1}, %P2"
3874 [(set_attr "type" "neon_tbl1")]
;; Table lookup with a two-register table held in a TI-mode operand
;; (operand 1) and a V8QI index vector (operand 2).  The two table
;; registers are rebuilt as V8QI REGs at REGNO (operands[1]) and
;; REGNO (operands[1]) + 2 so they can be listed individually in the
;; assembler output.
3877 (define_insn "neon_vtbl2v8qi"
3878 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3879 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3880 (match_operand:V8QI 2 "s_register_operand" "w")]
3885 int tabbase = REGNO (operands[1]);
3887 ops[0] = operands[0];
3888 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3889 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3890 ops[3] = operands[2];
3891 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3895 [(set_attr "type" "neon_tbl2")]
;; Table lookup with a three-register table held in an EI-mode operand
;; (operand 1) and a V8QI index vector (operand 2).  The table registers
;; are rebuilt as V8QI REGs at REGNO (operands[1]) + 0, + 2 and + 4 so they
;; can be listed individually in the assembler output.
3898 (define_insn "neon_vtbl3v8qi"
3899 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3900 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3901 (match_operand:V8QI 2 "s_register_operand" "w")]
3906 int tabbase = REGNO (operands[1]);
3908 ops[0] = operands[0];
3909 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3910 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3911 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3912 ops[4] = operands[2];
3913 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3917 [(set_attr "type" "neon_tbl3")]
;; Table lookup with a four-register table held in an OI-mode operand
;; (operand 1) and a V8QI index vector (operand 2).  The table registers
;; are rebuilt as V8QI REGs at REGNO (operands[1]) + 0, + 2, + 4 and + 6 so
;; they can be listed individually in the assembler output.
3920 (define_insn "neon_vtbl4v8qi"
3921 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3922 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3923 (match_operand:V8QI 2 "s_register_operand" "w")]
3928 int tabbase = REGNO (operands[1]);
3930 ops[0] = operands[0];
3931 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3932 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3933 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3934 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3935 ops[5] = operands[2];
3936 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3940 [(set_attr "type" "neon_tbl4")]
3943 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; neon_vtbl1v16qi: V16QI table lookup, split after reload into two
;; neon_vtbl2v8qi insns.  The table (operand 1) is viewed as TImode; the
;; first insn handles the low V8QI halves of the result and of the index
;; vector, the second the high halves.  The output is earlyclobber ("=&w").
3944 (define_insn_and_split "neon_vtbl1v16qi"
3945 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3946 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3947 (match_operand:V16QI 2 "s_register_operand" "w")]
3951 "&& reload_completed"
3954 rtx op0, op1, op2, part0, part2;
3958 op1 = gen_lowpart (TImode, operands[1]);
3961 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3962 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3963 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3964 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3966 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3967 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3968 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3969 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3972 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OI-mode table (operand 1), split after reload
;; into two table-lookup insns: one for the low V8QI halves of the result
;; and of the index vector, one for the high halves.  The output is
;; earlyclobber ("=&w").
3975 (define_insn_and_split "neon_vtbl2v16qi"
3976 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3977 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
3978 (match_operand:V16QI 2 "s_register_operand" "w")]
3982 "&& reload_completed"
3985 rtx op0, op1, op2, part0, part2;
3992 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3993 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3994 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3995 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3997 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3998 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3999 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4000 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4003 [(set_attr "type" "multiple")]
4006 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4007 ;; handle quad-word input modes, producing octa-word output modes. But
4008 ;; that requires us to add support for octa-word vector modes in moves.
4009 ;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI registers (operands 1 and 2) into one OI-mode value.
;; The pattern is split after reload by calling neon_split_vcombine.
4010 (define_insn_and_split "neon_vcombinev16qi"
4011 [(set (match_operand:OI 0 "s_register_operand" "=w")
4012 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4013 (match_operand:V16QI 2 "s_register_operand" "w")]
4017 "&& reload_completed"
4020 neon_split_vcombine (operands);
4023 [(set_attr "type" "multiple")]
;; vtbx with a single V8QI table register (operand 2) and a V8QI index
;; vector (operand 3).  Operand 1 is tied to the output (constraint "0").
4026 (define_insn "neon_vtbx1v8qi"
4027 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4028 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4029 (match_operand:V8QI 2 "s_register_operand" "w")
4030 (match_operand:V8QI 3 "s_register_operand" "w")]
4033 "vtbx.8\t%P0, {%P2}, %P3"
4034 [(set_attr "type" "neon_tbl1")]
;; vtbx with a two-register table held in a TI-mode operand (operand 2) and
;; a V8QI index vector (operand 3).  Operand 1 is tied to the output.  The
;; table registers are rebuilt as V8QI REGs at REGNO (operands[2]) and
;; REGNO (operands[2]) + 2 so they can be listed individually.
4037 (define_insn "neon_vtbx2v8qi"
4038 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4039 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4040 (match_operand:TI 2 "s_register_operand" "w")
4041 (match_operand:V8QI 3 "s_register_operand" "w")]
4046 int tabbase = REGNO (operands[2]);
4048 ops[0] = operands[0];
4049 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4050 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4051 ops[3] = operands[3];
4052 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4056 [(set_attr "type" "neon_tbl2")]
;; vtbx with a three-register table held in an EI-mode operand (operand 2)
;; and a V8QI index vector (operand 3).  Operand 1 is tied to the output.
;; The table registers are rebuilt as V8QI REGs at REGNO (operands[2]) + 0,
;; + 2 and + 4 so they can be listed individually.
4059 (define_insn "neon_vtbx3v8qi"
4060 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4061 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4062 (match_operand:EI 2 "s_register_operand" "w")
4063 (match_operand:V8QI 3 "s_register_operand" "w")]
4068 int tabbase = REGNO (operands[2]);
4070 ops[0] = operands[0];
4071 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4072 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4073 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4074 ops[4] = operands[3];
4075 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4079 [(set_attr "type" "neon_tbl3")]
;; vtbx with a four-register table held in an OI-mode operand (operand 2)
;; and a V8QI index vector (operand 3).  Operand 1 is tied to the output.
;; The table registers are rebuilt as V8QI REGs at REGNO (operands[2]) + 0,
;; + 2, + 4 and + 6 so they can be listed individually.
4082 (define_insn "neon_vtbx4v8qi"
4083 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4084 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4085 (match_operand:OI 2 "s_register_operand" "w")
4086 (match_operand:V8QI 3 "s_register_operand" "w")]
4091 int tabbase = REGNO (operands[2]);
4093 ops[0] = operands[0];
4094 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4095 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4096 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4097 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4098 ops[5] = operands[3];
4099 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4103 [(set_attr "type" "neon_tbl4")]
;; Two-result expander for vtrn.  Operand 0 is set from an unspec of
;; operands 1 and 2, and operand 3 is set from UNSPEC_VTRN2 of the same two
;; inputs (referenced again through match_dup).
4106 (define_expand "neon_vtrn<mode>_internal"
4108 [(set (match_operand:VDQW 0 "s_register_operand" "")
4109 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4110 (match_operand:VDQW 2 "s_register_operand" "")]
4112 (set (match_operand:VDQW 3 "s_register_operand" "")
4113 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4118 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2.  Input operand 1 is tied to
;; output 0 (constraint "0") and input operand 3 to output 2 (constraint
;; "2"), so the template names only operands 0 and 2.
4119 (define_insn "*neon_vtrn<mode>_insn"
4120 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4121 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4122 (match_operand:VDQW 3 "s_register_operand" "2")]
4124 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4125 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4128 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4129 [(set_attr "type" "neon_permute<q>")]
;; Expander taking an SImode register (operand 0) and two vector inputs
;; (operands 1 and 2).  All three are passed, with the generator for the
;; _internal pattern, to neon_emit_pair_result_insn.
;; NOTE(review): operand 0 is presumably the address that receives the pair
;; of results -- confirm against neon_emit_pair_result_insn.
4132 (define_expand "neon_vtrn<mode>"
4133 [(match_operand:SI 0 "s_register_operand" "r")
4134 (match_operand:VDQW 1 "s_register_operand" "w")
4135 (match_operand:VDQW 2 "s_register_operand" "w")]
4138 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal,
4139 operands[0], operands[1], operands[2]);
;; Two-result expander for vzip.  Operand 0 is set from an unspec of
;; operands 1 and 2, and operand 3 is set from UNSPEC_VZIP2 of the same two
;; inputs (referenced again through match_dup).
4143 (define_expand "neon_vzip<mode>_internal"
4145 [(set (match_operand:VDQW 0 "s_register_operand" "")
4146 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4147 (match_operand:VDQW 2 "s_register_operand" "")]
4149 (set (match_operand:VDQW 3 "s_register_operand" "")
4150 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4155 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2.  Input operand 1 is tied to
;; output 0 (constraint "0") and input operand 3 to output 2 (constraint
;; "2"), so the template names only operands 0 and 2.
4156 (define_insn "*neon_vzip<mode>_insn"
4157 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4158 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4159 (match_operand:VDQW 3 "s_register_operand" "2")]
4161 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4162 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4165 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4166 [(set_attr "type" "neon_zip<q>")]
;; Expander taking an SImode register (operand 0) and two vector inputs
;; (operands 1 and 2).  All three are passed, with the generator for the
;; _internal pattern, to neon_emit_pair_result_insn.
;; NOTE(review): operand 0 is presumably the address that receives the pair
;; of results -- confirm against neon_emit_pair_result_insn.
4169 (define_expand "neon_vzip<mode>"
4170 [(match_operand:SI 0 "s_register_operand" "r")
4171 (match_operand:VDQW 1 "s_register_operand" "w")
4172 (match_operand:VDQW 2 "s_register_operand" "w")]
4175 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal,
4176 operands[0], operands[1], operands[2]);
;; Two-result expander for vuzp.  Operand 0 is set from an unspec of
;; operands 1 and 2, and operand 3 is set from UNSPEC_VUZP2 of the same two
;; inputs (referenced again through match_dup).
4180 (define_expand "neon_vuzp<mode>_internal"
4182 [(set (match_operand:VDQW 0 "s_register_operand" "")
4183 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4184 (match_operand:VDQW 2 "s_register_operand" "")]
4186 (set (match_operand:VDQW 3 "s_register_operand" "")
4187 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4192 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2.  Input operand 1 is tied to
;; output 0 (constraint "0") and input operand 3 to output 2 (constraint
;; "2"), so the template names only operands 0 and 2.
4193 (define_insn "*neon_vuzp<mode>_insn"
4194 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4195 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4196 (match_operand:VDQW 3 "s_register_operand" "2")]
4198 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4199 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4202 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4203 [(set_attr "type" "neon_zip<q>")]
;; Expander taking an SImode register (operand 0) and two vector inputs
;; (operands 1 and 2).  All three are passed, with the generator for the
;; _internal pattern, to neon_emit_pair_result_insn.
;; NOTE(review): operand 0 is presumably the address that receives the pair
;; of results -- confirm against neon_emit_pair_result_insn.
4206 (define_expand "neon_vuzp<mode>"
4207 [(match_operand:SI 0 "s_register_operand" "r")
4208 (match_operand:VDQW 1 "s_register_operand" "w")
4209 (match_operand:VDQW 2 "s_register_operand" "w")]
4212 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal,
4213 operands[0], operands[1], operands[2]);
;; Reinterpret a VDX-mode register (operand 1) as V8QI (operand 0); the work
;; is delegated to neon_reinterpret.
4217 (define_expand "neon_vreinterpretv8qi<mode>"
4218 [(match_operand:V8QI 0 "s_register_operand" "")
4219 (match_operand:VDX 1 "s_register_operand" "")]
4222 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V4HI (operand 0); the work
;; is delegated to neon_reinterpret.
4226 (define_expand "neon_vreinterpretv4hi<mode>"
4227 [(match_operand:V4HI 0 "s_register_operand" "")
4228 (match_operand:VDX 1 "s_register_operand" "")]
4231 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V2SI (operand 0); the work
;; is delegated to neon_reinterpret.
4235 (define_expand "neon_vreinterpretv2si<mode>"
4236 [(match_operand:V2SI 0 "s_register_operand" "")
4237 (match_operand:VDX 1 "s_register_operand" "")]
4240 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V2SF (operand 0); the work
;; is delegated to neon_reinterpret.
4244 (define_expand "neon_vreinterpretv2sf<mode>"
4245 [(match_operand:V2SF 0 "s_register_operand" "")
4246 (match_operand:VDX 1 "s_register_operand" "")]
4249 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as DI (operand 0); the work
;; is delegated to neon_reinterpret.
4253 (define_expand "neon_vreinterpretdi<mode>"
4254 [(match_operand:DI 0 "s_register_operand" "")
4255 (match_operand:VDX 1 "s_register_operand" "")]
4258 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQX-mode register (operand 1) as V16QI (operand 0); the work
;; is delegated to neon_reinterpret.
4262 (define_expand "neon_vreinterpretv16qi<mode>"
4263 [(match_operand:V16QI 0 "s_register_operand" "")
4264 (match_operand:VQX 1 "s_register_operand" "")]
4267 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQX-mode register (operand 1) as V8HI (operand 0); the work
;; is delegated to neon_reinterpret.
4271 (define_expand "neon_vreinterpretv8hi<mode>"
4272 [(match_operand:V8HI 0 "s_register_operand" "")
4273 (match_operand:VQX 1 "s_register_operand" "")]
4276 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQX-mode register (operand 1) as V4SI (operand 0); the work
;; is delegated to neon_reinterpret.
4280 (define_expand "neon_vreinterpretv4si<mode>"
4281 [(match_operand:V4SI 0 "s_register_operand" "")
4282 (match_operand:VQX 1 "s_register_operand" "")]
4285 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQX-mode register (operand 1) as V4SF (operand 0); the work
;; is delegated to neon_reinterpret.
4289 (define_expand "neon_vreinterpretv4sf<mode>"
4290 [(match_operand:V4SF 0 "s_register_operand" "")
4291 (match_operand:VQX 1 "s_register_operand" "")]
4294 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQX-mode register (operand 1) as V2DI (operand 0); the work
;; is delegated to neon_reinterpret.
4298 (define_expand "neon_vreinterpretv2di<mode>"
4299 [(match_operand:V2DI 0 "s_register_operand" "")
4300 (match_operand:VQX 1 "s_register_operand" "")]
4303 neon_reinterpret (operands[0], operands[1]);
;; vec_load_lanes expander for a single vector: sets VDQX register operand 0
;; from an unspec wrapping the same-mode memory operand 1, which must satisfy
;; neon_struct_operand.
4307 (define_expand "vec_load_lanes<mode><mode>"
4308 [(set (match_operand:VDQX 0 "s_register_operand")
4309 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-vector load: emits vld1.<V_sz_elem> with destination register
;; operand 0 printed through %h and memory operand 1 through %A.
4313 (define_insn "neon_vld1<mode>"
4314 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4315 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4318 "vld1.<V_sz_elem>\t%h0, %A1"
4319 [(set_attr "type" "neon_load1_1reg<q>")]
;; Single-lane load into a VDX-mode vector.  Operand 1 is the <V_elem>-mode
;; memory, operand 2 is the incoming vector (tied to the destination) and
;; operand 3 is the immediate lane number.  The output code reports
;; "lane out of range" when the lane is outside [0, GET_MODE_NUNITS) and
;; returns either a whole-register vld1 or the indexed {%P0[%c3]} form.
4322 (define_insn "neon_vld1_lane<mode>"
4323 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4324 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4325 (match_operand:VDX 2 "s_register_operand" "0")
4326 (match_operand:SI 3 "immediate_operand" "i")]
4330 HOST_WIDE_INT lane = INTVAL (operands[3]);
4331 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4332 if (lane < 0 || lane >= max)
4333 error ("lane out of range");
4335 return "vld1.<V_sz_elem>\t%P0, %A1";
4337 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4339 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Single-lane load into a VQX-mode vector.  Operand 1 is the <V_elem>-mode
;; memory, operand 2 is the incoming vector (tied to the destination) and
;; operand 3 is the immediate lane number.  The lane is range-checked against
;; GET_MODE_NUNITS; lanes at or above max / 2 take the else-if branch.
;; Before output, operands[3] is replaced by GEN_INT (lane) and operands[0]
;; by a <V_HALF>mode register at regno, so the templates address one
;; half-width register.
4342 (define_insn "neon_vld1_lane<mode>"
4343 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4344 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4345 (match_operand:VQX 2 "s_register_operand" "0")
4346 (match_operand:SI 3 "immediate_operand" "i")]
4350 HOST_WIDE_INT lane = INTVAL (operands[3]);
4351 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4352 int regno = REGNO (operands[0]);
4353 if (lane < 0 || lane >= max)
4354 error ("lane out of range");
4355 else if (lane >= max / 2)
4359 operands[3] = GEN_INT (lane);
4361 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4363 return "vld1.<V_sz_elem>\t%P0, %A1";
4365 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4367 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one <V_elem>-mode element from memory operand 1 and duplicate it
;; across VD-mode operand 0 (vec_duplicate), using the all-lanes {%P0[]}
;; form of vld1.
4370 (define_insn "neon_vld1_dup<mode>"
4371 [(set (match_operand:VD 0 "s_register_operand" "=w")
4372 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4374 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4375 [(set_attr "type" "neon_load1_all_lanes<q>")]
4378 ;; Special case for DImode. Treat it exactly like a simple load.
;; Sets DImode register operand 0 from an unspec wrapping DImode memory
;; operand 1 (neon_struct_operand).
4379 (define_expand "neon_vld1_dupdi"
4380 [(set (match_operand:DI 0 "s_register_operand" "")
4381 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Load one <V_elem>-mode element from memory operand 1 and duplicate it
;; across VQ-mode operand 0.  The template lists both halves of the
;; destination (%e0 and %f0) in the all-lanes form of vld1.
4387 (define_insn "neon_vld1_dup<mode>"
4388 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4389 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4392 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4394 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Duplicate a DImode memory value into both halves of a V2DI register.
;; After reload the pattern is split: neon_vld1_dupdi loads the low DImode
;; half of operand 0, then that half is moved into the high half.  The
;; "length" attribute is set to 8.
4397 (define_insn_and_split "neon_vld1_dupv2di"
4398 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4399 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4402 "&& reload_completed"
4405 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4406 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4407 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4410 [(set_attr "length" "8")
4411 (set_attr "type" "neon_load1_all_lanes_q")]
;; vec_store_lanes expander for a single vector: sets VDQX memory operand 0
;; (neon_struct_operand) from an unspec wrapping same-mode register
;; operand 1.
4414 (define_expand "vec_store_lanes<mode><mode>"
4415 [(set (match_operand:VDQX 0 "neon_struct_operand")
4416 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector store: emits vst1.<V_sz_elem> with source register operand 1
;; printed through %h and memory operand 0 through %A.
4420 (define_insn "neon_vst1<mode>"
4421 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4422 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4425 "vst1.<V_sz_elem>\t%h1, %A0"
4426 [(set_attr "type" "neon_store1_1reg<q>")])
;; Single-lane store from a VDX-mode vector.  Operand 0 is the <V_elem>-mode
;; memory, operand 1 the source vector and operand 2 the immediate lane
;; number.  The output code reports "lane out of range" when the lane is
;; outside [0, GET_MODE_NUNITS) and returns either a whole-register vst1 or
;; the indexed {%P1[%c2]} form.
4428 (define_insn "neon_vst1_lane<mode>"
4429 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4431 [(match_operand:VDX 1 "s_register_operand" "w")
4432 (match_operand:SI 2 "immediate_operand" "i")]
4436 HOST_WIDE_INT lane = INTVAL (operands[2]);
4437 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4438 if (lane < 0 || lane >= max)
4439 error ("lane out of range");
4441 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4443 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4445 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Single-lane store from a VQX-mode vector.  Operand 0 is the <V_elem>-mode
;; memory, operand 1 the source vector and operand 2 the immediate lane
;; number.  The lane is range-checked against GET_MODE_NUNITS; lanes at or
;; above max / 2 take the else-if branch.  Before output, operands[2] is
;; replaced by GEN_INT (lane) and operands[1] by a <V_HALF>mode register at
;; regno, so the templates address one half-width register.
4448 (define_insn "neon_vst1_lane<mode>"
4449 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4451 [(match_operand:VQX 1 "s_register_operand" "w")
4452 (match_operand:SI 2 "immediate_operand" "i")]
4456 HOST_WIDE_INT lane = INTVAL (operands[2]);
4457 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4458 int regno = REGNO (operands[1]);
4459 if (lane < 0 || lane >= max)
4460 error ("lane out of range");
4461 else if (lane >= max / 2)
4465 operands[2] = GEN_INT (lane);
4467 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4469 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4471 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4473 [(set_attr "type" "neon_store1_one_lane<q>")]
;; vec_load_lanes expander for a TI-mode structure of VDX vectors.  The
;; nested UNSPEC_VSTRUCTDUMMY unspec carries only the vector mode, so the
;; pattern can be instantiated once per VDX mode.
4476 (define_expand "vec_load_lanesti<mode>"
4477 [(set (match_operand:TI 0 "s_register_operand")
4478 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4479 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load a two-vector structure into TI-mode register operand 0 from memory
;; operand 1.  64-bit elements are emitted as vld1.64; other element sizes
;; use vld2.  The "type" attribute follows the same 64-bit test.
4483 (define_insn "neon_vld2<mode>"
4484 [(set (match_operand:TI 0 "s_register_operand" "=w")
4485 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4486 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4490 if (<V_sz_elem> == 64)
4491 return "vld1.64\t%h0, %A1";
4493 return "vld2.<V_sz_elem>\t%h0, %A1";
4496 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4497 (const_string "neon_load1_2reg<q>")
4498 (const_string "neon_load2_2reg<q>")))]
;; vec_load_lanes expander for an OI-mode structure of VQ vectors.  The
;; nested UNSPEC_VSTRUCTDUMMY unspec carries only the vector mode.
4501 (define_expand "vec_load_lanesoi<mode>"
4502 [(set (match_operand:OI 0 "s_register_operand")
4503 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4504 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load a two-vector structure of VQ vectors into OI-mode register operand 0
;; from memory operand 1 with a single vld2.
4508 (define_insn "neon_vld2<mode>"
4509 [(set (match_operand:OI 0 "s_register_operand" "=w")
4510 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4511 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4514 "vld2.<V_sz_elem>\t%h0, %A1"
4515 [(set_attr "type" "neon_load2_2reg_q")])
;; Load one lane of a two-vector structure held in TI mode (VD vectors).
;; Operand 1 is the <V_two_elem>-mode memory, operand 2 the incoming
;; structure (tied to the destination) and operand 3 the immediate lane.
;; The lane is range-checked against GET_MODE_NUNITS; the two registers
;; named in the instruction are DImode registers at regno and regno + 2.
4517 (define_insn "neon_vld2_lane<mode>"
4518 [(set (match_operand:TI 0 "s_register_operand" "=w")
4519 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4520 (match_operand:TI 2 "s_register_operand" "0")
4521 (match_operand:SI 3 "immediate_operand" "i")
4522 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4526 HOST_WIDE_INT lane = INTVAL (operands[3]);
4527 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4528 int regno = REGNO (operands[0]);
4530 if (lane < 0 || lane >= max)
4531 error ("lane out of range");
4532 ops[0] = gen_rtx_REG (DImode, regno);
4533 ops[1] = gen_rtx_REG (DImode, regno + 2);
4534 ops[2] = operands[1];
4535 ops[3] = operands[3];
4536 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4539 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load one lane of a two-vector structure held in OI mode (VMQ vectors).
;; Operand 1 is the <V_two_elem>-mode memory, operand 2 the incoming
;; structure (tied to the destination) and operand 3 the immediate lane.
;; The lane is range-checked; lanes at or above max / 2 take the else-if
;; branch.  The instruction names DImode registers at regno and regno + 4
;; and uses GEN_INT (lane) as the printed lane index.
4542 (define_insn "neon_vld2_lane<mode>"
4543 [(set (match_operand:OI 0 "s_register_operand" "=w")
4544 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4545 (match_operand:OI 2 "s_register_operand" "0")
4546 (match_operand:SI 3 "immediate_operand" "i")
4547 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4551 HOST_WIDE_INT lane = INTVAL (operands[3]);
4552 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4553 int regno = REGNO (operands[0]);
4555 if (lane < 0 || lane >= max)
4556 error ("lane out of range");
4557 else if (lane >= max / 2)
4562 ops[0] = gen_rtx_REG (DImode, regno);
4563 ops[1] = gen_rtx_REG (DImode, regno + 4);
4564 ops[2] = operands[1];
4565 ops[3] = GEN_INT (lane);
4566 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4569 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load a <V_two_elem>-mode memory operand into a TI-mode pair of VDX
;; vectors.  Modes with more than one unit use the all-lanes vld2 form
;; ({%e0[], %f0[]}); the other template is a plain vld1 of %h0.  The "type"
;; attribute is chosen by the same <V_mode_nunits> > 1 test.
4572 (define_insn "neon_vld2_dup<mode>"
4573 [(set (match_operand:TI 0 "s_register_operand" "=w")
4574 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4575 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4579 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4580 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4582 return "vld1.<V_sz_elem>\t%h0, %A1";
4585 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4586 (const_string "neon_load2_all_lanes<q>")
4587 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes expander for a TI-mode structure of VDX vectors.  The
;; nested UNSPEC_VSTRUCTDUMMY unspec carries only the vector mode.
4590 (define_expand "vec_store_lanesti<mode>"
4591 [(set (match_operand:TI 0 "neon_struct_operand")
4592 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4593 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store a two-vector structure held in TI-mode register operand 1 to memory
;; operand 0.  64-bit elements are emitted as vst1.64; other element sizes
;; use vst2.
;; The vst2 form is classified as a two-register structure store, matching
;; the neon_load2_2reg<q> class of the corresponding vld2 pattern.  The
;; one-lane class is reserved for the _lane patterns; this attribute only
;; affects instruction scheduling.
4597 (define_insn "neon_vst2<mode>"
4598 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4599 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4600 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4604 if (<V_sz_elem> == 64)
4605 return "vst1.64\t%h1, %A0";
4607 return "vst2.<V_sz_elem>\t%h1, %A0";
4610 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4611 (const_string "neon_store1_2reg<q>")
4612 (const_string "neon_store2_2reg<q>")))]
;; vec_store_lanes expander for an OI-mode structure of VQ vectors.  The
;; nested UNSPEC_VSTRUCTDUMMY unspec carries only the vector mode.
4615 (define_expand "vec_store_lanesoi<mode>"
4616 [(set (match_operand:OI 0 "neon_struct_operand")
4617 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4618 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store a two-vector structure of VQ vectors held in OI-mode register
;; operand 1 to memory operand 0 with a single vst2.
4622 (define_insn "neon_vst2<mode>"
4623 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4624 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4625 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4628 "vst2.<V_sz_elem>\t%h1, %A0"
4629 [(set_attr "type" "neon_store2_4reg<q>")]
;; Store one lane of a two-vector structure held in TI mode (VD vectors).
;; Operand 0 is the <V_two_elem>-mode memory, operand 1 the source structure
;; and operand 2 the immediate lane.  The lane is range-checked against
;; GET_MODE_NUNITS; the two registers named in the instruction are DImode
;; registers at regno and regno + 2.
4632 (define_insn "neon_vst2_lane<mode>"
4633 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4634 (unspec:<V_two_elem>
4635 [(match_operand:TI 1 "s_register_operand" "w")
4636 (match_operand:SI 2 "immediate_operand" "i")
4637 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4641 HOST_WIDE_INT lane = INTVAL (operands[2]);
4642 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4643 int regno = REGNO (operands[1]);
4645 if (lane < 0 || lane >= max)
4646 error ("lane out of range");
4647 ops[0] = operands[0];
4648 ops[1] = gen_rtx_REG (DImode, regno);
4649 ops[2] = gen_rtx_REG (DImode, regno + 2);
4650 ops[3] = operands[2];
4651 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4654 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Store one lane of a two-vector structure held in OI mode (VMQ vectors).
;; Operand 0 is the <V_two_elem>-mode memory, operand 1 the source structure
;; and operand 2 the immediate lane.  The lane is range-checked; lanes at or
;; above max / 2 take the else-if branch.  The instruction names DImode
;; registers at regno and regno + 4 and uses GEN_INT (lane) as the printed
;; lane index.
4657 (define_insn "neon_vst2_lane<mode>"
4658 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4659 (unspec:<V_two_elem>
4660 [(match_operand:OI 1 "s_register_operand" "w")
4661 (match_operand:SI 2 "immediate_operand" "i")
4662 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4666 HOST_WIDE_INT lane = INTVAL (operands[2]);
4667 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4668 int regno = REGNO (operands[1]);
4670 if (lane < 0 || lane >= max)
4671 error ("lane out of range");
4672 else if (lane >= max / 2)
4677 ops[0] = operands[0];
4678 ops[1] = gen_rtx_REG (DImode, regno);
4679 ops[2] = gen_rtx_REG (DImode, regno + 4);
4680 ops[3] = GEN_INT (lane);
4681 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4684 [(set_attr "type" "neon_store2_one_lane<q>")]
;; vec_load_lanes expander for an EI-mode structure of VDX vectors.  The
;; nested UNSPEC_VSTRUCTDUMMY unspec carries only the vector mode.
4687 (define_expand "vec_load_lanesei<mode>"
4688 [(set (match_operand:EI 0 "s_register_operand")
4689 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4690 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load a three-vector structure into EI-mode register operand 0 from memory
;; operand 1.  64-bit elements are emitted as vld1.64; other element sizes
;; use vld3.  The "type" attribute follows the same 64-bit test.
4694 (define_insn "neon_vld3<mode>"
4695 [(set (match_operand:EI 0 "s_register_operand" "=w")
4696 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4697 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4701 if (<V_sz_elem> == 64)
4702 return "vld1.64\t%h0, %A1";
4704 return "vld3.<V_sz_elem>\t%h0, %A1";
4707 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4708 (const_string "neon_load1_3reg<q>")
4709 (const_string "neon_load3_3reg<q>")))]
;; vec_load_lanes expander for a CI-mode structure of VQ vectors; it simply
;; emits neon_vld3<mode> on the same two operands.
4712 (define_expand "vec_load_lanesci<mode>"
4713 [(match_operand:CI 0 "s_register_operand")
4714 (match_operand:CI 1 "neon_struct_operand")
4715 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4718 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Expander for the CI-mode (VQ vectors) three-vector load.  The memory
;; operand is viewed as EImode at offset 0 and again at offset
;; GET_MODE_SIZE (EImode).  neon_vld3qa loads from the first piece and
;; neon_vld3qb from the second, with operands[0] passed to qb as both output
;; and input.
4722 (define_expand "neon_vld3<mode>"
4723 [(match_operand:CI 0 "s_register_operand")
4724 (match_operand:CI 1 "neon_struct_operand")
4725 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4730 mem = adjust_address (operands[1], EImode, 0);
4731 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4732 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4733 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of the CI-mode vld3: one vld3 from EI-mode memory operand 1
;; into the DImode registers at regno, regno + 4 and regno + 8 of operand 0.
4737 (define_insn "neon_vld3qa<mode>"
4738 [(set (match_operand:CI 0 "s_register_operand" "=w")
4739 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4740 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4744 int regno = REGNO (operands[0]);
4746 ops[0] = gen_rtx_REG (DImode, regno);
4747 ops[1] = gen_rtx_REG (DImode, regno + 4);
4748 ops[2] = gen_rtx_REG (DImode, regno + 8);
4749 ops[3] = operands[1];
4750 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4753 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the CI-mode vld3: one vld3 from EI-mode memory operand 1
;; into the DImode registers at regno + 2, regno + 6 and regno + 10 of
;; operand 0.  Operand 2 is the incoming CI value, tied to the destination.
4756 (define_insn "neon_vld3qb<mode>"
4757 [(set (match_operand:CI 0 "s_register_operand" "=w")
4758 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4759 (match_operand:CI 2 "s_register_operand" "0")
4760 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4764 int regno = REGNO (operands[0]);
4766 ops[0] = gen_rtx_REG (DImode, regno + 2);
4767 ops[1] = gen_rtx_REG (DImode, regno + 6);
4768 ops[2] = gen_rtx_REG (DImode, regno + 10);
4769 ops[3] = operands[1];
4770 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4773 [(set_attr "type" "neon_load3_3reg<q>")]
;; Load one lane of a three-vector structure held in EI mode (VD vectors).
;; Operand 1 is the <V_three_elem>-mode memory, operand 2 the incoming
;; structure (tied to the destination) and operand 3 the immediate lane.
;; The lane is range-checked against GET_MODE_NUNITS; the instruction names
;; DImode registers at regno, regno + 2 and regno + 4.
;; The address is printed with plain %3, whereas the vld2/vld4 lane
;; patterns use the %A modifier.
;; NOTE(review): presumably %A could add an alignment qualifier that this
;; instruction form does not accept -- confirm.
4776 (define_insn "neon_vld3_lane<mode>"
4777 [(set (match_operand:EI 0 "s_register_operand" "=w")
4778 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4779 (match_operand:EI 2 "s_register_operand" "0")
4780 (match_operand:SI 3 "immediate_operand" "i")
4781 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4785 HOST_WIDE_INT lane = INTVAL (operands[3]);
4786 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4787 int regno = REGNO (operands[0]);
4789 if (lane < 0 || lane >= max)
4790 error ("lane out of range");
4791 ops[0] = gen_rtx_REG (DImode, regno);
4792 ops[1] = gen_rtx_REG (DImode, regno + 2);
4793 ops[2] = gen_rtx_REG (DImode, regno + 4);
4794 ops[3] = operands[1];
4795 ops[4] = operands[3];
4796 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4800 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load one lane of a three-vector structure held in CI mode (VMQ vectors).
;; Operand 1 is the <V_three_elem>-mode memory, operand 2 the incoming
;; structure (tied to the destination) and operand 3 the immediate lane.
;; The lane is range-checked; lanes at or above max / 2 take the else-if
;; branch.  The instruction names DImode registers at regno, regno + 4 and
;; regno + 8 and prints GEN_INT (lane) as the lane index.  The address is
;; printed with plain %3 rather than %A.
4803 (define_insn "neon_vld3_lane<mode>"
4804 [(set (match_operand:CI 0 "s_register_operand" "=w")
4805 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4806 (match_operand:CI 2 "s_register_operand" "0")
4807 (match_operand:SI 3 "immediate_operand" "i")
4808 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4812 HOST_WIDE_INT lane = INTVAL (operands[3]);
4813 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4814 int regno = REGNO (operands[0]);
4816 if (lane < 0 || lane >= max)
4817 error ("lane out of range");
4818 else if (lane >= max / 2)
4823 ops[0] = gen_rtx_REG (DImode, regno);
4824 ops[1] = gen_rtx_REG (DImode, regno + 4);
4825 ops[2] = gen_rtx_REG (DImode, regno + 8);
4826 ops[3] = operands[1];
4827 ops[4] = GEN_INT (lane);
4828 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4832 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a <V_three_elem>-mode memory operand into an EI-mode triple of VDX
;; vectors.  Modes with more than one unit emit the all-lanes vld3 form on
;; DImode registers at regno, regno + 2 and regno + 4, printing the address
;; with plain %3.  The other template is a plain vld1 of %h0.  The "type"
;; attribute is chosen by the same <V_mode_nunits> > 1 test.
4835 (define_insn "neon_vld3_dup<mode>"
4836 [(set (match_operand:EI 0 "s_register_operand" "=w")
4837 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4838 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4842 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4844 int regno = REGNO (operands[0]);
4846 ops[0] = gen_rtx_REG (DImode, regno);
4847 ops[1] = gen_rtx_REG (DImode, regno + 2);
4848 ops[2] = gen_rtx_REG (DImode, regno + 4);
4849 ops[3] = operands[1];
4850 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4854 return "vld1.<V_sz_elem>\t%h0, %A1";
4857 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4858 (const_string "neon_load3_all_lanes<q>")
4859 (const_string "neon_load1_1reg<q>")))])
;; vec_store_lanes expander for an EI-mode structure of VDX vectors.  The
;; nested UNSPEC_VSTRUCTDUMMY unspec carries only the vector mode.
4861 (define_expand "vec_store_lanesei<mode>"
4862 [(set (match_operand:EI 0 "neon_struct_operand")
4863 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4864 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store a three-vector structure held in EI-mode register operand 1 to
;; memory operand 0.  64-bit elements are emitted as vst1.64; other element
;; sizes use vst3.
;; The vst3 form is classified as a three-register structure store
;; (neon_store3_3reg<q>), the same class used by neon_vst3qa/neon_vst3qb and
;; the store counterpart of neon_load3_3reg<q>.  The one-lane class is
;; reserved for the _lane patterns; this attribute only affects instruction
;; scheduling.
4868 (define_insn "neon_vst3<mode>"
4869 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4870 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4871 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4875 if (<V_sz_elem> == 64)
4876 return "vst1.64\t%h1, %A0";
4878 return "vst3.<V_sz_elem>\t%h1, %A0";
4881 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4882 (const_string "neon_store1_3reg<q>")
4883 (const_string "neon_store3_3reg<q>")))])
;; vec_store_lanes expander for a CI-mode structure of VQ vectors; it simply
;; emits neon_vst3<mode> on the same two operands.
4885 (define_expand "vec_store_lanesci<mode>"
4886 [(match_operand:CI 0 "neon_struct_operand")
4887 (match_operand:CI 1 "s_register_operand")
4888 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4891 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Expander for the CI-mode (VQ vectors) three-vector store.  The memory
;; operand is viewed as EImode at offset 0 and again at offset
;; GET_MODE_SIZE (EImode).  neon_vst3qa stores to the first piece and
;; neon_vst3qb to the second, both reading the same CI-mode operand 1.
4895 (define_expand "neon_vst3<mode>"
4896 [(match_operand:CI 0 "neon_struct_operand")
4897 (match_operand:CI 1 "s_register_operand")
4898 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4903 mem = adjust_address (operands[0], EImode, 0);
4904 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4905 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4906 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of the CI-mode vst3: one vst3 to EI-mode memory operand 0 from
;; the DImode registers at regno, regno + 4 and regno + 8 of operand 1.
4910 (define_insn "neon_vst3qa<mode>"
4911 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4912 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4913 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4917 int regno = REGNO (operands[1]);
4919 ops[0] = operands[0];
4920 ops[1] = gen_rtx_REG (DImode, regno);
4921 ops[2] = gen_rtx_REG (DImode, regno + 4);
4922 ops[3] = gen_rtx_REG (DImode, regno + 8);
4923 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4926 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the CI-mode vst3: one vst3 to EI-mode memory operand 0
;; from the DImode registers at regno + 2, regno + 6 and regno + 10 of
;; operand 1.
4929 (define_insn "neon_vst3qb<mode>"
4930 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4931 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4932 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4936 int regno = REGNO (operands[1]);
4938 ops[0] = operands[0];
4939 ops[1] = gen_rtx_REG (DImode, regno + 2);
4940 ops[2] = gen_rtx_REG (DImode, regno + 6);
4941 ops[3] = gen_rtx_REG (DImode, regno + 10);
4942 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4945 [(set_attr "type" "neon_store3_3reg<q>")]
;; Store one lane of a three-vector structure held in EI mode (VD vectors).
;; Operand 0 is the <V_three_elem>-mode memory, operand 1 the source
;; structure and operand 2 the immediate lane.  The lane is range-checked
;; against GET_MODE_NUNITS; the instruction names DImode registers at regno,
;; regno + 2 and regno + 4.  The address is printed with plain %0 rather
;; than the %A modifier used by the vst2 lane patterns.
4948 (define_insn "neon_vst3_lane<mode>"
4949 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4950 (unspec:<V_three_elem>
4951 [(match_operand:EI 1 "s_register_operand" "w")
4952 (match_operand:SI 2 "immediate_operand" "i")
4953 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4957 HOST_WIDE_INT lane = INTVAL (operands[2]);
4958 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4959 int regno = REGNO (operands[1]);
4961 if (lane < 0 || lane >= max)
4962 error ("lane out of range");
4963 ops[0] = operands[0];
4964 ops[1] = gen_rtx_REG (DImode, regno);
4965 ops[2] = gen_rtx_REG (DImode, regno + 2);
4966 ops[3] = gen_rtx_REG (DImode, regno + 4);
4967 ops[4] = operands[2];
4968 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4972 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Store one lane of a three-vector structure held in CI mode (VMQ vectors).
;; Operand 0 is the <V_three_elem>-mode memory, operand 1 the source
;; structure and operand 2 the immediate lane.  The lane is range-checked;
;; lanes at or above max / 2 take the else-if branch.  The instruction names
;; DImode registers at regno, regno + 4 and regno + 8 and prints
;; GEN_INT (lane) as the lane index.  The address is printed with plain %0.
4975 (define_insn "neon_vst3_lane<mode>"
4976 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4977 (unspec:<V_three_elem>
4978 [(match_operand:CI 1 "s_register_operand" "w")
4979 (match_operand:SI 2 "immediate_operand" "i")
4980 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4984 HOST_WIDE_INT lane = INTVAL (operands[2]);
4985 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4986 int regno = REGNO (operands[1]);
4988 if (lane < 0 || lane >= max)
4989 error ("lane out of range");
4990 else if (lane >= max / 2)
4995 ops[0] = operands[0];
4996 ops[1] = gen_rtx_REG (DImode, regno);
4997 ops[2] = gen_rtx_REG (DImode, regno + 4);
4998 ops[3] = gen_rtx_REG (DImode, regno + 8);
4999 ops[4] = GEN_INT (lane);
5000 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5004 [(set_attr "type" "neon_store3_one_lane<q>")]
;; vec_load_lanes expander for an OI-mode structure of VDX vectors.  The
;; nested UNSPEC_VSTRUCTDUMMY unspec carries only the vector mode.
5007 (define_expand "vec_load_lanesoi<mode>"
5008 [(set (match_operand:OI 0 "s_register_operand")
5009 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5010 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load a four-vector structure into OI-mode register operand 0 from memory
;; operand 1.  64-bit elements are emitted as vld1.64; other element sizes
;; use vld4.  The "type" attribute follows the same 64-bit test.
5014 (define_insn "neon_vld4<mode>"
5015 [(set (match_operand:OI 0 "s_register_operand" "=w")
5016 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5017 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5021 if (<V_sz_elem> == 64)
5022 return "vld1.64\t%h0, %A1";
5024 return "vld4.<V_sz_elem>\t%h0, %A1";
5027 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5028 (const_string "neon_load1_4reg<q>")
5029 (const_string "neon_load4_4reg<q>")))]
;; vec_load_lanes expander for an XI-mode structure of VQ vectors; it simply
;; emits neon_vld4<mode> on the same two operands.
5032 (define_expand "vec_load_lanesxi<mode>"
5033 [(match_operand:XI 0 "s_register_operand")
5034 (match_operand:XI 1 "neon_struct_operand")
5035 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5038 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Expander for the XI-mode (VQ vectors) four-vector load.  The memory
;; operand is viewed as OImode at offset 0 and again at offset
;; GET_MODE_SIZE (OImode).  neon_vld4qa loads from the first piece and
;; neon_vld4qb from the second, with operands[0] passed to qb as both output
;; and input.
5042 (define_expand "neon_vld4<mode>"
5043 [(match_operand:XI 0 "s_register_operand")
5044 (match_operand:XI 1 "neon_struct_operand")
5045 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5050 mem = adjust_address (operands[1], OImode, 0);
5051 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5052 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5053 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of the XI-mode vld4: one vld4 from OI-mode memory operand 1
;; into the DImode registers at regno, regno + 4, regno + 8 and regno + 12
;; of operand 0.
5057 (define_insn "neon_vld4qa<mode>"
5058 [(set (match_operand:XI 0 "s_register_operand" "=w")
5059 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5060 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5064 int regno = REGNO (operands[0]);
5066 ops[0] = gen_rtx_REG (DImode, regno);
5067 ops[1] = gen_rtx_REG (DImode, regno + 4);
5068 ops[2] = gen_rtx_REG (DImode, regno + 8);
5069 ops[3] = gen_rtx_REG (DImode, regno + 12);
5070 ops[4] = operands[1];
5071 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5074 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the XI-mode vld4: one vld4 from OI-mode memory operand 1
;; into the DImode registers at regno + 2, regno + 6, regno + 10 and
;; regno + 14 of operand 0.  Operand 2 is the incoming XI value, tied to the
;; destination.
5077 (define_insn "neon_vld4qb<mode>"
5078 [(set (match_operand:XI 0 "s_register_operand" "=w")
5079 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5080 (match_operand:XI 2 "s_register_operand" "0")
5081 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5085 int regno = REGNO (operands[0]);
5087 ops[0] = gen_rtx_REG (DImode, regno + 2);
5088 ops[1] = gen_rtx_REG (DImode, regno + 6);
5089 ops[2] = gen_rtx_REG (DImode, regno + 10);
5090 ops[3] = gen_rtx_REG (DImode, regno + 14);
5091 ops[4] = operands[1];
5092 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5095 [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one lane of a four-vector structure held in OI mode (VD vectors).
;; Operand 1 is the <V_four_elem>-mode memory, operand 2 the incoming
;; structure (tied to the destination) and operand 3 the immediate lane.
;; The lane is range-checked against GET_MODE_NUNITS; the instruction names
;; DImode registers at regno, regno + 2, regno + 4 and regno + 6.
5098 (define_insn "neon_vld4_lane<mode>"
5099 [(set (match_operand:OI 0 "s_register_operand" "=w")
5100 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5101 (match_operand:OI 2 "s_register_operand" "0")
5102 (match_operand:SI 3 "immediate_operand" "i")
5103 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5107 HOST_WIDE_INT lane = INTVAL (operands[3]);
5108 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5109 int regno = REGNO (operands[0]);
5111 if (lane < 0 || lane >= max)
5112 error ("lane out of range");
5113 ops[0] = gen_rtx_REG (DImode, regno);
5114 ops[1] = gen_rtx_REG (DImode, regno + 2);
5115 ops[2] = gen_rtx_REG (DImode, regno + 4);
5116 ops[3] = gen_rtx_REG (DImode, regno + 6);
5117 ops[4] = operands[1];
5118 ops[5] = operands[3];
5119 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5123 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load one lane of a four-vector structure held in XI mode (VMQ vectors).
;; Operand 1 is the <V_four_elem>-mode memory, operand 2 the incoming
;; structure (tied to the destination) and operand 3 the immediate lane.
;; The lane is range-checked; lanes at or above max / 2 take the else-if
;; branch.  The instruction names DImode registers at regno, regno + 4,
;; regno + 8 and regno + 12 and prints GEN_INT (lane) as the lane index.
5126 (define_insn "neon_vld4_lane<mode>"
5127 [(set (match_operand:XI 0 "s_register_operand" "=w")
5128 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5129 (match_operand:XI 2 "s_register_operand" "0")
5130 (match_operand:SI 3 "immediate_operand" "i")
5131 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5135 HOST_WIDE_INT lane = INTVAL (operands[3]);
5136 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5137 int regno = REGNO (operands[0]);
5139 if (lane < 0 || lane >= max)
5140 error ("lane out of range");
5141 else if (lane >= max / 2)
5146 ops[0] = gen_rtx_REG (DImode, regno);
5147 ops[1] = gen_rtx_REG (DImode, regno + 4);
5148 ops[2] = gen_rtx_REG (DImode, regno + 8);
5149 ops[3] = gen_rtx_REG (DImode, regno + 12);
5150 ops[4] = operands[1];
5151 ops[5] = GEN_INT (lane);
5152 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5156 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load a <V_four_elem>-mode memory operand into an OI-mode quadruple of VDX
;; vectors.  Modes with more than one unit emit the all-lanes vld4 form on
;; DImode registers at regno, regno + 2, regno + 4 and regno + 6.  The other
;; template is a plain vld1 of %h0.  The "type" attribute is chosen by the
;; same <V_mode_nunits> > 1 test.
5159 (define_insn "neon_vld4_dup<mode>"
5160 [(set (match_operand:OI 0 "s_register_operand" "=w")
5161 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5162 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5166 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5168 int regno = REGNO (operands[0]);
5170 ops[0] = gen_rtx_REG (DImode, regno);
5171 ops[1] = gen_rtx_REG (DImode, regno + 2);
5172 ops[2] = gen_rtx_REG (DImode, regno + 4);
5173 ops[3] = gen_rtx_REG (DImode, regno + 6);
5174 ops[4] = operands[1];
5175 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5180 return "vld1.<V_sz_elem>\t%h0, %A1";
5183 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5184 (const_string "neon_load4_all_lanes<q>")
5185 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes expander for an OI-mode structure of VDX vectors.  The
;; nested UNSPEC_VSTRUCTDUMMY unspec carries only the vector mode.
5188 (define_expand "vec_store_lanesoi<mode>"
5189 [(set (match_operand:OI 0 "neon_struct_operand")
5190 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5191 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store a four-vector structure held in OI-mode register operand 1 to
;; memory operand 0.  64-bit elements are emitted as vst1.64; other element
;; sizes use vst4.  The "type" attribute follows the same 64-bit test.
5195 (define_insn "neon_vst4<mode>"
5196 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5197 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5198 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5202 if (<V_sz_elem> == 64)
5203 return "vst1.64\t%h1, %A0";
5205 return "vst4.<V_sz_elem>\t%h1, %A0";
5208 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5209 (const_string "neon_store1_4reg<q>")
5210 (const_string "neon_store4_4reg<q>")))]
;; vec_store_lanes expander for an XI-mode structure of VQ vectors; it
;; simply emits neon_vst4<mode> on the same two operands.
5213 (define_expand "vec_store_lanesxi<mode>"
5214 [(match_operand:XI 0 "neon_struct_operand")
5215 (match_operand:XI 1 "s_register_operand")
5216 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5219 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; neon_vst4<mode> (XImode form): expander that splits the store into two
;; OImode-sized pieces.  The first piece addresses operand 0 at offset 0 and
;; is emitted through neon_vst4qa<mode>; the second piece is
;; GET_MODE_SIZE (OImode) bytes further on and is emitted through
;; neon_vst4qb<mode>.  Both receive the whole XImode register (operand 1).
5223 (define_expand "neon_vst4<mode>"
5224 [(match_operand:XI 0 "neon_struct_operand")
5225 (match_operand:XI 1 "s_register_operand")
5226 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5231 mem = adjust_address (operands[0], OImode, 0);
5232 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5233 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5234 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; neon_vst4qa<mode>: first half of the XImode vst4.  Stores to the OImode
;; memory operand 0 from the XImode register operand 1, using the four D
;; registers at regno, regno + 4, regno + 8 and regno + 12 in a single
;; "vst4.<V_sz_elem>" instruction.
5238 (define_insn "neon_vst4qa<mode>"
5239 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5240 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5241 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5245 int regno = REGNO (operands[1]);
5247 ops[0] = operands[0];
5248 ops[1] = gen_rtx_REG (DImode, regno);
5249 ops[2] = gen_rtx_REG (DImode, regno + 4);
5250 ops[3] = gen_rtx_REG (DImode, regno + 8);
5251 ops[4] = gen_rtx_REG (DImode, regno + 12);
5252 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5255 [(set_attr "type" "neon_store4_4reg<q>")]
;; neon_vst4qb<mode>: second half of the XImode vst4.  Same shape as
;; neon_vst4qa<mode>, but the four D registers are taken at regno + 2,
;; regno + 6, regno + 10 and regno + 14 of the XImode register operand 1.
5258 (define_insn "neon_vst4qb<mode>"
5259 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5260 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5261 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5265 int regno = REGNO (operands[1]);
5267 ops[0] = operands[0];
5268 ops[1] = gen_rtx_REG (DImode, regno + 2);
5269 ops[2] = gen_rtx_REG (DImode, regno + 6);
5270 ops[3] = gen_rtx_REG (DImode, regno + 10);
5271 ops[4] = gen_rtx_REG (DImode, regno + 14);
5272 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5275 [(set_attr "type" "neon_store4_4reg<q>")]
;; neon_vst4_lane<mode> (VD iterator): store one lane from each of four D
;; registers to the <V_four_elem> memory operand 0.
;;   operand 1 - OImode register holding the four vectors
;;   operand 2 - SImode immediate lane index
;; The lane index is checked against GET_MODE_NUNITS (<MODE>mode); an index
;; below zero or not below that count reports "lane out of range".
;; The D registers used are regno, regno + 2, regno + 4 and regno + 6.
5278 (define_insn "neon_vst4_lane<mode>"
5279 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5280 (unspec:<V_four_elem>
5281 [(match_operand:OI 1 "s_register_operand" "w")
5282 (match_operand:SI 2 "immediate_operand" "i")
5283 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5287 HOST_WIDE_INT lane = INTVAL (operands[2]);
5288 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5289 int regno = REGNO (operands[1]);
5291 if (lane < 0 || lane >= max)
5292 error ("lane out of range");
5293 ops[0] = operands[0];
5294 ops[1] = gen_rtx_REG (DImode, regno);
5295 ops[2] = gen_rtx_REG (DImode, regno + 2);
5296 ops[3] = gen_rtx_REG (DImode, regno + 4);
5297 ops[4] = gen_rtx_REG (DImode, regno + 6);
5298 ops[5] = operands[2];
5299 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5303 [(set_attr "type" "neon_store4_one_lane<q>")]
;; neon_vst4_lane<mode> (VMQ iterator): store one lane from each of four
;; vectors held in an XImode register to the <V_four_elem> memory operand 0.
;;   operand 1 - XImode register holding the four vectors
;;   operand 2 - SImode immediate lane index
;; The lane index is checked against GET_MODE_NUNITS (<MODE>mode); an index
;; below zero or not below that count reports "lane out of range".  Indices
;; of max / 2 and above take a separate branch before the operands are set
;; up.  The D registers named in the instruction start at regno, regno + 4,
;; regno + 8 and regno + 12, and the printed lane is the local "lane" value.
;; Only a single lane of each register is written, so the pattern is typed
;; as a one-lane store, matching the D-register variant of this pattern.
5306 (define_insn "neon_vst4_lane<mode>"
5307 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5308 (unspec:<V_four_elem>
5309 [(match_operand:XI 1 "s_register_operand" "w")
5310 (match_operand:SI 2 "immediate_operand" "i")
5311 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5315 HOST_WIDE_INT lane = INTVAL (operands[2]);
5316 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5317 int regno = REGNO (operands[1]);
5319 if (lane < 0 || lane >= max)
5320 error ("lane out of range");
5321 else if (lane >= max / 2)
5326 ops[0] = operands[0];
5327 ops[1] = gen_rtx_REG (DImode, regno);
5328 ops[2] = gen_rtx_REG (DImode, regno + 4);
5329 ops[3] = gen_rtx_REG (DImode, regno + 8);
5330 ops[4] = gen_rtx_REG (DImode, regno + 12);
5331 ops[5] = GEN_INT (lane);
5332 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5336 [(set_attr "type" "neon_store4_one_lane<q>")]
;; neon_vand<mode>: expander with a destination (operand 0), two sources
;; (operands 1 and 2; operand 2 must satisfy neon_inv_logic_op2) and an
;; SImode immediate (operand 3).  The visible body does not use operand 3;
;; it emits and<mode>3 on operands 0-2.
5339 (define_expand "neon_vand<mode>"
5340 [(match_operand:VDQX 0 "s_register_operand" "")
5341 (match_operand:VDQX 1 "s_register_operand" "")
5342 (match_operand:VDQX 2 "neon_inv_logic_op2" "")
5343 (match_operand:SI 3 "immediate_operand" "")]
5346 emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2]));
;; neon_vorr<mode>: expander with a destination (operand 0), two sources
;; (operands 1 and 2; operand 2 must satisfy neon_logic_op2) and an SImode
;; immediate (operand 3).  The visible body does not use operand 3; it emits
;; ior<mode>3 on operands 0-2.
5350 (define_expand "neon_vorr<mode>"
5351 [(match_operand:VDQX 0 "s_register_operand" "")
5352 (match_operand:VDQX 1 "s_register_operand" "")
5353 (match_operand:VDQX 2 "neon_logic_op2" "")
5354 (match_operand:SI 3 "immediate_operand" "")]
5357 emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2]));
;; neon_veor<mode>: expander with a destination (operand 0), two register
;; sources (operands 1 and 2) and an SImode immediate (operand 3).  The
;; visible body does not use operand 3; it emits xor<mode>3 on operands 0-2.
5361 (define_expand "neon_veor<mode>"
5362 [(match_operand:VDQX 0 "s_register_operand" "")
5363 (match_operand:VDQX 1 "s_register_operand" "")
5364 (match_operand:VDQX 2 "s_register_operand" "")
5365 (match_operand:SI 3 "immediate_operand" "")]
5368 emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2]));
;; neon_vbic<mode>: expander with a destination (operand 0), two sources
;; (operands 1 and 2; operand 2 must satisfy neon_logic_op2) and an SImode
;; immediate (operand 3).  The visible body does not use operand 3; it emits
;; bic<mode>3_neon on operands 0-2.
5372 (define_expand "neon_vbic<mode>"
5373 [(match_operand:VDQX 0 "s_register_operand" "")
5374 (match_operand:VDQX 1 "s_register_operand" "")
5375 (match_operand:VDQX 2 "neon_logic_op2" "")
5376 (match_operand:SI 3 "immediate_operand" "")]
5379 emit_insn (gen_bic<mode>3_neon (operands[0], operands[1], operands[2]));
;; neon_vorn<mode>: expander with a destination (operand 0), two sources
;; (operands 1 and 2; operand 2 must satisfy neon_inv_logic_op2) and an
;; SImode immediate (operand 3).  The visible body does not use operand 3;
;; it emits orn<mode>3_neon on operands 0-2.
5383 (define_expand "neon_vorn<mode>"
5384 [(match_operand:VDQX 0 "s_register_operand" "")
5385 (match_operand:VDQX 1 "s_register_operand" "")
5386 (match_operand:VDQX 2 "neon_inv_logic_op2" "")
5387 (match_operand:SI 3 "immediate_operand" "")]
5390 emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2]));
;; neon_vec_unpack<US>_lo_<mode>: extend (SE code iterator) the <V_HALF>
;; elements selected from operand 1 by operand 2, which must satisfy
;; vect_par_constant_low, into the <V_unpack> destination.
;; Emitted as "vmovl" with the %e modifier on operand 1.  Only enabled for
;; TARGET_NEON on little-endian targets.
5394 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5395 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5396 (SE:<V_unpack> (vec_select:<V_HALF>
5397 (match_operand:VU 1 "register_operand" "w")
5398 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5399 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5400 "vmovl.<US><V_sz_elem> %q0, %e1"
5401 [(set_attr "type" "neon_shift_imm_long")]
;; neon_vec_unpack<US>_hi_<mode>: extend (SE code iterator) the <V_HALF>
;; elements selected from operand 1 by operand 2, which must satisfy
;; vect_par_constant_high, into the <V_unpack> destination.
;; Emitted as "vmovl" with the %f modifier on operand 1.  Only enabled for
;; TARGET_NEON on little-endian targets.
5404 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5405 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5406 (SE:<V_unpack> (vec_select:<V_HALF>
5407 (match_operand:VU 1 "register_operand" "w")
5408 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5409 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5410 "vmovl.<US><V_sz_elem> %q0, %f1"
5411 [(set_attr "type" "neon_shift_imm_long")]
;; vec_unpack<US>_hi_<mode> (VU iterator): expander for the high-half
;; unpack.  Builds a PARALLEL of <V_mode_nunits>/2 constants holding the
;; indices <V_mode_nunits>/2 + i (i.e. the upper half of the lane numbers)
;; and passes it, after operands[0], to gen_neon_vec_unpack<US>_hi_<mode>.
;; Only enabled for TARGET_NEON on little-endian targets.
5414 (define_expand "vec_unpack<US>_hi_<mode>"
5415 [(match_operand:<V_unpack> 0 "register_operand" "")
5416 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5417 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5419 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5422 for (i = 0; i < (<V_mode_nunits>/2); i++)
5423 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5425 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5426 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; vec_unpack<US>_lo_<mode> (VU iterator): expander for the low-half
;; unpack.  Builds a PARALLEL of <V_mode_nunits>/2 constants holding the
;; indices 0 .. <V_mode_nunits>/2 - 1 and passes it, after operands[0], to
;; gen_neon_vec_unpack<US>_lo_<mode>.
;; Only enabled for TARGET_NEON on little-endian targets.
5433 (define_expand "vec_unpack<US>_lo_<mode>"
5434 [(match_operand:<V_unpack> 0 "register_operand" "")
5435 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5436 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5438 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5441 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5442 RTVEC_ELT (v, i) = GEN_INT (i);
5443 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5444 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; neon_vec_<US>mult_lo_<mode>: widening multiply.  The template multiplies
;; two extended <V_HALF> selections, taken from operands 1 and 3; operand 2
;; must satisfy vect_par_constant_low.  Emitted as "vmull" with the %e
;; modifier on both sources.
;; Only enabled for TARGET_NEON on little-endian targets.
5451 (define_insn "neon_vec_<US>mult_lo_<mode>"
5452 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5453 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5454 (match_operand:VU 1 "register_operand" "w")
5455 (match_operand:VU 2 "vect_par_constant_low" "")))
5456 (SE:<V_unpack> (vec_select:<V_HALF>
5457 (match_operand:VU 3 "register_operand" "w")
5459 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5460 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5461 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vec_widen_<US>mult_lo_<mode> (VU iterator): expander for the low-half
;; widening multiply.  Builds a PARALLEL of the lane indices
;; 0 .. <V_mode_nunits>/2 - 1 and calls gen_neon_vec_<US>mult_lo_<mode>,
;; whose first argument is operands[0].
;; Only enabled for TARGET_NEON on little-endian targets.
5464 (define_expand "vec_widen_<US>mult_lo_<mode>"
5465 [(match_operand:<V_unpack> 0 "register_operand" "")
5466 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5467 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5468 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5470 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5473 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5474 RTVEC_ELT (v, i) = GEN_INT (i);
5475 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5477 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; neon_vec_<US>mult_hi_<mode>: widening multiply.  The template multiplies
;; two extended <V_HALF> selections, taken from operands 1 and 3; operand 2
;; must satisfy vect_par_constant_high.  Emitted as "vmull" with the %f
;; modifier on both sources.
;; Only enabled for TARGET_NEON on little-endian targets.
5485 (define_insn "neon_vec_<US>mult_hi_<mode>"
5486 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5487 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5488 (match_operand:VU 1 "register_operand" "w")
5489 (match_operand:VU 2 "vect_par_constant_high" "")))
5490 (SE:<V_unpack> (vec_select:<V_HALF>
5491 (match_operand:VU 3 "register_operand" "w")
5493 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5494 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5495 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vec_widen_<US>mult_hi_<mode> (VU iterator): expander for the high-half
;; widening multiply.  Builds a PARALLEL of the lane indices
;; <V_mode_nunits>/2 + i for i in 0 .. <V_mode_nunits>/2 - 1 and calls
;; gen_neon_vec_<US>mult_hi_<mode>, whose first argument is operands[0].
;; Only enabled for TARGET_NEON on little-endian targets.
5498 (define_expand "vec_widen_<US>mult_hi_<mode>"
5499 [(match_operand:<V_unpack> 0 "register_operand" "")
5500 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5501 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5502 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5504 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5507 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5508 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5509 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5511 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; neon_vec_<US>shiftl_<mode>: widening shift left.  Operand 1 (VW) is
;; shifted left by operand 2, which must satisfy
;; const_neon_scalar_shift_amount_operand, and the result is extended
;; (SE code iterator) into the <V_widen> destination.
;; Emitted as "vshll.<US><V_sz_elem>".
5520 (define_insn "neon_vec_<US>shiftl_<mode>"
5521 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5522 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5523 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5526 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5528 [(set_attr "type" "neon_shift_imm_long")]
;; vec_widen_<US>shiftl_lo_<mode> (VU iterator): expander for the low-half
;; widening shift.  Takes the <V_HALF>mode subreg of operands[1] at byte
;; offset 0 and hands it to gen_neon_vec_<US>shiftl_<V_half> together with
;; operands[0].  Operand 2 is an SImode immediate.
;; Only enabled for TARGET_NEON on little-endian targets.
5531 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5532 [(match_operand:<V_unpack> 0 "register_operand" "")
5533 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5534 (match_operand:SI 2 "immediate_operand" "i")]
5535 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5537 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5538 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; vec_widen_<US>shiftl_hi_<mode> (VU iterator): expander for the high-half
;; widening shift.  Takes the <V_HALF>mode subreg of operands[1] at byte
;; offset GET_MODE_SIZE (<V_HALF>mode) and hands it to
;; gen_neon_vec_<US>shiftl_<V_half> together with operands[0].  Operand 2 is
;; an SImode immediate.
;; Only enabled for TARGET_NEON on little-endian targets.
5544 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5545 [(match_operand:<V_unpack> 0 "register_operand" "")
5546 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5547 (match_operand:SI 2 "immediate_operand" "i")]
5548 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5550 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5551 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5552 GET_MODE_SIZE (<V_HALF>mode)),
5558 ;; Vectorize for non-neon-quad case
;; neon_unpack<US>_<mode>: extend (SE code iterator) the whole VDI register
;; in operand 1 into the <V_widen> destination, emitted as "vmovl" on the
;; full D register (%P1).
;; NOTE(review): the other vmovl patterns in this file are typed
;; neon_shift_imm_long while this one is neon_move -- confirm that the
;; difference is intended.
5559 (define_insn "neon_unpack<US>_<mode>"
5560 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5561 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5563 "vmovl.<US><V_sz_elem> %q0, %P1"
5564 [(set_attr "type" "neon_move")]
;; vec_unpack<US>_lo_<mode> (VDI iterator): expander that widens the whole
;; of operands[1] into a fresh <V_widen>mode pseudo with
;; neon_unpack<US>_<mode>, then copies the low half of that pseudo into
;; operands[0] via neon_vget_low<V_widen_l>.
5567 (define_expand "vec_unpack<US>_lo_<mode>"
5568 [(match_operand:<V_double_width> 0 "register_operand" "")
5569 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5572 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5573 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5574 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; vec_unpack<US>_hi_<mode> (VDI iterator): expander that widens the whole
;; of operands[1] into a fresh <V_widen>mode pseudo with
;; neon_unpack<US>_<mode>, then copies the high half of that pseudo into
;; operands[0] via neon_vget_high<V_widen_l>.
5580 (define_expand "vec_unpack<US>_hi_<mode>"
5581 [(match_operand:<V_double_width> 0 "register_operand" "")
5582 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5585 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5586 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5587 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; neon_vec_<US>mult_<mode>: widening multiply of two whole VDI registers
;; (operands 1 and 2) into the <V_widen> destination, emitted as "vmull" on
;; the full D registers (%P1, %P2).
5593 (define_insn "neon_vec_<US>mult_<mode>"
5594 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5595 (mult:<V_widen> (SE:<V_widen>
5596 (match_operand:VDI 1 "register_operand" "w"))
5598 (match_operand:VDI 2 "register_operand" "w"))))]
5600 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5601 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vec_widen_<US>mult_hi_<mode> (VDI iterator): expander that performs the
;; full widening multiply of operands[1] and operands[2] into a fresh
;; <V_widen>mode pseudo with neon_vec_<US>mult_<mode>, then copies the high
;; half of that pseudo into operands[0] via neon_vget_high<V_widen_l>.
5604 (define_expand "vec_widen_<US>mult_hi_<mode>"
5605 [(match_operand:<V_double_width> 0 "register_operand" "")
5606 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5607 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5610 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5611 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5612 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; vec_widen_<US>mult_lo_<mode> (VDI iterator): expander that performs the
;; full widening multiply of operands[1] and operands[2] into a fresh
;; <V_widen>mode pseudo with neon_vec_<US>mult_<mode>, then copies the low
;; half of that pseudo into operands[0] via neon_vget_low<V_widen_l>.
5619 (define_expand "vec_widen_<US>mult_lo_<mode>"
5620 [(match_operand:<V_double_width> 0 "register_operand" "")
5621 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5622 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5625 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5626 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5627 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; vec_widen_<US>shiftl_hi_<mode> (VDI iterator): expander that performs the
;; full widening shift of operands[1] by the immediate operands[2] into a
;; fresh <V_widen>mode pseudo with neon_vec_<US>shiftl_<mode>, then copies
;; the high half of that pseudo into operands[0] via
;; neon_vget_high<V_widen_l>.
5634 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5635 [(match_operand:<V_double_width> 0 "register_operand" "")
5636 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5637 (match_operand:SI 2 "immediate_operand" "i")]
5640 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5641 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5642 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; vec_widen_<US>shiftl_lo_<mode> (VDI iterator): expander that performs the
;; full widening shift of operands[1] by the immediate operands[2] into a
;; fresh <V_widen>mode pseudo with neon_vec_<US>shiftl_<mode>, then copies
;; the low half of that pseudo into operands[0] via
;; neon_vget_low<V_widen_l>.
5648 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5649 [(match_operand:<V_double_width> 0 "register_operand" "")
5650 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5651 (match_operand:SI 2 "immediate_operand" "i")]
5654 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5655 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5656 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5662 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5663 ; because the ordering of vector elements in Q registers is different from what
5664 ; the semantics of the instructions require.
;; vec_pack_trunc_<mode> (VN iterator): truncate operands 1 and 2 to
;; <V_narrow> and concatenate them into the <V_narrow_pack> destination.
;; Emitted as two "vmovn" instructions, the first writing %e0 from operand 1
;; and the second writing %f0 from operand 2; the "length" attribute is 8
;; accordingly.  The destination is marked earlyclobber ("=&w").
;; Only enabled for TARGET_NEON on little-endian targets.
5666 (define_insn "vec_pack_trunc_<mode>"
5667 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5668 (vec_concat:<V_narrow_pack>
5669 (truncate:<V_narrow>
5670 (match_operand:VN 1 "register_operand" "w"))
5671 (truncate:<V_narrow>
5672 (match_operand:VN 2 "register_operand" "w"))))]
5673 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5674 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5675 [(set_attr "type" "multiple")
5676 (set_attr "length" "8")]
5679 ;; For the non-quad case.
;; neon_vec_pack_trunc_<mode>: truncate the VN register in operand 1 to the
;; <V_narrow> destination with a single "vmovn".
;; Only enabled for TARGET_NEON on little-endian targets.
5680 (define_insn "neon_vec_pack_trunc_<mode>"
5681 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5682 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5683 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5684 "vmovn.i<V_sz_elem>\t%P0, %q1"
5685 [(set_attr "type" "neon_move_narrow_q")]
;; vec_pack_trunc_<mode> (VSHFT iterator): expander that first assembles
;; operands[1] and operands[2] into the low and high halves of a fresh
;; <V_DOUBLE>mode pseudo (move_lo_quad / move_hi_quad), then narrows that
;; pseudo into operands[0] with neon_vec_pack_trunc_<V_double>.
;; Only enabled for TARGET_NEON on little-endian targets.
5688 (define_expand "vec_pack_trunc_<mode>"
5689 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5690 (match_operand:VSHFT 1 "register_operand" "")
5691 (match_operand:VSHFT 2 "register_operand")]
5692 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5694 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5696 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5697 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5698 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; neon_vabd<mode>_2: match the absolute value of a subtraction,
;; abs (operand 1 - operand 2), and emit it as "vabd.<V_s_elem>".
;; Enabled for TARGET_NEON; for float modes it additionally requires
;; flag_unsafe_math_optimizations.
;; The trailing if_then_else picks neon_fp_abd_s<q> for float modes and
;; neon_abd<q> otherwise.
5702 (define_insn "neon_vabd<mode>_2"
5703 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5704 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5705 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5706 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5707 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5709 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5710 (const_string "neon_fp_abd_s<q>")
5711 (const_string "neon_abd<q>")))]
;; neon_vabd<mode>_3: match the absolute value of an unspec over operands 1
;; and 2 (the unspec code is not visible in this excerpt) and emit it as
;; "vabd.<V_if_elem>".
;; Enabled for TARGET_NEON; for float modes it additionally requires
;; flag_unsafe_math_optimizations.
;; The trailing if_then_else picks neon_fp_abd_s<q> for float modes and
;; neon_abd<q> otherwise.
5714 (define_insn "neon_vabd<mode>_3"
5715 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5716 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5717 (match_operand:VDQ 2 "s_register_operand" "w")]
5719 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5720 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5722 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5723 (const_string "neon_fp_abd_s<q>")
5724 (const_string "neon_abd<q>")))]
5727 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; Sign-extend SI to DI when, after reload, the destination is a VFP
;; register: duplicate the SI source across a V2SI view of the destination
;; register (operand 2, built from REGNO (operands[0])), then arithmetic
;; shift the DI destination right by 32.
5730 [(set (match_operand:DI 0 "s_register_operand" "")
5731 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5732 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5733 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5734 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5736 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Sign-extend HI to DI when, after reload, the destination is a VFP
;; register: duplicate the HI source across a V4HI view of the destination
;; register (operand 2, built from REGNO (operands[0])), then arithmetic
;; shift the DI destination right by 48.
5740 [(set (match_operand:DI 0 "s_register_operand" "")
5741 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5742 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5743 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5744 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5746 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; Sign-extend QI to DI when, after reload, the destination is a VFP
;; register: duplicate the QI source across a V8QI view of the destination
;; register (operand 2, built from REGNO (operands[0])), then arithmetic
;; shift the DI destination right by 56.
5750 [(set (match_operand:DI 0 "s_register_operand" "")
5751 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5752 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5753 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5754 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5756 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; Zero-extend SI to DI when, after reload, the destination is a VFP
;; register: duplicate the SI source across a V2SI view of the destination
;; register (operand 2, built from REGNO (operands[0])), then logical
;; shift the DI destination right by 32.
5760 [(set (match_operand:DI 0 "s_register_operand" "")
5761 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5762 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5763 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5764 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5766 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Zero-extend HI to DI when, after reload, the destination is a VFP
;; register: duplicate the HI source across a V4HI view of the destination
;; register (operand 2, built from REGNO (operands[0])), then logical
;; shift the DI destination right by 48.
5770 [(set (match_operand:DI 0 "s_register_operand" "")
5771 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5772 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5773 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5774 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5776 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; Zero-extend QI to DI when, after reload, the destination is a VFP
;; register: duplicate the QI source across a V8QI view of the destination
;; register (operand 2, built from REGNO (operands[0])), then logical
;; shift the DI destination right by 56.
5780 [(set (match_operand:DI 0 "s_register_operand" "")
5781 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5782 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5783 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5784 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5786 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));