1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned vector store for the VDX modes: writes NEON register operand 1
;; to operand 0 using a single vst1 with element size <V_sz_elem>.
;; The insn condition restricts it to NEON targets that are not big-endian
;; and have unaligned_access set.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned vector store for the VQX modes: writes NEON register operand 1
;; (printed with the %q modifier) to operand 0 using vst1 with element size
;; <V_sz_elem>.  The insn condition restricts it to NEON targets that are
;; not big-endian and have unaligned_access set.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned vector load for the VQX modes: reads operand 1 into NEON
;; register operand 0 (printed with the %q modifier) using vld1 with element
;; size <V_sz_elem>.  The insn condition restricts it to NEON targets that
;; are not big-endian and have unaligned_access set.
;; The instruction emitted is a load, so the scheduling type must be the
;; load type (neon_load1_1reg<q>), as for the VDX misaligned load pattern.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
580 ;; phase is able to use it when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
632 (define_insn "ior<mode>3"
633 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
634 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
635 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
638 switch (which_alternative)
640 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
641 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
642 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
643 default: gcc_unreachable ();
646 [(set_attr "type" "neon_logic<q>")]
649 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
650 ;; vorr. We support the pseudo-instruction vand instead, because that
651 ;; corresponds to the canonical form the middle-end expects to use for
652 ;; immediate bitwise-ANDs.
654 (define_insn "and<mode>3"
655 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
656 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
657 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
660 switch (which_alternative)
662 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
663 case 1: return neon_output_logic_immediate ("vand", &operands[2],
664 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
665 default: gcc_unreachable ();
668 [(set_attr "type" "neon_logic<q>")]
671 (define_insn "orn<mode>3_neon"
672 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
673 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
674 (match_operand:VDQ 1 "s_register_operand" "w")))]
676 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
677 [(set_attr "type" "neon_logic<q>")]
680 ;; TODO: investigate whether we should disable
681 ;; this and bicdi3_neon for the A8 in line with the other
683 (define_insn_and_split "orndi3_neon"
684 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
685 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
686 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
694 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
695 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
696 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
701 operands[3] = gen_highpart (SImode, operands[0]);
702 operands[0] = gen_lowpart (SImode, operands[0]);
703 operands[4] = gen_highpart (SImode, operands[2]);
704 operands[2] = gen_lowpart (SImode, operands[2]);
705 operands[5] = gen_highpart (SImode, operands[1]);
706 operands[1] = gen_lowpart (SImode, operands[1]);
710 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
711 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
715 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
716 (set_attr "length" "*,16,8,8")
717 (set_attr "arch" "any,a,t2,t2")]
720 (define_insn "bic<mode>3_neon"
721 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
722 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
723 (match_operand:VDQ 1 "s_register_operand" "w")))]
725 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
726 [(set_attr "type" "neon_logic<q>")]
729 ;; Compare to *anddi_notdi_di.
730 (define_insn "bicdi3_neon"
731 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
732 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
733 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
739 [(set_attr "type" "neon_logic,multiple,multiple")
740 (set_attr "length" "*,8,8")]
743 (define_insn "xor<mode>3"
744 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
745 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
746 (match_operand:VDQ 2 "s_register_operand" "w")))]
748 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
749 [(set_attr "type" "neon_logic<q>")]
752 (define_insn "one_cmpl<mode>2"
753 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
754 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
756 "vmvn\t%<V_reg>0, %<V_reg>1"
757 [(set_attr "type" "neon_move<q>")]
760 (define_insn "abs<mode>2"
761 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
762 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
764 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
766 (if_then_else (match_test "<Is_float_mode>")
767 (const_string "neon_fp_abs_s<q>")
768 (const_string "neon_abs<q>")))]
771 (define_insn "neg<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_neg_s<q>")
779 (const_string "neon_neg<q>")))]
782 (define_insn "negdi2_neon"
783 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
784 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
785 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
786 (clobber (reg:CC CC_REGNUM))]
789 [(set_attr "length" "8")
790 (set_attr "type" "multiple")]
793 ; Split negdi2_neon for vfp registers
795 [(set (match_operand:DI 0 "s_register_operand" "")
796 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
797 (clobber (match_scratch:DI 2 ""))
798 (clobber (reg:CC CC_REGNUM))]
799 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
800 [(set (match_dup 2) (const_int 0))
801 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
802 (clobber (reg:CC CC_REGNUM))])]
804 if (!REG_P (operands[2]))
805 operands[2] = operands[0];
809 ; Split negdi2_neon for core registers
811 [(set (match_operand:DI 0 "s_register_operand" "")
812 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
813 (clobber (match_scratch:DI 2 ""))
814 (clobber (reg:CC CC_REGNUM))]
815 "TARGET_32BIT && reload_completed
816 && arm_general_register_operand (operands[0], DImode)"
817 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
818 (clobber (reg:CC CC_REGNUM))])]
822 (define_insn "*umin<mode>3_neon"
823 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
824 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
825 (match_operand:VDQIW 2 "s_register_operand" "w")))]
827 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
828 [(set_attr "type" "neon_minmax<q>")]
831 (define_insn "*umax<mode>3_neon"
832 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
833 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
834 (match_operand:VDQIW 2 "s_register_operand" "w")))]
836 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
837 [(set_attr "type" "neon_minmax<q>")]
840 (define_insn "*smin<mode>3_neon"
841 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
842 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
843 (match_operand:VDQW 2 "s_register_operand" "w")))]
845 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
847 (if_then_else (match_test "<Is_float_mode>")
848 (const_string "neon_fp_minmax_s<q>")
849 (const_string "neon_minmax<q>")))]
852 (define_insn "*smax<mode>3_neon"
853 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
854 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
855 (match_operand:VDQW 2 "s_register_operand" "w")))]
857 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
859 (if_then_else (match_test "<Is_float_mode>")
860 (const_string "neon_fp_minmax_s<q>")
861 (const_string "neon_minmax<q>")))]
864 ; TODO: V2DI shifts are currently disabled because there are bugs in the
865 ; generic vectorizer code. It ends up creating a V2DI constructor with
868 (define_insn "vashl<mode>3"
869 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
870 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
871 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
874 switch (which_alternative)
876 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
877 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
879 VALID_NEON_QREG_MODE (<MODE>mode),
881 default: gcc_unreachable ();
884 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
887 (define_insn "vashr<mode>3_imm"
888 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
889 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
890 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
893 return neon_output_shift_immediate ("vshr", 's', &operands[2],
894 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
897 [(set_attr "type" "neon_shift_imm<q>")]
900 (define_insn "vlshr<mode>3_imm"
901 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
902 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
903 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
906 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
907 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
910 [(set_attr "type" "neon_shift_imm<q>")]
913 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
914 ; amount, with signed operands. This is essentially the same as vashl<mode>3
915 ; above, but using an unspec in case GCC tries anything tricky with negative
918 (define_insn "ashl<mode>3_signed"
919 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
920 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
921 (match_operand:VDQI 2 "s_register_operand" "w")]
922 UNSPEC_ASHIFT_SIGNED))]
924 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
925 [(set_attr "type" "neon_shift_reg<q>")]
928 ; Used for implementing logical shift-right, which is a left-shift by a negative
929 ; amount, with unsigned operands.
931 (define_insn "ashl<mode>3_unsigned"
932 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
933 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
934 (match_operand:VDQI 2 "s_register_operand" "w")]
935 UNSPEC_ASHIFT_UNSIGNED))]
937 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
938 [(set_attr "type" "neon_shift_reg<q>")]
941 (define_expand "vashr<mode>3"
942 [(set (match_operand:VDQIW 0 "s_register_operand" "")
943 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
944 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
947 if (s_register_operand (operands[2], <MODE>mode))
949 rtx neg = gen_reg_rtx (<MODE>mode);
950 emit_insn (gen_neg<mode>2 (neg, operands[2]));
951 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
954 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
958 (define_expand "vlshr<mode>3"
959 [(set (match_operand:VDQIW 0 "s_register_operand" "")
960 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
961 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
964 if (s_register_operand (operands[2], <MODE>mode))
966 rtx neg = gen_reg_rtx (<MODE>mode);
967 emit_insn (gen_neg<mode>2 (neg, operands[2]));
968 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
971 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
977 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
978 ;; leaving the upper half uninitialized. This is OK since the shift
979 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
980 ;; data flow analysis however, we pretend the full register is set
982 (define_insn "neon_load_count"
983 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
984 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
988 vld1.32\t{%P0[0]}, %A1
990 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift carried out in NEON ("w") registers, with no scratch or
;; condition-code clobbers.  Only matches once reload has completed.  A
;; constant shift count must lie in the range [0, 63].  Alternative 0 takes the
;; count as an immediate, alternative 1 as a NEON register; both use vshl.u64.
993 (define_insn "ashldi3_neon_noclobber"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
996 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
997 "TARGET_NEON && reload_completed
998 && (!CONST_INT_P (operands[2])
999 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1001 vshl.u64\t%P0, %P1, %2
1002 vshl.u64\t%P0, %P1, %P2"
1003 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift whose operands may be allocated to NEON ("w") or core
;; ("r") registers; the split condition is "TARGET_NEON && reload_completed".
;; Split body, as far as it is visible in this view:
;;  - Destination in a VFP/NEON register: a constant count below 1 becomes a
;;    plain DImode move and a constant count above 63 is clamped to 63.  The
;;    other path (its guard is elided here, presumably the non-constant case)
;;    loads the count into scratch operand 5 with neon_load_count.  The shift
;;    itself is emitted as ashldi3_neon_noclobber.
;;  - Otherwise: a constant shift by exactly 1 uses arm_ashldi3_1bit, provided
;;    operands 0 and 1 either do not overlap or are the same register; the
;;    remaining visible path calls arm_emit_coreregs_64bit_shift with scratch
;;    operands 3 and 4.
1006 (define_insn_and_split "ashldi3_neon"
1007 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1008 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1009 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1010 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1011 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1012 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1013 (clobber (reg:CC_C CC_REGNUM))]
1016 "TARGET_NEON && reload_completed"
1020 if (IS_VFP_REGNUM (REGNO (operands[0])))
1022 if (CONST_INT_P (operands[2]))
1024 if (INTVAL (operands[2]) < 1)
1026 emit_insn (gen_movdi (operands[0], operands[1]));
1029 else if (INTVAL (operands[2]) > 63)
1030 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1034 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1035 operands[2] = operands[5];
1038 /* Ditch the unnecessary clobbers. */
1039 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1044 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1045 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1046 || REGNO (operands[0]) == REGNO (operands[1])))
1047 /* This clobbers CC. */
1048 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1050 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1051 operands[2], operands[3], operands[4]);
1055 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1056 (set_attr "opt" "*,*,speed,speed,*,*")
1057 (set_attr "type" "multiple")]
1060 ; The shift amount needs to be negated for right-shifts
;; vshl.s64 of a DImode value by a count held in a NEON register, represented
;; as UNSPEC_ASHIFT_SIGNED.  Only matches once reload has completed.
1061 (define_insn "signed_shift_di3_neon"
1062 [(set (match_operand:DI 0 "s_register_operand" "=w")
1063 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1064 (match_operand:DI 2 "s_register_operand" " w")]
1065 UNSPEC_ASHIFT_SIGNED))]
1066 "TARGET_NEON && reload_completed"
1067 "vshl.s64\t%P0, %P1, %P2"
1068 [(set_attr "type" "neon_shift_reg")]
1071 ; The shift amount needs to be negated for right-shifts
;; vshl.u64 of a DImode value by a count held in a NEON register, represented
;; as UNSPEC_ASHIFT_UNSIGNED.  Only matches once reload has completed.
1072 (define_insn "unsigned_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_UNSIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.u64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
;; Arithmetic right shift (vshr.s64) of a DImode value in a NEON register by a
;; constant in the range [1, 64].  Only matches once reload has completed.
1082 (define_insn "ashrdi3_neon_imm_noclobber"
1083 [(set (match_operand:DI 0 "s_register_operand" "=w")
1084 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1085 (match_operand:DI 2 "const_int_operand" " i")))]
1086 "TARGET_NEON && reload_completed
1087 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1088 "vshr.s64\t%P0, %P1, %2"
1089 [(set_attr "type" "neon_shift_imm")]
;; Logical right shift (vshr.u64) of a DImode value in a NEON register by a
;; constant in the range [1, 64].  Only matches once reload has completed.
1092 (define_insn "lshrdi3_neon_imm_noclobber"
1093 [(set (match_operand:DI 0 "s_register_operand" "=w")
1094 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1095 (match_operand:DI 2 "const_int_operand" " i")))]
1096 "TARGET_NEON && reload_completed
1097 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1098 "vshr.u64\t%P0, %P1, %2"
1099 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (iterated over rshifts) whose operands may be allocated
;; to NEON ("w") or core ("r") registers; the split condition is
;; "TARGET_NEON && reload_completed".  Split body, as far as it is visible in
;; this view:
;;  - Destination in a VFP/NEON register, constant count: a count below 1
;;    becomes a plain DImode move, a count above 64 is clamped to 64, and
;;    <shift>di3_neon_imm_noclobber is emitted.
;;  - Destination in a VFP/NEON register, other path (guard elided here,
;;    presumably the register-count case): the count is negated into scratch
;;    operand 3, loaded into scratch operand 5 with neon_load_count, and the
;;    shift is emitted as <shifttype>_shift_di3_neon.
;;  - Otherwise: a constant shift by exactly 1 uses arm_<shift>di3_1bit,
;;    provided operands 0 and 1 either do not overlap or are the same register;
;;    the remaining visible path calls arm_emit_coreregs_64bit_shift with
;;    scratch operands 3 and 4.
1104 (define_insn_and_split "<shift>di3_neon"
1105 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1106 (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1107 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1108 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1109 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1110 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1111 (clobber (reg:CC CC_REGNUM))]
1114 "TARGET_NEON && reload_completed"
1118 if (IS_VFP_REGNUM (REGNO (operands[0])))
1120 if (CONST_INT_P (operands[2]))
1122 if (INTVAL (operands[2]) < 1)
1124 emit_insn (gen_movdi (operands[0], operands[1]));
1127 else if (INTVAL (operands[2]) > 64)
1128 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1130 /* Ditch the unnecessary clobbers. */
1131 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1137 /* We must use a negative left-shift. */
1138 emit_insn (gen_negsi2 (operands[3], operands[2]));
1139 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1140 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1146 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1147 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1148 || REGNO (operands[0]) == REGNO (operands[1])))
1149 /* This clobbers CC. */
1150 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1152 /* This clobbers CC (ASHIFTRT by register only). */
1153 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1154 operands[2], operands[3], operands[4]);
1159 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1160 (set_attr "opt" "*,*,speed,speed,*,*")
1161 (set_attr "type" "multiple")]
1164 ;; Widening operations
;; Sign-extends operand 1 (a VW vector) to <V_widen> and adds it to the wide
;; operand 2, emitted as vaddw with the wide operand as the first source.
1166 (define_insn "widen_ssum<mode>3"
1167 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1168 (plus:<V_widen> (sign_extend:<V_widen>
1169 (match_operand:VW 1 "s_register_operand" "%w"))
1170 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1172 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1173 [(set_attr "type" "neon_add_widen")]
;; Zero-extends operand 1 (a VW vector) to <V_widen> and adds it to the wide
;; operand 2, emitted as vaddw with the wide operand as the first source.
1176 (define_insn "widen_usum<mode>3"
1177 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1178 (plus:<V_widen> (zero_extend:<V_widen>
1179 (match_operand:VW 1 "s_register_operand" "%w"))
1180 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1182 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1183 [(set_attr "type" "neon_add_widen")]
1186 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1187 ;; shift-count granularity. That's good enough for the middle-end's current
1190 ;; Note that it's not safe to perform such an operation in big-endian mode,
1191 ;; due to element-ordering issues.
;; Whole-vector shift by a constant multiple of 8 bits; disabled when
;; BYTES_BIG_ENDIAN.  Operands 0 and 1 are reinterpreted as byte vectors
;; (V16QI when the mode is 128 bits wide, V8QI otherwise) and a VEXT is emitted
;; with operand 1 as the first source, a zero vector as the second, and a byte
;; offset of num_bits / BITS_PER_UNIT.  A count equal to the full vector width
;; is special-cased as a plain move of operand 1 into operand 0.
1193 (define_expand "vec_shr_<mode>"
1194 [(match_operand:VDQ 0 "s_register_operand" "")
1195 (match_operand:VDQ 1 "s_register_operand" "")
1196 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1197 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1200 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1201 const int width = GET_MODE_BITSIZE (<MODE>mode);
1202 const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1203 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1204 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1206 if (num_bits == width)
1208 emit_move_insn (operands[0], operands[1]);
1212 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1213 operands[0] = gen_lowpart (bvecmode, operands[0]);
1214 operands[1] = gen_lowpart (bvecmode, operands[1]);
1216 emit_insn (gen_ext (operands[0], operands[1], zero_reg,
1217 GEN_INT (num_bits / BITS_PER_UNIT)));
;; Whole-vector shift by a constant multiple of 8 bits; disabled when
;; BYTES_BIG_ENDIAN.  Operands 0 and 1 are reinterpreted as byte vectors
;; (V16QI when the mode is 128 bits wide, V8QI otherwise).  The count is
;; replaced by width - num_bits and a VEXT is emitted with a zero vector as
;; the first source, operand 1 as the second, and a byte offset of
;; num_bits / BITS_PER_UNIT.  One special case (its guard is elided in this
;; view) stores an all-zero vector into operand 0 instead.
1221 (define_expand "vec_shl_<mode>"
1222 [(match_operand:VDQ 0 "s_register_operand" "")
1223 (match_operand:VDQ 1 "s_register_operand" "")
1224 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1225 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1228 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1229 const int width = GET_MODE_BITSIZE (<MODE>mode);
1230 const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1231 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1232 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1236 emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
1240 num_bits = width - num_bits;
1242 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1243 operands[0] = gen_lowpart (bvecmode, operands[0]);
1244 operands[1] = gen_lowpart (bvecmode, operands[1]);
1246 emit_insn (gen_ext (operands[0], zero_reg, operands[1],
1247 GEN_INT (num_bits / BITS_PER_UNIT)));
1251 ;; Helpers for quad-word reduction operations
1253 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1254 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1255 ; N/2-element vector.
;; V4SI variant: combines elements 0-1 of operand 1 with elements 2-3 of the
;; same register into a V2SI result, using one instruction on the two
;; D-register halves (%e1 and %f1).  The combining operator line is elided in
;; this view; the mnemonic comes from <VQH_mnem>.
1257 (define_insn "quad_halves_<code>v4si"
1258 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1260 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1261 (parallel [(const_int 0) (const_int 1)]))
1262 (vec_select:V2SI (match_dup 1)
1263 (parallel [(const_int 2) (const_int 3)]))))]
1265 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1266 [(set_attr "vqh_mnem" "<VQH_mnem>")
1267 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF variant: combines elements 0-1 of operand 1 with elements 2-3 of the
;; same register into a V2SF result (<VQH_mnem>.f32 on %e1 and %f1).  Only
;; enabled with flag_unsafe_math_optimizations.
1270 (define_insn "quad_halves_<code>v4sf"
1271 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1273 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1274 (parallel [(const_int 0) (const_int 1)]))
1275 (vec_select:V2SF (match_dup 1)
1276 (parallel [(const_int 2) (const_int 3)]))))]
1277 "TARGET_NEON && flag_unsafe_math_optimizations"
1278 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1279 [(set_attr "vqh_mnem" "<VQH_mnem>")
1280 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant: combines elements 0-3 of operand 1 with elements 4-7 of the
;; same register into a V4HI result (<VQH_mnem>.<VQH_sign>16 on %e1 and %f1).
;; NOTE(review): operand 0 uses the read-write constraint "+w" here, whereas
;; the V4SI and V4SF variants use "=w"; the visible template only writes
;; operand 0 -- confirm whether "+w" is intentional.
1283 (define_insn "quad_halves_<code>v8hi"
1284 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1286 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1287 (parallel [(const_int 0) (const_int 1)
1288 (const_int 2) (const_int 3)]))
1289 (vec_select:V4HI (match_dup 1)
1290 (parallel [(const_int 4) (const_int 5)
1291 (const_int 6) (const_int 7)]))))]
1293 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1294 [(set_attr "vqh_mnem" "<VQH_mnem>")
1295 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant: combines elements 0-7 of operand 1 with elements 8-15 of the
;; same register into a V8QI result (<VQH_mnem>.<VQH_sign>8 on %e1 and %f1).
;; NOTE(review): operand 0 uses the read-write constraint "+w" here, whereas
;; the V4SI and V4SF variants use "=w"; the visible template only writes
;; operand 0 -- confirm whether "+w" is intentional.
1298 (define_insn "quad_halves_<code>v16qi"
1299 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1301 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1302 (parallel [(const_int 0) (const_int 1)
1303 (const_int 2) (const_int 3)
1304 (const_int 4) (const_int 5)
1305 (const_int 6) (const_int 7)]))
1306 (vec_select:V8QI (match_dup 1)
1307 (parallel [(const_int 8) (const_int 9)
1308 (const_int 10) (const_int 11)
1309 (const_int 12) (const_int 13)
1310 (const_int 14) (const_int 15)]))))]
1312 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1313 [(set_attr "vqh_mnem" "<VQH_mnem>")
1314 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expander taking a 128-bit destination (operand 0) and a <V_HALF> source
;; (operand 1).  It emits a move into the <V_HALF> subreg of operand 0 at byte
;; offset GET_MODE_SIZE (<V_HALF>mode); the source argument of the move is
;; elided in this view (presumably operand 1).
1317 (define_expand "move_hi_quad_<mode>"
1318 [(match_operand:ANY128 0 "s_register_operand" "")
1319 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1322 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1323 GET_MODE_SIZE (<V_HALF>mode)),
;; Expander taking a 128-bit destination (operand 0) and a <V_HALF> source
;; (operand 1).  It emits a move into a <V_HALF> subreg of operand 0; the
;; subreg offset and the move source are elided in this view.
1328 (define_expand "move_lo_quad_<mode>"
1329 [(match_operand:ANY128 0 "s_register_operand" "")
1330 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1333 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1339 ;; Reduction operations
;; Sum reduction for VD modes: delegates to neon_pairwise_reduce, passing
;; neon_vpadd_internal<mode> as the pairwise step.  Float modes are only
;; enabled with flag_unsafe_math_optimizations.
1341 (define_expand "reduc_splus_<mode>"
1342 [(match_operand:VD 0 "s_register_operand" "")
1343 (match_operand:VD 1 "s_register_operand" "")]
1344 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1346 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1347 &gen_neon_vpadd_internal<mode>);
;; Sum reduction for VQ modes, little-endian only.  The two halves of
;; operand 1 are added with quad_halves_plus<mode>, the half-width result is
;; reduced with reduc_splus_<V_half>, and that result is written to operand 0
;; via move_lo_quad_<mode>.  Float modes are only enabled with
;; flag_unsafe_math_optimizations.
1351 (define_expand "reduc_splus_<mode>"
1352 [(match_operand:VQ 0 "s_register_operand" "")
1353 (match_operand:VQ 1 "s_register_operand" "")]
1354 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1355 && !BYTES_BIG_ENDIAN"
1357 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1358 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1360 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1361 emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
1362 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Sum reduction for V2DI, little-endian only: a single vadd.i64 adds the two
;; D-register halves of operand 1 (%e1, %f1) and writes the low half of
;; operand 0 (%e0).
1367 (define_insn "reduc_splus_v2di"
1368 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1369 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1371 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1372 "vadd.i64\t%e0, %e1, %f1"
1373 [(set_attr "type" "neon_add_q")]
1376 ;; NEON does not distinguish between signed and unsigned addition except on
1377 ;; widening operations.
;; Unsigned sum reduction for VDQI modes: forwards to reduc_splus_<mode>.
;; Enabled for D-register modes, and for other modes only when not
;; BYTES_BIG_ENDIAN.
1378 (define_expand "reduc_uplus_<mode>"
1379 [(match_operand:VDQI 0 "s_register_operand" "")
1380 (match_operand:VDQI 1 "s_register_operand" "")]
1381 "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
1383 emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
;; Signed minimum reduction for VD modes: delegates to neon_pairwise_reduce,
;; passing neon_vpsmin<mode> as the pairwise step.  Float modes are only
;; enabled with flag_unsafe_math_optimizations.
1387 (define_expand "reduc_smin_<mode>"
1388 [(match_operand:VD 0 "s_register_operand" "")
1389 (match_operand:VD 1 "s_register_operand" "")]
1390 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1392 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1393 &gen_neon_vpsmin<mode>);
;; Signed minimum reduction for VQ modes, little-endian only.  The two halves
;; of operand 1 are combined with quad_halves_smin<mode>, the half-width result
;; is reduced with reduc_smin_<V_half>, and that result is written to
;; operand 0 via move_lo_quad_<mode>.  Float modes are only enabled with
;; flag_unsafe_math_optimizations.
1397 (define_expand "reduc_smin_<mode>"
1398 [(match_operand:VQ 0 "s_register_operand" "")
1399 (match_operand:VQ 1 "s_register_operand" "")]
1400 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1401 && !BYTES_BIG_ENDIAN"
1403 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1404 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1406 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1407 emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
1408 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Signed maximum reduction for VD modes: delegates to neon_pairwise_reduce,
;; passing neon_vpsmax<mode> as the pairwise step.  Float modes are only
;; enabled with flag_unsafe_math_optimizations.
1413 (define_expand "reduc_smax_<mode>"
1414 [(match_operand:VD 0 "s_register_operand" "")
1415 (match_operand:VD 1 "s_register_operand" "")]
1416 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1418 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1419 &gen_neon_vpsmax<mode>);
;; Signed maximum reduction for VQ modes, little-endian only.  The two halves
;; of operand 1 are combined with quad_halves_smax<mode>, the half-width result
;; is reduced with reduc_smax_<V_half>, and that result is written to
;; operand 0 via move_lo_quad_<mode>.  Float modes are only enabled with
;; flag_unsafe_math_optimizations.
1423 (define_expand "reduc_smax_<mode>"
1424 [(match_operand:VQ 0 "s_register_operand" "")
1425 (match_operand:VQ 1 "s_register_operand" "")]
1426 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1427 && !BYTES_BIG_ENDIAN"
1429 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1430 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1432 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1433 emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
1434 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Unsigned minimum reduction for VDI modes: delegates to
;; neon_pairwise_reduce, passing neon_vpumin<mode> as the pairwise step.
1439 (define_expand "reduc_umin_<mode>"
1440 [(match_operand:VDI 0 "s_register_operand" "")
1441 (match_operand:VDI 1 "s_register_operand" "")]
1444 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1445 &gen_neon_vpumin<mode>);
;; Unsigned minimum reduction for VQI modes, little-endian only.  The two
;; halves of operand 1 are combined with quad_halves_umin<mode>, the half-width
;; result is reduced with reduc_umin_<V_half>, and that result is written to
;; operand 0 via move_lo_quad_<mode>.
1449 (define_expand "reduc_umin_<mode>"
1450 [(match_operand:VQI 0 "s_register_operand" "")
1451 (match_operand:VQI 1 "s_register_operand" "")]
1452 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1454 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1455 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1457 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1458 emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
1459 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Unsigned maximum reduction for VDI modes: delegates to
;; neon_pairwise_reduce, passing neon_vpumax<mode> as the pairwise step.
1464 (define_expand "reduc_umax_<mode>"
1465 [(match_operand:VDI 0 "s_register_operand" "")
1466 (match_operand:VDI 1 "s_register_operand" "")]
1469 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1470 &gen_neon_vpumax<mode>);
;; Unsigned maximum reduction for VQI modes, little-endian only.  The two
;; halves of operand 1 are combined with quad_halves_umax<mode>, the half-width
;; result is reduced with reduc_umax_<V_half>, and that result is written to
;; operand 0 via move_lo_quad_<mode>.
1474 (define_expand "reduc_umax_<mode>"
1475 [(match_operand:VQI 0 "s_register_operand" "")
1476 (match_operand:VQI 1 "s_register_operand" "")]
1477 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1479 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1480 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1482 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1483 emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
1484 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Pairwise add (vpadd) of two VD operands, modelled as an unspec.  The
;; scheduling type is the floating-point variant for float modes and the
;; integer variant otherwise.
1489 (define_insn "neon_vpadd_internal<mode>"
1490 [(set (match_operand:VD 0 "s_register_operand" "=w")
1491 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1492 (match_operand:VD 2 "s_register_operand" "w")]
1495 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1496 ;; Assume this schedules like vadd.
1498 (if_then_else (match_test "<Is_float_mode>")
1499 (const_string "neon_fp_reduc_add_s<q>")
1500 (const_string "neon_reduc_add<q>")))]
;; Signed pairwise minimum (vpmin.<V_s_elem>) of two VD operands, modelled as
;; an unspec.  The scheduling type depends on whether the mode is a float mode.
1503 (define_insn "neon_vpsmin<mode>"
1504 [(set (match_operand:VD 0 "s_register_operand" "=w")
1505 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1506 (match_operand:VD 2 "s_register_operand" "w")]
1509 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1511 (if_then_else (match_test "<Is_float_mode>")
1512 (const_string "neon_fp_reduc_minmax_s<q>")
1513 (const_string "neon_reduc_minmax<q>")))]
;; Signed pairwise maximum (vpmax.<V_s_elem>) of two VD operands, modelled as
;; an unspec.  The scheduling type depends on whether the mode is a float mode.
1516 (define_insn "neon_vpsmax<mode>"
1517 [(set (match_operand:VD 0 "s_register_operand" "=w")
1518 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1519 (match_operand:VD 2 "s_register_operand" "w")]
1522 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1524 (if_then_else (match_test "<Is_float_mode>")
1525 (const_string "neon_fp_reduc_minmax_s<q>")
1526 (const_string "neon_reduc_minmax<q>")))]
;; Unsigned pairwise minimum (vpmin.<V_u_elem>) of two VDI operands, modelled
;; as an unspec.
1529 (define_insn "neon_vpumin<mode>"
1530 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1531 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1532 (match_operand:VDI 2 "s_register_operand" "w")]
1535 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1536 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Unsigned pairwise maximum (vpmax.<V_u_elem>) of two VDI operands, modelled
;; as an unspec.
1539 (define_insn "neon_vpumax<mode>"
1540 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1541 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1542 (match_operand:VDI 2 "s_register_operand" "w")]
1545 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1546 [(set_attr "type" "neon_reduc_minmax<q>")]
1549 ;; Saturating arithmetic
1551 ; NOTE: Neon supports many more saturating variants of instructions than the
1552 ; following, but these are all GCC currently understands.
1553 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1554 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) on VD modes, emitted as vqadd.<V_s_elem>.
1557 (define_insn "*ss_add<mode>_neon"
1558 [(set (match_operand:VD 0 "s_register_operand" "=w")
1559 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1560 (match_operand:VD 2 "s_register_operand" "w")))]
1562 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1563 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) on VD modes, emitted as vqadd.<V_u_elem>.
1566 (define_insn "*us_add<mode>_neon"
1567 [(set (match_operand:VD 0 "s_register_operand" "=w")
1568 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1569 (match_operand:VD 2 "s_register_operand" "w")))]
1571 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1572 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) on VD modes, emitted as
;; vqsub.<V_s_elem>.
1575 (define_insn "*ss_sub<mode>_neon"
1576 [(set (match_operand:VD 0 "s_register_operand" "=w")
1577 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1578 (match_operand:VD 2 "s_register_operand" "w")))]
1580 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1581 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) on VD modes, emitted as
;; vqsub.<V_u_elem>.
1584 (define_insn "*us_sub<mode>_neon"
1585 [(set (match_operand:VD 0 "s_register_operand" "=w")
1586 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1587 (match_operand:VD 2 "s_register_operand" "w")))]
1589 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1590 [(set_attr "type" "neon_qsub<q>")]
1593 ;; Conditional instructions. These are comparisons with conditional moves for
1594 ;; vectors. They perform the assignment:
1596 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1598 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select for VDQW modes (float modes only with
;; flag_unsafe_math_optimizations).  The expander builds a lane mask in a
;; <V_cmp_result> pseudo from vcge/vcgt/vceq (or the vclt/vcle compare-with-zero
;; forms when operand 5 is the zero constant), forcing operand 5 into a
;; register when it is not one, and then selects between operands 1 and 2
;; with vbsl.  For comparisons that are expressed through their inverse,
;; swap_bsl_operands is set and operand 2 is passed to vbsl ahead of
;; operand 1.  Many case labels and braces are elided in this view.
1601 (define_expand "vcond<mode><mode>"
1602 [(set (match_operand:VDQW 0 "s_register_operand" "")
1604 (match_operator 3 "comparison_operator"
1605 [(match_operand:VDQW 4 "s_register_operand" "")
1606 (match_operand:VDQW 5 "nonmemory_operand" "")])
1607 (match_operand:VDQW 1 "s_register_operand" "")
1608 (match_operand:VDQW 2 "s_register_operand" "")))]
1609 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1611 HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
1613 rtx magic_rtx = GEN_INT (magic_word);
1615 int use_zero_form = 0;
1616 int swap_bsl_operands = 0;
1617 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1618 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1620 rtx (*base_comparison) (rtx, rtx, rtx, rtx);
1621 rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
1623 switch (GET_CODE (operands[3]))
1630 if (operands[5] == CONST0_RTX (<MODE>mode))
1637 if (!REG_P (operands[5]))
1638 operands[5] = force_reg (<MODE>mode, operands[5]);
1641 switch (GET_CODE (operands[3]))
1651 base_comparison = gen_neon_vcge<mode>;
1652 complimentary_comparison = gen_neon_vcgt<mode>;
1660 base_comparison = gen_neon_vcgt<mode>;
1661 complimentary_comparison = gen_neon_vcge<mode>;
1666 base_comparison = gen_neon_vceq<mode>;
1667 complimentary_comparison = gen_neon_vceq<mode>;
1673 switch (GET_CODE (operands[3]))
1680 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1681 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1687 Note that there also exist direct comparison against 0 forms,
1688 so catch those as a special case. */
1692 switch (GET_CODE (operands[3]))
1695 base_comparison = gen_neon_vclt<mode>;
1698 base_comparison = gen_neon_vcle<mode>;
1701 /* Do nothing, other zero form cases already have the correct
1708 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1710 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1717 /* Vector compare returns false for lanes which are unordered, so if we use
1718 the inverse of the comparison we actually want to emit, then
1719 swap the operands to BSL, we will end up with the correct result.
1720 Note that a NE NaN and NaN NE b are true for all a, b.
1722 Our transformations are:
1727 a NE b -> !(a EQ b) */
1730 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1732 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1734 swap_bsl_operands = 1;
1737 /* We check (a > b || b > a). Combining these comparisons gives us
1738 true iff (a != b && a ORDERED b); swapping the operands to BSL
1739 will then give us (a == b || a UNORDERED b) as intended. */
1741 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
1742 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
1743 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1744 swap_bsl_operands = 1;
1747 /* Operands are ORDERED iff (a > b || b >= a).
1748 Swapping the operands to BSL will give the UNORDERED case. */
1749 swap_bsl_operands = 1;
1752 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
1753 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
1754 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1760 if (swap_bsl_operands)
1761 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1764 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Vector conditional select for the integer VDQIW modes, using
;; arm_comparison_operator.  A lane mask is built in a <V_cmp_result> pseudo
;; by one of vcge/vcgt/vceq/vcle/vclt, chosen by the comparison code; some
;; cases compare with operands 4 and 5 swapped.  The result is then selected
;; with vbsl, with either operand 2 or operand 1 passed first.  The case
;; labels and the uses of `inverse' and `immediate_zero' are elided in this
;; view, so the exact mapping cannot be confirmed from here.
1769 (define_expand "vcondu<mode><mode>"
1770 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1772 (match_operator 3 "arm_comparison_operator"
1773 [(match_operand:VDQIW 4 "s_register_operand" "")
1774 (match_operand:VDQIW 5 "s_register_operand" "")])
1775 (match_operand:VDQIW 1 "s_register_operand" "")
1776 (match_operand:VDQIW 2 "s_register_operand" "")))]
1780 int inverse = 0, immediate_zero = 0;
1782 mask = gen_reg_rtx (<V_cmp_result>mode);
1784 if (operands[5] == CONST0_RTX (<MODE>mode))
1786 else if (!REG_P (operands[5]))
1787 operands[5] = force_reg (<MODE>mode, operands[5]);
1789 switch (GET_CODE (operands[3]))
1792 emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
1797 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
1802 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1808 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
1811 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
1817 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
1820 emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
1825 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1835 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1838 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1844 ;; Patterns for builtins.
1846 ; good for plain vadd, vaddq.
;; Intrinsic expander for vadd on VCVTF modes.  When the mode is not a float
;; mode, or flag_unsafe_math_optimizations is set, it emits the canonical
;; add<mode>3; the other visible path emits neon_vadd<mode>_unspec.  Operand 3
;; is an immediate that the visible body does not read.
1848 (define_expand "neon_vadd<mode>"
1849 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1850 (match_operand:VCVTF 1 "s_register_operand" "w")
1851 (match_operand:VCVTF 2 "s_register_operand" "w")
1852 (match_operand:SI 3 "immediate_operand" "i")]
1855 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1856 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1858 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1863 ; Note that NEON operations don't support the full IEEE 754 standard: in
1864 ; particular, denormal values are flushed to zero. This means that GCC cannot
1865 ; use those instructions for autovectorization, etc. unless
1866 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1867 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1868 ; header) must work in either case: if -funsafe-math-optimizations is given,
1869 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1870 ; expand to unspecs (which may potentially limit the extent to which they might
1871 ; be optimized by generic code).
1873 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; vadd on VCVTF modes, represented as an unspec rather than canonical RTL.
;; The scheduling type is the floating-point variant for float modes and the
;; integer variant otherwise.
1875 (define_insn "neon_vadd<mode>_unspec"
1876 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1877 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1878 (match_operand:VCVTF 2 "s_register_operand" "w")]
1881 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1883 (if_then_else (match_test "<Is_float_mode>")
1884 (const_string "neon_fp_addsub_s<q>")
1885 (const_string "neon_add<q>")))]
1888 ; operand 3 represents in bits:
1889 ; bit 0: signed (vs unsigned).
1890 ; bit 1: rounding (vs none).
;; Long add (vaddl): two VDI D-register operands produce a <V_widen> result.
;; Operand 3 is the immediate flag word; it feeds the %T3 type prefix of the
;; mnemonic.
1892 (define_insn "neon_vaddl<mode>"
1893 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1894 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1895 (match_operand:VDI 2 "s_register_operand" "w")
1896 (match_operand:SI 3 "immediate_operand" "i")]
1899 "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
1900 [(set_attr "type" "neon_add_long")]
;; Wide add (vaddw): a <V_widen> operand 1 plus a VDI D-register operand 2
;; produce a <V_widen> result.  Operand 3 is the immediate flag word; it feeds
;; the %T3 type prefix of the mnemonic.
1903 (define_insn "neon_vaddw<mode>"
1904 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1905 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1906 (match_operand:VDI 2 "s_register_operand" "w")
1907 (match_operand:SI 3 "immediate_operand" "i")]
1910 "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
1911 [(set_attr "type" "neon_add_widen")]
;; Halving add on VDQIW modes.  Operand 3 is the immediate flag word; it
;; feeds both the %O3 mnemonic infix and the %T3 type prefix.
1916 (define_insn "neon_vhadd<mode>"
1917 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1918 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1919 (match_operand:VDQIW 2 "s_register_operand" "w")
1920 (match_operand:SI 3 "immediate_operand" "i")]
1923 "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1924 [(set_attr "type" "neon_add_halve_q")]
;; Saturating add (vqadd) on VDQIX modes, modelled as an unspec.  Operand 3 is
;; the immediate flag word; it feeds the %T3 type prefix of the mnemonic.
1927 (define_insn "neon_vqadd<mode>"
1928 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1929 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1930 (match_operand:VDQIX 2 "s_register_operand" "w")
1931 (match_operand:SI 3 "immediate_operand" "i")]
1934 "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1935 [(set_attr "type" "neon_qadd<q>")]
;; Add and narrow (vaddhn): two VN Q-register operands produce a <V_narrow>
;; D-register result.  Operand 3 is the immediate flag word; it feeds the %O3
;; mnemonic infix.
1938 (define_insn "neon_vaddhn<mode>"
1939 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1940 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1941 (match_operand:VN 2 "s_register_operand" "w")
1942 (match_operand:SI 3 "immediate_operand" "i")]
1945 "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
1946 [(set_attr "type" "neon_add_halve_narrow_q")]
1949 ;; We cannot replace this unspec with mul<mode>3 because of the odd
1950 ;; polynomial multiplication case that can be specified by operand 3.
;; vmul on VDQW modes, kept as an unspec.  Operand 3 is the immediate flag
;; word; it feeds the %F3 type prefix of the mnemonic.  The scheduling type is
;; the floating-point variant for float modes and the integer variant
;; otherwise.
1951 (define_insn "neon_vmul<mode>"
1952 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1953 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
1954 (match_operand:VDQW 2 "s_register_operand" "w")
1955 (match_operand:SI 3 "immediate_operand" "i")]
1958 "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1960 (if_then_else (match_test "<Is_float_mode>")
1961 (const_string "neon_fp_mul_s<q>")
1962 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Intrinsic expander for vmla on VDQW modes.  When the mode is not a float
;; mode, or flag_unsafe_math_optimizations is set, it emits the canonical
;; mul<mode>3add<mode>_neon pattern; the other visible path emits
;; neon_vmla<mode>_unspec.  Operand 4 is an immediate that the visible body
;; does not read.
1965 (define_expand "neon_vmla<mode>"
1966 [(match_operand:VDQW 0 "s_register_operand" "=w")
1967 (match_operand:VDQW 1 "s_register_operand" "0")
1968 (match_operand:VDQW 2 "s_register_operand" "w")
1969 (match_operand:VDQW 3 "s_register_operand" "w")
1970 (match_operand:SI 4 "immediate_operand" "i")]
1973 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1974 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1975 operands[2], operands[3]));
1977 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1978 operands[2], operands[3]));
;; Intrinsic expander for fused multiply-add on VCVTF modes; requires
;; TARGET_FMA.  It emits fma<mode>4_intrinsic with operands 0, 2 and 3 as the
;; leading arguments; the final argument is elided in this view (presumably
;; operand 1, the accumulator).
1982 (define_expand "neon_vfma<VCVTF:mode>"
1983 [(match_operand:VCVTF 0 "s_register_operand")
1984 (match_operand:VCVTF 1 "s_register_operand")
1985 (match_operand:VCVTF 2 "s_register_operand")
1986 (match_operand:VCVTF 3 "s_register_operand")
1987 (match_operand:SI 4 "immediate_operand")]
1988 "TARGET_NEON && TARGET_FMA"
1990 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Intrinsic expander for fused multiply-subtract on VCVTF modes; requires
;; TARGET_FMA.  It emits fmsub<mode>4_intrinsic with operands 0, 2 and 3 as
;; the leading arguments; the final argument is elided in this view
;; (presumably operand 1, the accumulator).
1995 (define_expand "neon_vfms<VCVTF:mode>"
1996 [(match_operand:VCVTF 0 "s_register_operand")
1997 (match_operand:VCVTF 1 "s_register_operand")
1998 (match_operand:VCVTF 2 "s_register_operand")
1999 (match_operand:VCVTF 3 "s_register_operand")
2000 (match_operand:SI 4 "immediate_operand")]
2001 "TARGET_NEON && TARGET_FMA"
2003 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2008 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; vmla on VDQW modes, represented as an unspec.  Operand 1 is tied to the
;; destination (constraint "0"), so the instruction accumulates operand 2
;; times operand 3 into operand 0.
2010 (define_insn "neon_vmla<mode>_unspec"
2011 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2012 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2013 (match_operand:VDQW 2 "s_register_operand" "w")
2014 (match_operand:VDQW 3 "s_register_operand" "w")]
2017 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2019 (if_then_else (match_test "<Is_float_mode>")
2020 (const_string "neon_fp_mla_s<q>")
2021 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-accumulate (vmlal): VW D-register operands 2 and 3 are
;; multiplied into the <V_widen> accumulator, which is tied to the destination
;; (constraint "0").  Operand 4 is the immediate flag word; it feeds the %T4
;; type prefix of the mnemonic.
2024 (define_insn "neon_vmlal<mode>"
2025 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2026 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2027 (match_operand:VW 2 "s_register_operand" "w")
2028 (match_operand:VW 3 "s_register_operand" "w")
2029 (match_operand:SI 4 "immediate_operand" "i")]
2032 "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2033 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Intrinsic expander for vmls on VDQW modes.  When the mode is not a float
;; mode, or flag_unsafe_math_optimizations is set, it emits the canonical
;; mul<mode>3neg<mode>add<mode>_neon pattern; the other visible path emits
;; neon_vmls<mode>_unspec.  Operand 4 is an immediate that the visible body
;; does not read.
2036 (define_expand "neon_vmls<mode>"
2037 [(match_operand:VDQW 0 "s_register_operand" "=w")
2038 (match_operand:VDQW 1 "s_register_operand" "0")
2039 (match_operand:VDQW 2 "s_register_operand" "w")
2040 (match_operand:VDQW 3 "s_register_operand" "w")
2041 (match_operand:SI 4 "immediate_operand" "i")]
2044 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2045 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2046 operands[1], operands[2], operands[3]));
2048 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2049 operands[2], operands[3]));
2053 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; vmls on VDQW modes, represented as an unspec.  Operand 1 is tied to the
;; destination (constraint "0"); operands 2 and 3 are the multiplicands.
2055 (define_insn "neon_vmls<mode>_unspec"
2056 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2057 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2058 (match_operand:VDQW 2 "s_register_operand" "w")
2059 (match_operand:VDQW 3 "s_register_operand" "w")]
2062 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2064 (if_then_else (match_test "<Is_float_mode>")
2065 (const_string "neon_fp_mla_s<q>")
2066 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-subtract (vmlsl): VW D-register operands 2 and 3 are
;; multiplied against the <V_widen> accumulator, which is tied to the
;; destination (constraint "0").  Operand 4 is the immediate flag word; it
;; feeds the %T4 type prefix of the mnemonic.
2069 (define_insn "neon_vmlsl<mode>"
2070 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2071 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2072 (match_operand:VW 2 "s_register_operand" "w")
2073 (match_operand:VW 3 "s_register_operand" "w")
2074 (match_operand:SI 4 "immediate_operand" "i")]
2077 "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2078 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Saturating doubling multiply returning the high half, on VMDQI modes.
;; Operand 3 is the immediate flag word; it feeds the %O3 mnemonic infix.
2081 (define_insn "neon_vqdmulh<mode>"
2082 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2083 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2084 (match_operand:VMDQI 2 "s_register_operand" "w")
2085 (match_operand:SI 3 "immediate_operand" "i")]
2088 "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2089 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; Emits vqdmlal: VMDI-mode operands 2 and 3 feed a <V_widen> result, with
;; operand 1 tied to the destination.  Immediate operand 4 is not referenced
;; by the output template.
2092 (define_insn "neon_vqdmlal<mode>"
2093 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2094 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2095 (match_operand:VMDI 2 "s_register_operand" "w")
2096 (match_operand:VMDI 3 "s_register_operand" "w")
2097 (match_operand:SI 4 "immediate_operand" "i")]
2100 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2101 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Emits vqdmlsl: VMDI-mode operands 2 and 3 feed a <V_widen> result, with
;; operand 1 tied to the destination.  Immediate operand 4 is not referenced
;; by the output template.
2104 (define_insn "neon_vqdmlsl<mode>"
2105 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2106 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2107 (match_operand:VMDI 2 "s_register_operand" "w")
2108 (match_operand:VMDI 3 "s_register_operand" "w")
2109 (match_operand:SI 4 "immediate_operand" "i")]
2112 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2113 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Emits vmull: VW-mode operands 1 and 2 produce a <V_widen> result.  The
;; type letter in the suffix is produced from immediate operand 3 by %T3.
2116 (define_insn "neon_vmull<mode>"
2117 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2118 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2119 (match_operand:VW 2 "s_register_operand" "w")
2120 (match_operand:SI 3 "immediate_operand" "i")]
2123 "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2124 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Emits vqdmull: VMDI-mode operands 1 and 2 produce a <V_widen> result.
;; Immediate operand 3 is not referenced by the output template.
2127 (define_insn "neon_vqdmull<mode>"
2128 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2129 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2130 (match_operand:VMDI 2 "s_register_operand" "w")
2131 (match_operand:SI 3 "immediate_operand" "i")]
2134 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2135 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for the vsub builtin over the VCVTF modes.  When the mode is not
;; a float mode, or flag_unsafe_math_optimizations is set, it emits the
;; generic sub<mode>3 pattern; the other generator called here is
;; neon_vsub<mode>_unspec.  Operand 3 is not passed to sub<mode>3.
2138 (define_expand "neon_vsub<mode>"
2139 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2140 (match_operand:VCVTF 1 "s_register_operand" "w")
2141 (match_operand:VCVTF 2 "s_register_operand" "w")
2142 (match_operand:SI 3 "immediate_operand" "i")]
2145 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2146 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2148 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2153 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vsub on operands 1 and 2 (VCVTF modes), wrapped in an unspec.  The
;; type attribute is neon_fp_addsub_s<q> for float modes and neon_sub<q>
;; otherwise.
2155 (define_insn "neon_vsub<mode>_unspec"
2156 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2157 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2158 (match_operand:VCVTF 2 "s_register_operand" "w")]
2161 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2163 (if_then_else (match_test "<Is_float_mode>")
2164 (const_string "neon_fp_addsub_s<q>")
2165 (const_string "neon_sub<q>")))]
;; Emits vsubl: VDI-mode operands 1 and 2 produce a <V_widen> result.  The
;; type letter in the suffix is produced from immediate operand 3 by %T3.
2168 (define_insn "neon_vsubl<mode>"
2169 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2170 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2171 (match_operand:VDI 2 "s_register_operand" "w")
2172 (match_operand:SI 3 "immediate_operand" "i")]
2175 "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2176 [(set_attr "type" "neon_sub_long")]
;; Emits vsubw: operand 1 is already in the <V_widen> mode (printed with %q),
;; operand 2 is a VDI-mode vector (printed with %P).  The type letter in the
;; suffix is produced from immediate operand 3 by %T3.
2179 (define_insn "neon_vsubw<mode>"
2180 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2181 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2182 (match_operand:VDI 2 "s_register_operand" "w")
2183 (match_operand:SI 3 "immediate_operand" "i")]
2186 "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
2187 [(set_attr "type" "neon_sub_widen")]
;; Emits vqsub on operands 1 and 2 (VDQIX modes).  The type letter in the
;; suffix is produced from immediate operand 3 by %T3.
2190 (define_insn "neon_vqsub<mode>"
2191 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2192 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2193 (match_operand:VDQIX 2 "s_register_operand" "w")
2194 (match_operand:SI 3 "immediate_operand" "i")]
2197 "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2198 [(set_attr "type" "neon_qsub<q>")]
;; Emits vhsub on operands 1 and 2 (VDQIW modes).  The type letter in the
;; suffix is produced from immediate operand 3 by %T3.
2201 (define_insn "neon_vhsub<mode>"
2202 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2203 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2204 (match_operand:VDQIW 2 "s_register_operand" "w")
2205 (match_operand:SI 3 "immediate_operand" "i")]
2208 "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2209 [(set_attr "type" "neon_sub_halve<q>")]
;; Emits vsubhn: VN-mode operands 1 and 2 produce a <V_narrow> result.  The
;; %O3 modifier can insert text derived from immediate operand 3 into the
;; mnemonic (presumably the "r" of vrsubhn -- confirm against the operand
;; printer).
2212 (define_insn "neon_vsubhn<mode>"
2213 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2214 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2215 (match_operand:VN 2 "s_register_operand" "w")
2216 (match_operand:SI 3 "immediate_operand" "i")]
2219 "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
2220 [(set_attr "type" "neon_sub_halve_narrow_q")]
;; Emits vceq with a <V_cmp_result> destination.  Two alternatives: operand 2
;; is either a register, or zero (constraint Dz), in which case the template
;; prints the literal "#0".  The type attribute distinguishes float modes,
;; compare-with-zero and register compares.
2223 (define_insn "neon_vceq<mode>"
2224 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2225 (unspec:<V_cmp_result>
2226 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2227 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2228 (match_operand:SI 3 "immediate_operand" "i,i")]
2232 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2233 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2235 (if_then_else (match_test "<Is_float_mode>")
2236 (const_string "neon_fp_compare_s<q>")
2237 (if_then_else (match_operand 2 "zero_operand")
2238 (const_string "neon_compare_zero<q>")
2239 (const_string "neon_compare<q>"))))]
;; Emits vcge with a <V_cmp_result> destination.  Two alternatives: operand 2
;; is either a register, or zero (constraint Dz), in which case the template
;; prints the literal "#0".  The type letter in the suffix is produced from
;; immediate operand 3 by %T3.
2242 (define_insn "neon_vcge<mode>"
2243 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2244 (unspec:<V_cmp_result>
2245 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2246 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2247 (match_operand:SI 3 "immediate_operand" "i,i")]
2251 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2252 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2254 (if_then_else (match_test "<Is_float_mode>")
2255 (const_string "neon_fp_compare_s<q>")
2256 (if_then_else (match_operand 2 "zero_operand")
2257 (const_string "neon_compare_zero<q>")
2258 (const_string "neon_compare<q>"))))]
;; Register-only vcge pattern restricted to the integer VDQIW modes; unlike
;; neon_vcge<mode> it has no compare-with-zero alternative.  The type letter
;; in the suffix is produced from immediate operand 3 by %T3.
2261 (define_insn "neon_vcgeu<mode>"
2262 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2263 (unspec:<V_cmp_result>
2264 [(match_operand:VDQIW 1 "s_register_operand" "w")
2265 (match_operand:VDQIW 2 "s_register_operand" "w")
2266 (match_operand:SI 3 "immediate_operand" "i")]
2269 "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2270 [(set_attr "type" "neon_compare<q>")]
;; Emits vcgt with a <V_cmp_result> destination.  Two alternatives: operand 2
;; is either a register, or zero (constraint Dz), in which case the template
;; prints the literal "#0".  The type letter in the suffix is produced from
;; immediate operand 3 by %T3.
2273 (define_insn "neon_vcgt<mode>"
2274 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2275 (unspec:<V_cmp_result>
2276 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2277 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2278 (match_operand:SI 3 "immediate_operand" "i,i")]
2282 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2283 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2285 (if_then_else (match_test "<Is_float_mode>")
2286 (const_string "neon_fp_compare_s<q>")
2287 (if_then_else (match_operand 2 "zero_operand")
2288 (const_string "neon_compare_zero<q>")
2289 (const_string "neon_compare<q>"))))]
;; Register-only vcgt pattern restricted to the integer VDQIW modes; unlike
;; neon_vcgt<mode> it has no compare-with-zero alternative.  The type letter
;; in the suffix is produced from immediate operand 3 by %T3.
2292 (define_insn "neon_vcgtu<mode>"
2293 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2294 (unspec:<V_cmp_result>
2295 [(match_operand:VDQIW 1 "s_register_operand" "w")
2296 (match_operand:VDQIW 2 "s_register_operand" "w")
2297 (match_operand:SI 3 "immediate_operand" "i")]
2300 "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2301 [(set_attr "type" "neon_compare<q>")]
2304 ;; VCLE and VCLT only support comparisons with immediate zero (register
2305 ;; variants are VCGE and VCGT with operands reversed).
;; Accordingly operand 2 must satisfy zero_operand and the template always
;; prints the literal "#0".  Because of that predicate, the inner
;; zero_operand test in the type attribute always holds for this pattern.
2307 (define_insn "neon_vcle<mode>"
2308 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2309 (unspec:<V_cmp_result>
2310 [(match_operand:VDQW 1 "s_register_operand" "w")
2311 (match_operand:VDQW 2 "zero_operand" "Dz")
2312 (match_operand:SI 3 "immediate_operand" "i")]
2315 "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2317 (if_then_else (match_test "<Is_float_mode>")
2318 (const_string "neon_fp_compare_s<q>")
2319 (if_then_else (match_operand 2 "zero_operand")
2320 (const_string "neon_compare_zero<q>")
2321 (const_string "neon_compare<q>"))))]
;; Emits vclt against zero only: operand 2 must satisfy zero_operand and the
;; template always prints the literal "#0".  The type letter in the suffix is
;; produced from immediate operand 3 by %T3.
2324 (define_insn "neon_vclt<mode>"
2325 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2326 (unspec:<V_cmp_result>
2327 [(match_operand:VDQW 1 "s_register_operand" "w")
2328 (match_operand:VDQW 2 "zero_operand" "Dz")
2329 (match_operand:SI 3 "immediate_operand" "i")]
2332 "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2334 (if_then_else (match_test "<Is_float_mode>")
2335 (const_string "neon_fp_compare_s<q>")
2336 (if_then_else (match_operand 2 "zero_operand")
2337 (const_string "neon_compare_zero<q>")
2338 (const_string "neon_compare<q>"))))]
;; Emits vacge on operands 1 and 2 (VCVTF modes) with a <V_cmp_result>
;; destination.  Note the pattern is named vcage while the mnemonic is vacge.
;; Immediate operand 3 is not referenced by the output template.
2341 (define_insn "neon_vcage<mode>"
2342 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2343 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2344 (match_operand:VCVTF 2 "s_register_operand" "w")
2345 (match_operand:SI 3 "immediate_operand" "i")]
2348 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2349 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Emits vacgt on operands 1 and 2 (VCVTF modes) with a <V_cmp_result>
;; destination.  Note the pattern is named vcagt while the mnemonic is vacgt.
;; Immediate operand 3 is not referenced by the output template.
2352 (define_insn "neon_vcagt<mode>"
2353 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2354 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2355 (match_operand:VCVTF 2 "s_register_operand" "w")
2356 (match_operand:SI 3 "immediate_operand" "i")]
2359 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2360 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Emits vtst on operands 1 and 2 (VDQIW modes).  Only the element size is
;; printed in the suffix; immediate operand 3 is not referenced by the
;; output template.
2363 (define_insn "neon_vtst<mode>"
2364 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2365 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2366 (match_operand:VDQIW 2 "s_register_operand" "w")
2367 (match_operand:SI 3 "immediate_operand" "i")]
2370 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2371 [(set_attr "type" "neon_tst<q>")]
;; Emits vabd on operands 1 and 2 (VDQW modes).  The type letter in the
;; suffix is produced from immediate operand 3 by %T3; the type attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
2374 (define_insn "neon_vabd<mode>"
2375 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2376 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2377 (match_operand:VDQW 2 "s_register_operand" "w")
2378 (match_operand:SI 3 "immediate_operand" "i")]
2381 "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2383 (if_then_else (match_test "<Is_float_mode>")
2384 (const_string "neon_fp_abd_s<q>")
2385 (const_string "neon_abd<q>")))]
;; Emits vabdl: VW-mode operands 1 and 2 produce a <V_widen> result.  The
;; type letter in the suffix is produced from immediate operand 3 by %T3.
2388 (define_insn "neon_vabdl<mode>"
2389 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2390 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2391 (match_operand:VW 2 "s_register_operand" "w")
2392 (match_operand:SI 3 "immediate_operand" "i")]
2395 "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2396 [(set_attr "type" "neon_abd_long")]
;; Emits vaba.  The RTL is a plus of an unspec over operands 2 and 3 and the
;; accumulator operand 1, which is tied to the destination (constraint "0").
;; The type letter in the suffix is produced from immediate operand 4 by %T4.
2399 (define_insn "neon_vaba<mode>"
2400 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2401 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2402 (match_operand:VDQIW 3 "s_register_operand" "w")
2403 (match_operand:SI 4 "immediate_operand" "i")]
2405 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2407 "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2408 [(set_attr "type" "neon_arith_acc<q>")]
;; Emits vabal.  The RTL is a plus of a <V_widen> unspec over VW-mode
;; operands 2 and 3 and the <V_widen> accumulator operand 1, which is tied to
;; the destination.  The type letter is produced from operand 4 by %T4.
2411 (define_insn "neon_vabal<mode>"
2412 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2413 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2414 (match_operand:VW 3 "s_register_operand" "w")
2415 (match_operand:SI 4 "immediate_operand" "i")]
2417 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2419 "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2420 [(set_attr "type" "neon_arith_acc<q>")]
;; Emits vmax on operands 1 and 2 (VDQW modes).  The type letter in the
;; suffix is produced from immediate operand 3 by %T3; the type attribute is
;; neon_fp_minmax_s<q> for float modes and neon_minmax<q> otherwise.
2423 (define_insn "neon_vmax<mode>"
2424 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2425 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2426 (match_operand:VDQW 2 "s_register_operand" "w")
2427 (match_operand:SI 3 "immediate_operand" "i")]
2430 "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2432 (if_then_else (match_test "<Is_float_mode>")
2433 (const_string "neon_fp_minmax_s<q>")
2434 (const_string "neon_minmax<q>")))]
;; Emits vmin on operands 1 and 2 (VDQW modes).  The type letter in the
;; suffix is produced from immediate operand 3 by %T3; the type attribute is
;; neon_fp_minmax_s<q> for float modes and neon_minmax<q> otherwise.
2437 (define_insn "neon_vmin<mode>"
2438 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2439 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2440 (match_operand:VDQW 2 "s_register_operand" "w")
2441 (match_operand:SI 3 "immediate_operand" "i")]
2444 "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2446 (if_then_else (match_test "<Is_float_mode>")
2447 (const_string "neon_fp_minmax_s<q>")
2448 (const_string "neon_minmax<q>")))]
;; Expander for the vpadd builtin over the VD modes.  It forwards to
;; neon_vpadd_internal<mode>; the tail of that call is not visible in this
;; excerpt, so which operands beyond 0 and 1 are passed cannot be confirmed
;; here.
2451 (define_expand "neon_vpadd<mode>"
2452 [(match_operand:VD 0 "s_register_operand" "=w")
2453 (match_operand:VD 1 "s_register_operand" "w")
2454 (match_operand:VD 2 "s_register_operand" "w")
2455 (match_operand:SI 3 "immediate_operand" "i")]
2458 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; Emits vpaddl: VDQIW-mode operand 1 produces a <V_double_width> result.
;; The type letter in the suffix is produced from immediate operand 2 by %T2.
2463 (define_insn "neon_vpaddl<mode>"
2464 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2465 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
2466 (match_operand:SI 2 "immediate_operand" "i")]
2469 "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2470 [(set_attr "type" "neon_reduc_add_long")]
;; Emits vpadal: VDQIW-mode operand 2 feeds the <V_double_width> accumulator
;; operand 1, which is tied to the destination (constraint "0").  The type
;; letter in the suffix is produced from immediate operand 3 by %T3.
2473 (define_insn "neon_vpadal<mode>"
2474 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2475 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2476 (match_operand:VDQIW 2 "s_register_operand" "w")
2477 (match_operand:SI 3 "immediate_operand" "i")]
2480 "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2481 [(set_attr "type" "neon_reduc_add_acc")]
;; Emits vpmax on operands 1 and 2 (VD modes only).  The type letter in the
;; suffix is produced from immediate operand 3 by %T3; the type attribute is
;; neon_fp_reduc_minmax_s<q> for float modes, neon_reduc_minmax<q> otherwise.
2484 (define_insn "neon_vpmax<mode>"
2485 [(set (match_operand:VD 0 "s_register_operand" "=w")
2486 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2487 (match_operand:VD 2 "s_register_operand" "w")
2488 (match_operand:SI 3 "immediate_operand" "i")]
2491 "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2493 (if_then_else (match_test "<Is_float_mode>")
2494 (const_string "neon_fp_reduc_minmax_s<q>")
2495 (const_string "neon_reduc_minmax<q>")))]
;; Emits vpmin on operands 1 and 2 (VD modes only).  The type letter in the
;; suffix is produced from immediate operand 3 by %T3; the type attribute is
;; neon_fp_reduc_minmax_s<q> for float modes, neon_reduc_minmax<q> otherwise.
2498 (define_insn "neon_vpmin<mode>"
2499 [(set (match_operand:VD 0 "s_register_operand" "=w")
2500 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2501 (match_operand:VD 2 "s_register_operand" "w")
2502 (match_operand:SI 3 "immediate_operand" "i")]
2505 "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2507 (if_then_else (match_test "<Is_float_mode>")
2508 (const_string "neon_fp_reduc_minmax_s<q>")
2509 (const_string "neon_reduc_minmax<q>")))]
;; Emits vrecps on operands 1 and 2 (VCVTF modes).  Immediate operand 3 is
;; not referenced by the output template.
2512 (define_insn "neon_vrecps<mode>"
2513 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2514 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2515 (match_operand:VCVTF 2 "s_register_operand" "w")
2516 (match_operand:SI 3 "immediate_operand" "i")]
2519 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2520 [(set_attr "type" "neon_fp_recps_s<q>")]
;; Emits vrsqrts on operands 1 and 2 (VCVTF modes).  Immediate operand 3 is
;; not referenced by the output template.
2523 (define_insn "neon_vrsqrts<mode>"
2524 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2525 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2526 (match_operand:VCVTF 2 "s_register_operand" "w")
2527 (match_operand:SI 3 "immediate_operand" "i")]
2530 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2531 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for the vabs builtin: forwards operands 0 and 1 to the generic
;; abs<mode>2 pattern.  Immediate operand 2 is accepted but not used.
2534 (define_expand "neon_vabs<mode>"
2535 [(match_operand:VDQW 0 "s_register_operand" "")
2536 (match_operand:VDQW 1 "s_register_operand" "")
2537 (match_operand:SI 2 "immediate_operand" "")]
2540 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; Emits vqabs on operand 1 (VDQIW modes).  Immediate operand 2 is not
;; referenced by the output template.
2544 (define_insn "neon_vqabs<mode>"
2545 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2546 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2547 (match_operand:SI 2 "immediate_operand" "i")]
2550 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2551 [(set_attr "type" "neon_qabs<q>")]
;; Implements the bswap RTL operation for the VDQHSD modes with
;; vrev<element size>.8, i.e. a reversal of 8-bit units within each element.
2554 (define_insn "neon_bswap<mode>"
2555 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2556 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2558 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2559 [(set_attr "type" "neon_rev<q>")]
;; Expander for the vneg builtin: forwards operands 0 and 1 to the generic
;; neg<mode>2 pattern.  Immediate operand 2 is accepted but not used.
2562 (define_expand "neon_vneg<mode>"
2563 [(match_operand:VDQW 0 "s_register_operand" "")
2564 (match_operand:VDQW 1 "s_register_operand" "")
2565 (match_operand:SI 2 "immediate_operand" "")]
2568 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; Emits vqneg on operand 1 (VDQIW modes).  Immediate operand 2 is not
;; referenced by the output template.
2572 (define_insn "neon_vqneg<mode>"
2573 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2574 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2575 (match_operand:SI 2 "immediate_operand" "i")]
2578 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2579 [(set_attr "type" "neon_qneg<q>")]
;; Emits vcls on operand 1 (VDQIW modes).  Immediate operand 2 is not
;; referenced by the output template.
2582 (define_insn "neon_vcls<mode>"
2583 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2584 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2585 (match_operand:SI 2 "immediate_operand" "i")]
2588 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2589 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz<mode>2 pattern for the VDQIW modes, implemented with
;; vclz.  Expressed with the generic clz RTL code rather than an unspec.
2592 (define_insn "clz<mode>2"
2593 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2594 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2596 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2597 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vclz builtin: forwards operands 0 and 1 to clz<mode>2.
;; Immediate operand 2 is accepted but not used.
2600 (define_expand "neon_vclz<mode>"
2601 [(match_operand:VDQIW 0 "s_register_operand" "")
2602 (match_operand:VDQIW 1 "s_register_operand" "")
2603 (match_operand:SI 2 "immediate_operand" "")]
2606 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount<mode>2 pattern for the VE modes, implemented with
;; vcnt.  Expressed with the generic popcount RTL code rather than an unspec.
2610 (define_insn "popcount<mode>2"
2611 [(set (match_operand:VE 0 "s_register_operand" "=w")
2612 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2614 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2615 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vcnt builtin: forwards operands 0 and 1 to
;; popcount<mode>2.  Immediate operand 2 is accepted but not used.
2618 (define_expand "neon_vcnt<mode>"
2619 [(match_operand:VE 0 "s_register_operand" "=w")
2620 (match_operand:VE 1 "s_register_operand" "w")
2621 (match_operand:SI 2 "immediate_operand" "i")]
2624 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; Emits vrecpe on operand 1 (V32 modes).  Immediate operand 2 is not
;; referenced by the output template.
2628 (define_insn "neon_vrecpe<mode>"
2629 [(set (match_operand:V32 0 "s_register_operand" "=w")
2630 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2631 (match_operand:SI 2 "immediate_operand" "i")]
2634 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2635 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; Emits vrsqrte on operand 1 (V32 modes).  Immediate operand 2 is not
;; referenced by the output template.
2638 (define_insn "neon_vrsqrte<mode>"
2639 [(set (match_operand:V32 0 "s_register_operand" "=w")
2640 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2641 (match_operand:SI 2 "immediate_operand" "i")]
2644 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2645 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for the vmvn builtin: forwards operands 0 and 1 to the generic
;; one_cmpl<mode>2 pattern.  Immediate operand 2 is accepted but not used.
2648 (define_expand "neon_vmvn<mode>"
2649 [(match_operand:VDQIW 0 "s_register_operand" "")
2650 (match_operand:VDQIW 1 "s_register_operand" "")
2651 (match_operand:SI 2 "immediate_operand" "")]
2654 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; D-register (VD modes) lane extract into a core SI register using the
;; signed form vmov.s<size>.  On big-endian targets the lane index is
;; mirrored (NUNITS - 1 - elt) before it is printed.
2658 (define_insn "neon_vget_lane<mode>_sext_internal"
2659 [(set (match_operand:SI 0 "s_register_operand" "=r")
2661 (vec_select:<V_elem>
2662 (match_operand:VD 1 "s_register_operand" "w")
2663 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2666 if (BYTES_BIG_ENDIAN)
2668 int elt = INTVAL (operands[2]);
2669 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2670 operands[2] = GEN_INT (elt);
2672 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2674 [(set_attr "type" "neon_to_gp")]
;; D-register (VD modes) lane extract into a core SI register using the
;; unsigned form vmov.u<size>.  On big-endian targets the lane index is
;; mirrored (NUNITS - 1 - elt) before it is printed.
2677 (define_insn "neon_vget_lane<mode>_zext_internal"
2678 [(set (match_operand:SI 0 "s_register_operand" "=r")
2680 (vec_select:<V_elem>
2681 (match_operand:VD 1 "s_register_operand" "w")
2682 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2685 if (BYTES_BIG_ENDIAN)
2687 int elt = INTVAL (operands[2]);
2688 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2689 operands[2] = GEN_INT (elt);
2691 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2693 [(set_attr "type" "neon_to_gp")]
;; Q-register (VQ modes) lane extract into a core SI register using
;; vmov.s<size>.  The lane is split into a half (elt / halfelts), which picks
;; the register regno + 2 * half in <V_HALF>mode, and a lane within that half
;; (elt % halfelts), mirrored within the half on big-endian targets.  The
;; instruction is printed directly through output_asm_insn.
2696 (define_insn "neon_vget_lane<mode>_sext_internal"
2697 [(set (match_operand:SI 0 "s_register_operand" "=r")
2699 (vec_select:<V_elem>
2700 (match_operand:VQ 1 "s_register_operand" "w")
2701 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2705 int regno = REGNO (operands[1]);
2706 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2707 unsigned int elt = INTVAL (operands[2]);
2708 unsigned int elt_adj = elt % halfelts;
2710 if (BYTES_BIG_ENDIAN)
2711 elt_adj = halfelts - 1 - elt_adj;
2713 ops[0] = operands[0];
2714 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2715 ops[2] = GEN_INT (elt_adj);
2716 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2720 [(set_attr "type" "neon_to_gp_q")]
;; Q-register (VQ modes) lane extract into a core SI register using
;; vmov.u<size>.  The lane is split into a half (elt / halfelts), which picks
;; the register regno + 2 * half in <V_HALF>mode, and a lane within that half
;; (elt % halfelts), mirrored within the half on big-endian targets.  The
;; instruction is printed directly through output_asm_insn.
2723 (define_insn "neon_vget_lane<mode>_zext_internal"
2724 [(set (match_operand:SI 0 "s_register_operand" "=r")
2726 (vec_select:<V_elem>
2727 (match_operand:VQ 1 "s_register_operand" "w")
2728 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2732 int regno = REGNO (operands[1]);
2733 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2734 unsigned int elt = INTVAL (operands[2]);
2735 unsigned int elt_adj = elt % halfelts;
2737 if (BYTES_BIG_ENDIAN)
2738 elt_adj = halfelts - 1 - elt_adj;
2740 ops[0] = operands[0];
2741 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2742 ops[2] = GEN_INT (elt_adj);
2743 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2747 [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane builtin over the VDQW modes.  Operand 2 is the
;; lane (checked with neon_lane_bounds against the number of units in the
;; mode); operand 3 is read into "magic".  On big-endian targets the lane is
;; XORed with (64 / element bit size) - 1.  When (magic & 3) == 3 or the
;; element is 32 bits wide, vec_extract<mode> is used; the remaining calls
;; choose the _sext_internal pattern when bit 0 of magic is set and the
;; _zext_internal pattern in the other call.
;; NOTE(review): the C comment in the big-endian branch has no visible
;; terminator in this excerpt -- confirm against the complete file.
2750 (define_expand "neon_vget_lane<mode>"
2751 [(match_operand:<V_ext> 0 "s_register_operand" "")
2752 (match_operand:VDQW 1 "s_register_operand" "")
2753 (match_operand:SI 2 "immediate_operand" "")
2754 (match_operand:SI 3 "immediate_operand" "")]
2757 HOST_WIDE_INT magic = INTVAL (operands[3]);
2760 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2762 if (BYTES_BIG_ENDIAN)
2764 /* The intrinsics are defined in terms of a model where the
2765 element ordering in memory is vldm order, whereas the generic
2766 RTL is defined in terms of a model where the element ordering
2767 in memory is array order. Convert the lane number to conform
2769 unsigned int elt = INTVAL (operands[2]);
2770 unsigned int reg_nelts
2771 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2772 elt ^= reg_nelts - 1;
2773 operands[2] = GEN_INT (elt);
2776 if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2777 insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
2780 if ((magic & 1) != 0)
2781 insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
2784 insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
2791 ; Operand 3 (info word) is ignored because it does nothing useful with 64-bit
; elements.
;; The lane (operand 2) is only bounds-checked against the range 0..1; the
;; expansion itself is a plain move of operand 1 into operand 0.
2794 (define_expand "neon_vget_lanedi"
2795 [(match_operand:DI 0 "s_register_operand" "=r")
2796 (match_operand:DI 1 "s_register_operand" "w")
2797 (match_operand:SI 2 "immediate_operand" "i")
2798 (match_operand:SI 3 "immediate_operand" "i")]
2801 neon_lane_bounds (operands[2], 0, 1);
2802 emit_move_insn (operands[0], operands[1]);
;; Expander for vget_lane on V2DI.  It switches on the lane (operand 2) and
;; moves either gen_lowpart or gen_highpart (DImode) of operand 1 into
;; operand 0; neon_lane_bounds (operands[2], 0, 1) is called on a further
;; path.  The case labels are not visible in this excerpt, so the mapping of
;; lane values to paths should be confirmed against the complete file.
;; Operand 3 is not used by the visible code.
2806 (define_expand "neon_vget_lanev2di"
2807 [(match_operand:DI 0 "s_register_operand" "")
2808 (match_operand:V2DI 1 "s_register_operand" "")
2809 (match_operand:SI 2 "immediate_operand" "")
2810 (match_operand:SI 3 "immediate_operand" "")]
2813 switch (INTVAL (operands[2]))
2816 emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
2819 emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
2822 neon_lane_bounds (operands[2], 0, 1);
;; Expander for the vset_lane builtin over the VDQ modes.  Operand 1 is the
;; scalar, operand 2 the input vector and operand 3 the lane, which is
;; checked with neon_lane_bounds.  On big-endian targets the lane is XORed
;; with (64 / element bit size) - 1.  The lane is handed to
;; vec_set<mode>_internal as the one-hot mask 1 << elt.
2828 (define_expand "neon_vset_lane<mode>"
2829 [(match_operand:VDQ 0 "s_register_operand" "=w")
2830 (match_operand:<V_elem> 1 "s_register_operand" "r")
2831 (match_operand:VDQ 2 "s_register_operand" "0")
2832 (match_operand:SI 3 "immediate_operand" "i")]
2835 unsigned int elt = INTVAL (operands[3]);
2836 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
2838 if (BYTES_BIG_ENDIAN)
2840 unsigned int reg_nelts
2841 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2842 elt ^= reg_nelts - 1;
2845 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2846 GEN_INT (1 << elt), operands[2]));
2850 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; NOTE(review): in the code below operand 3 is not entirely ignored -- it is
;; bounds-checked by neon_lane_bounds (range 0..1).  Operand 2 is unused; the
;; expansion is a plain move of operand 1 into operand 0.
2852 (define_expand "neon_vset_lanedi"
2853 [(match_operand:DI 0 "s_register_operand" "=w")
2854 (match_operand:DI 1 "s_register_operand" "r")
2855 (match_operand:DI 2 "s_register_operand" "0")
2856 (match_operand:SI 3 "immediate_operand" "i")]
2859 neon_lane_bounds (operands[3], 0, 1);
2860 emit_move_insn (operands[0], operands[1]);
;; Expander for the vcreate builtin over the VDX modes: reinterprets the DI
;; operand 1 in the vector mode via gen_lowpart and moves it into operand 0.
2864 (define_expand "neon_vcreate<mode>"
2865 [(match_operand:VDX 0 "s_register_operand" "")
2866 (match_operand:DI 1 "general_operand" "")]
2869 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2870 emit_move_insn (operands[0], src);
;; vec_duplicate for the VX modes: emits vdup from a core register
;; (constraint "r") holding the scalar element.
2874 (define_insn "neon_vdup_n<mode>"
2875 [(set (match_operand:VX 0 "s_register_operand" "=w")
2876 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2878 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2879 [(set_attr "type" "neon_from_gp<q>")]
;; vec_duplicate for the V32 modes.  Two alternatives: the scalar comes from
;; a core register ("r", printed as %1) or from a register matching
;; constraint "t" (printed with the %y1 modifier); the type attribute differs
;; per alternative.
2882 (define_insn "neon_vdup_n<mode>"
2883 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2884 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2887 vdup.<V_sz_elem>\t%<V_reg>0, %1
2888 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2889 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; Expander for vdup_n on a single DI element: expands to a plain move of
;; operand 1 into operand 0.
2892 (define_expand "neon_vdup_ndi"
2893 [(match_operand:DI 0 "s_register_operand" "=w")
2894 (match_operand:DI 1 "s_register_operand" "r")]
2897 emit_move_insn (operands[0], operands[1]);
;; vec_duplicate of a DI value into V2DI.  Each alternative prints two vmov
;; instructions, one per destination half (%e0 and %f0): from a core
;; register pair (%Q1, %R1) or from a "w" register (%P1).  The length
;; attribute is 8 to account for the two instructions.
2902 (define_insn "neon_vdup_nv2di"
2903 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2904 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2907 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2908 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2909 [(set_attr "length" "8")
2910 (set_attr "type" "multiple")]
;; Duplicates one lane of operand 1 (a <V_double_vector_mode> register,
;; printed with %P) across the VDQW destination using vdup.  On big-endian
;; targets the lane index is mirrored (NUNITS - 1 - elt) first.  Two output
;; templates exist, printing the destination with %P0 or %q0; the condition
;; choosing between them is not visible in this excerpt.
2913 (define_insn "neon_vdup_lane<mode>_internal"
2914 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2916 (vec_select:<V_elem>
2917 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2918 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2921 if (BYTES_BIG_ENDIAN)
2923 int elt = INTVAL (operands[2]);
2924 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2925 operands[2] = GEN_INT (elt);
2928 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2930 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2932 [(set_attr "type" "neon_dup<q>")]
;; Expander for the vdup_lane builtin.  The lane (operand 2) is checked with
;; neon_lane_bounds against the unit count of <V_double_vector_mode>; on
;; big-endian targets it is XORed with (64 / element bit size) - 1 before
;; neon_vdup_lane<mode>_internal is emitted.  The tail of that call is not
;; visible in this excerpt.
2935 (define_expand "neon_vdup_lane<mode>"
2936 [(match_operand:VDQW 0 "s_register_operand" "=w")
2937 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2938 (match_operand:SI 2 "immediate_operand" "i")]
2941 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
2942 if (BYTES_BIG_ENDIAN)
2944 unsigned int elt = INTVAL (operands[2]);
2945 unsigned int reg_nelts
2946 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
2947 elt ^= reg_nelts - 1;
2948 operands[2] = GEN_INT (elt);
2950 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2955 ; Scalar index is ignored, since only zero is valid here.
;; The index (operand 2) is still bounds-checked with neon_lane_bounds
;; (range 0..1); the expansion is a plain move of operand 1 into operand 0.
2956 (define_expand "neon_vdup_lanedi"
2957 [(match_operand:DI 0 "s_register_operand" "=w")
2958 (match_operand:DI 1 "s_register_operand" "w")
2959 (match_operand:SI 2 "immediate_operand" "i")]
2962 neon_lane_bounds (operands[2], 0, 1);
2963 emit_move_insn (operands[0], operands[1]);
2967 ; Likewise for v2di, as the DImode second operand has only a single element.
;; The index (operand 2) is bounds-checked with neon_lane_bounds (range
;; 0..1) and the duplication itself is delegated to neon_vdup_nv2di.
2968 (define_expand "neon_vdup_lanev2di"
2969 [(match_operand:V2DI 0 "s_register_operand" "=w")
2970 (match_operand:DI 1 "s_register_operand" "w")
2971 (match_operand:SI 2 "immediate_operand" "i")]
2974 neon_lane_bounds (operands[2], 0, 1);
2975 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2979 ; Disabled before reload because we don't want combine doing something silly,
2980 ; but used by the post-reload expansion of neon_vcombine.
;; Both operands are read-write ("+w"): the pattern is a pair of sets that
;; exchange operands 0 and 1, printed as a single vswp.  The insn condition
;; requires reload_completed.
2981 (define_insn "*neon_vswp<mode>"
2982 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2983 (match_operand:VDQX 1 "s_register_operand" "+w"))
2984 (set (match_dup 1) (match_dup 0))]
2985 "TARGET_NEON && reload_completed"
2986 "vswp\t%<V_reg>0, %<V_reg>1"
2987 [(set_attr "type" "neon_permute<q>")]
2990 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; destination vector.
2992 ;; FIXME: A different implementation of this builtin could make it much
2993 ;; more likely that we wouldn't actually need to output anything (we could make
2994 ;; it so that the reg allocator puts things in the right places magically
2995 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; Modelled as a vec_concat of two VDX operands into a <V_DOUBLE> result.
;; It is a define_insn_and_split: after reload ("&& reload_completed") the
;; split is performed by neon_split_vcombine.
2997 (define_insn_and_split "neon_vcombine<mode>"
2998 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2999 (vec_concat:<V_DOUBLE>
3000 (match_operand:VDX 1 "s_register_operand" "w")
3001 (match_operand:VDX 2 "s_register_operand" "w")))]
3004 "&& reload_completed"
3007 neon_split_vcombine (operands);
3010 [(set_attr "type" "multiple")]
;; Expander for the vget_high builtin over the VQX modes: moves into operand
;; 0 the <V_HALF>mode subreg of operand 1 located at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. one half-vector into the register.
3013 (define_expand "neon_vget_high<mode>"
3014 [(match_operand:<V_HALF> 0 "s_register_operand")
3015 (match_operand:VQX 1 "s_register_operand")]
3018 emit_move_insn (operands[0],
3019 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3020 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for the vget_low builtin over the VQX modes: moves a
;; <V_HALF>mode subreg of operand 1 into operand 0.  The subreg offset
;; argument is not visible in this excerpt (presumably 0 -- confirm).
3024 (define_expand "neon_vget_low<mode>"
3025 [(match_operand:<V_HALF> 0 "s_register_operand")
3026 (match_operand:VQX 1 "s_register_operand")]
3029 emit_move_insn (operands[0],
3030 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named signed integer-to-float conversion for the VCVTI modes,
;; emitted as vcvt.f32.s32.  Only enabled when flag_rounding_math is off.
3035 (define_insn "float<mode><V_cvtto>2"
3036 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3037 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3038 "TARGET_NEON && !flag_rounding_math"
3039 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3040 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned integer-to-float conversion for the VCVTI modes,
;; emitted as vcvt.f32.u32.  Only enabled when flag_rounding_math is off.
3043 (define_insn "floatuns<mode><V_cvtto>2"
3044 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3045 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3046 "TARGET_NEON && !flag_rounding_math"
3047 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3048 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named float-to-signed-integer conversion (fix RTL code) for the
;; VCVTF modes, emitted as vcvt.s32.f32.
3051 (define_insn "fix_trunc<mode><V_cvtto>2"
3052 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3053 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3055 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3056 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named float-to-unsigned-integer conversion (unsigned_fix RTL
;; code) for the VCVTF modes, emitted as vcvt.u32.f32.
3059 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3060 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3061 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3063 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3064 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt builtin for the float (VCVTF) source modes: converts from f32, with
;; the destination type letter produced from immediate operand 2 by %T2.
3067 (define_insn "neon_vcvt<mode>"
3068 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3069 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3070 (match_operand:SI 2 "immediate_operand" "i")]
3073 "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
3074 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt builtin for the integer (VCVTI) source modes: converts to f32, with
;; the source type letter produced from immediate operand 2 by %T2.
3077 (define_insn "neon_vcvt<mode>"
3078 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3079 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3080 (match_operand:SI 2 "immediate_operand" "i")]
3083 "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
3084 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening half-to-single conversion: V4HF operand 1 (printed with %P) to a
;; V4SF result (printed with %q) via vcvt.f32.f16.  Requires TARGET_FP16 in
;; addition to TARGET_NEON.
3087 (define_insn "neon_vcvtv4sfv4hf"
3088 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3089 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3091 "TARGET_NEON && TARGET_FP16"
3092 "vcvt.f32.f16\t%q0, %P1"
3093 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing single-to-half conversion: V4SF operand 1 (printed with %q) to
;; a V4HF result (printed with %P) via vcvt.f16.f32.  Requires TARGET_FP16 in
;; addition to TARGET_NEON.
3096 (define_insn "neon_vcvtv4hfv4sf"
3097 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3098 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3100 "TARGET_NEON && TARGET_FP16"
3101 "vcvt.f16.f32\t%P0, %q1"
3102 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; vcvt_n builtin for the float (VCVTF) source modes.  Immediate operand 2 is
;; validated at output time with neon_const_bounds (operands[2], 1, 33) and
;; printed as the trailing assembly operand; the destination type letter is
;; produced from immediate operand 3 by %T3.
3105 (define_insn "neon_vcvt_n<mode>"
3106 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3107 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3108 (match_operand:SI 2 "immediate_operand" "i")
3109 (match_operand:SI 3 "immediate_operand" "i")]
3113 neon_const_bounds (operands[2], 1, 33);
3114 return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3116 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt_n for VCVTI inputs: integer-to-float conversion that takes an
;; extra immediate.  Operand 2 is that immediate: it is checked with
;; neon_const_bounds (operands[2], 1, 33) and printed as the last assembler
;; operand.  Operand 3 is used only through the %T3 modifier.
;; NOTE(review): the bounds presumably admit 1..32 -- confirm whether the
;; upper argument of neon_const_bounds is exclusive.
3119 (define_insn "neon_vcvt_n<mode>"
3120 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3121 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3122 (match_operand:SI 2 "immediate_operand" "i")
3123 (match_operand:SI 3 "immediate_operand" "i")]
3127 neon_const_bounds (operands[2], 1, 33);
3128 return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
3130 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Narrowing move: a VN-mode source (operand 1) produces a <V_narrow>-mode
;; result via vmovn.  Operand 2 is an immediate that the output template does
;; not reference.
3133 (define_insn "neon_vmovn<mode>"
3134 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3135 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3136 (match_operand:SI 2 "immediate_operand" "i")]
3139 "vmovn.<V_if_elem>\t%P0, %q1"
3140 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Narrowing move emitted as vqmovn: a VN-mode source (operand 1) produces a
;; <V_narrow>-mode result.  Operand 2 is an immediate used only through the
;; %T2 modifier that forms the type suffix.
3143 (define_insn "neon_vqmovn<mode>"
3144 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3145 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3146 (match_operand:SI 2 "immediate_operand" "i")]
3149 "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
3150 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Narrowing move emitted as vqmovun: a VN-mode source (operand 1) produces a
;; <V_narrow>-mode result.  Operand 2 is an immediate that the output template
;; does not reference.
3153 (define_insn "neon_vqmovun<mode>"
3154 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3155 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3156 (match_operand:SI 2 "immediate_operand" "i")]
3159 "vqmovun.<V_s_elem>\t%P0, %q1"
3160 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move emitted as vmovl: a VW-mode source (operand 1) produces a
;; <V_widen>-mode result.  Operand 2 is an immediate used only through the
;; %T2 modifier that forms the type suffix.
3163 (define_insn "neon_vmovl<mode>"
3164 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3165 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3166 (match_operand:SI 2 "immediate_operand" "i")]
3169 "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
3170 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply-by-lane for VMD modes.  Operand 1 is the vector multiplicand and
;; operand 2 (constraint <scalar_mul_constraint>) is the vector holding the
;; scalar.  Operand 3 is the lane index: it is passed to neon_lane_bounds with
;; 0 and the element count of <MODE>mode, then printed as %P2[%c3].
;; Operand 4 is not referenced by the output template.  The "type" attribute
;; is selected by testing <Is_float_mode>.
3173 (define_insn "neon_vmul_lane<mode>"
3174 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3175 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3176 (match_operand:VMD 2 "s_register_operand"
3177 "<scalar_mul_constraint>")
3178 (match_operand:SI 3 "immediate_operand" "i")
3179 (match_operand:SI 4 "immediate_operand" "i")]
3183 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3184 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3187 (if_then_else (match_test "<Is_float_mode>")
3188 (const_string "neon_fp_mul_s_scalar<q>")
3189 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply-by-lane for VMQ modes.  Operand 1 is the vector multiplicand and
;; operand 2 is a <V_HALF>-mode vector holding the scalar.  Operand 3 is the
;; lane index: it is passed to neon_lane_bounds with 0 and the element count
;; of <V_HALF>mode (the mode of operand 2), then printed as %P2[%c3].
;; Operand 4 is not referenced by the output template.  The "type" attribute
;; is selected by testing <Is_float_mode>.
3192 (define_insn "neon_vmul_lane<mode>"
3193 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3194 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3195 (match_operand:<V_HALF> 2 "s_register_operand"
3196 "<scalar_mul_constraint>")
3197 (match_operand:SI 3 "immediate_operand" "i")
3198 (match_operand:SI 4 "immediate_operand" "i")]
3202 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3203 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3206 (if_then_else (match_test "<Is_float_mode>")
3207 (const_string "neon_fp_mul_s_scalar<q>")
3208 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-by-lane (UNSPEC_VMULL_LANE): VMDI-mode operand 1 times a
;; lane of VMDI-mode operand 2 gives a <V_widen>-mode result.  Operand 3 is
;; the lane index, checked by neon_lane_bounds against the element count of
;; <MODE>mode and printed as %P2[%c3].  Operand 4 is used only through the
;; %T4 modifier that forms the type suffix.
3211 (define_insn "neon_vmull_lane<mode>"
3212 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3213 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3214 (match_operand:VMDI 2 "s_register_operand"
3215 "<scalar_mul_constraint>")
3216 (match_operand:SI 3 "immediate_operand" "i")
3217 (match_operand:SI 4 "immediate_operand" "i")]
3218 UNSPEC_VMULL_LANE))]
3221 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3222 return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3224 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by lane (UNSPEC_VQDMULL_LANE): VMDI-mode operand 1 and a lane of
;; VMDI-mode operand 2 give a <V_widen>-mode result.  Operand 3 is the lane
;; index, checked by neon_lane_bounds against the element count of <MODE>mode
;; and printed as %P2[%c3].  Operand 4 is not referenced by the output
;; template.
3227 (define_insn "neon_vqdmull_lane<mode>"
3228 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3229 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3230 (match_operand:VMDI 2 "s_register_operand"
3231 "<scalar_mul_constraint>")
3232 (match_operand:SI 3 "immediate_operand" "i")
3233 (match_operand:SI 4 "immediate_operand" "i")]
3234 UNSPEC_VQDMULL_LANE))]
3237 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3238 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3240 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vqdmulh by lane for VMQI modes (UNSPEC_VQDMULH_LANE).  Operand 1 is the
;; VMQI-mode vector and operand 2 is a <V_HALF>-mode vector holding the
;; scalar.  Operand 3 is the lane index, printed as %P2[%c3].  Operand 4 is
;; used only through the %O4 and %T4 modifiers in the mnemonic.
;; The lane is selected from operand 2, so the index is bounded by the element
;; count of <V_HALF>mode (the mode of operand 2), not of <MODE>mode; this
;; matches the bound used by neon_vmul_lane<mode> for VMQ modes.
3243 (define_insn "neon_vqdmulh_lane<mode>"
3244 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3245 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3246 (match_operand:<V_HALF> 2 "s_register_operand"
3247 "<scalar_mul_constraint>")
3248 (match_operand:SI 3 "immediate_operand" "i")
3249 (match_operand:SI 4 "immediate_operand" "i")]
3250 UNSPEC_VQDMULH_LANE))]
3253 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3254 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
3256 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vqdmulh by lane for VMDI modes (UNSPEC_VQDMULH_LANE).  Operands 1 and 2
;; are both VMDI-mode vectors; operand 3 is the lane index into operand 2,
;; checked by neon_lane_bounds against the element count of <MODE>mode and
;; printed as %P2[%c3].  Operand 4 is used only through the %O4 and %T4
;; modifiers in the mnemonic.
;; NOTE(review): the "type" attribute ends in a literal "_q" here, the same
;; string the VMQI variant uses -- confirm whether <q> was intended.
3259 (define_insn "neon_vqdmulh_lane<mode>"
3260 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3261 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3262 (match_operand:VMDI 2 "s_register_operand"
3263 "<scalar_mul_constraint>")
3264 (match_operand:SI 3 "immediate_operand" "i")
3265 (match_operand:SI 4 "immediate_operand" "i")]
3266 UNSPEC_VQDMULH_LANE))]
3269 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3270 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
3272 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Multiply-accumulate by lane for VMD modes.  Operand 1 is the accumulator
;; and is tied to the destination (constraint "0").  Operand 2 is the vector
;; multiplicand and operand 3 the vector holding the scalar.  Operand 4 is
;; the lane index, checked by neon_lane_bounds against the element count of
;; <MODE>mode and printed as %P3[%c4].  Operand 5 is not referenced by the
;; output template.  The "type" attribute is selected by <Is_float_mode>.
3275 (define_insn "neon_vmla_lane<mode>"
3276 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3277 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3278 (match_operand:VMD 2 "s_register_operand" "w")
3279 (match_operand:VMD 3 "s_register_operand"
3280 "<scalar_mul_constraint>")
3281 (match_operand:SI 4 "immediate_operand" "i")
3282 (match_operand:SI 5 "immediate_operand" "i")]
3286 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3287 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3290 (if_then_else (match_test "<Is_float_mode>")
3291 (const_string "neon_fp_mla_s_scalar<q>")
3292 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by lane for VMQ modes.  Operand 1 is the accumulator
;; and is tied to the destination (constraint "0").  Operand 2 is the VMQ-mode
;; multiplicand and operand 3 is a <V_HALF>-mode vector holding the scalar.
;; Operand 4 is the lane index, printed as %P3[%c4].  Operand 5 is not
;; referenced by the output template.  The "type" attribute is selected by
;; <Is_float_mode>.
;; The lane is selected from operand 3, so the index is bounded by the element
;; count of <V_HALF>mode (the mode of operand 3), not of <MODE>mode; this
;; matches the bound used by neon_vmul_lane<mode> for VMQ modes.
3295 (define_insn "neon_vmla_lane<mode>"
3296 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3297 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3298 (match_operand:VMQ 2 "s_register_operand" "w")
3299 (match_operand:<V_HALF> 3 "s_register_operand"
3300 "<scalar_mul_constraint>")
3301 (match_operand:SI 4 "immediate_operand" "i")
3302 (match_operand:SI 5 "immediate_operand" "i")]
3306 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3307 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3310 (if_then_else (match_test "<Is_float_mode>")
3311 (const_string "neon_fp_mla_s_scalar<q>")
3312 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by lane (UNSPEC_VMLAL_LANE).  Operand 1 is the
;; <V_widen>-mode accumulator, tied to the destination.  Operands 2 and 3 are
;; VMDI-mode vectors; operand 4 is the lane index into operand 3, checked by
;; neon_lane_bounds against the element count of <MODE>mode and printed as
;; %P3[%c4].  Operand 5 is used only through the %T5 modifier.
3315 (define_insn "neon_vmlal_lane<mode>"
3316 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3317 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3318 (match_operand:VMDI 2 "s_register_operand" "w")
3319 (match_operand:VMDI 3 "s_register_operand"
3320 "<scalar_mul_constraint>")
3321 (match_operand:SI 4 "immediate_operand" "i")
3322 (match_operand:SI 5 "immediate_operand" "i")]
3323 UNSPEC_VMLAL_LANE))]
3326 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3327 return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3329 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by lane (UNSPEC_VQDMLAL_LANE).  Operand 1 is the <V_widen>-mode
;; accumulator, tied to the destination.  Operands 2 and 3 are VMDI-mode
;; vectors; operand 4 is the lane index into operand 3, checked by
;; neon_lane_bounds against the element count of <MODE>mode and printed as
;; %P3[%c4].  Operand 5 is not referenced by the output template.
3332 (define_insn "neon_vqdmlal_lane<mode>"
3333 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3334 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3335 (match_operand:VMDI 2 "s_register_operand" "w")
3336 (match_operand:VMDI 3 "s_register_operand"
3337 "<scalar_mul_constraint>")
3338 (match_operand:SI 4 "immediate_operand" "i")
3339 (match_operand:SI 5 "immediate_operand" "i")]
3340 UNSPEC_VQDMLAL_LANE))]
3343 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3344 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3346 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by lane for VMD modes.  Operand 1 is the accumulator and
;; is tied to the destination (constraint "0").  Operand 2 is the vector
;; multiplicand and operand 3 the vector holding the scalar.  Operand 4 is
;; the lane index, checked by neon_lane_bounds against the element count of
;; <MODE>mode and printed as %P3[%c4].  Operand 5 is not referenced by the
;; output template.  The "type" attribute is selected by <Is_float_mode>.
3349 (define_insn "neon_vmls_lane<mode>"
3350 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3351 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3352 (match_operand:VMD 2 "s_register_operand" "w")
3353 (match_operand:VMD 3 "s_register_operand"
3354 "<scalar_mul_constraint>")
3355 (match_operand:SI 4 "immediate_operand" "i")
3356 (match_operand:SI 5 "immediate_operand" "i")]
3360 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3361 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3364 (if_then_else (match_test "<Is_float_mode>")
3365 (const_string "neon_fp_mla_s_scalar<q>")
3366 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by lane for VMQ modes.  Operand 1 is the accumulator and
;; is tied to the destination (constraint "0").  Operand 2 is the VMQ-mode
;; multiplicand and operand 3 is a <V_HALF>-mode vector holding the scalar.
;; Operand 4 is the lane index, printed as %P3[%c4].  Operand 5 is not
;; referenced by the output template.  The "type" attribute is selected by
;; <Is_float_mode>.
;; The lane is selected from operand 3, so the index is bounded by the element
;; count of <V_HALF>mode (the mode of operand 3), not of <MODE>mode; this
;; matches the bound used by neon_vmul_lane<mode> for VMQ modes.
3369 (define_insn "neon_vmls_lane<mode>"
3370 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3371 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3372 (match_operand:VMQ 2 "s_register_operand" "w")
3373 (match_operand:<V_HALF> 3 "s_register_operand"
3374 "<scalar_mul_constraint>")
3375 (match_operand:SI 4 "immediate_operand" "i")
3376 (match_operand:SI 5 "immediate_operand" "i")]
3380 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3381 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3384 (if_then_else (match_test "<Is_float_mode>")
3385 (const_string "neon_fp_mla_s_scalar<q>")
3386 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by lane (UNSPEC_VMLSL_LANE).  Operand 1 is the
;; <V_widen>-mode accumulator, tied to the destination.  Operands 2 and 3 are
;; VMDI-mode vectors; operand 4 is the lane index into operand 3, checked by
;; neon_lane_bounds against the element count of <MODE>mode and printed as
;; %P3[%c4].  Operand 5 is used only through the %T5 modifier.
3389 (define_insn "neon_vmlsl_lane<mode>"
3390 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3391 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3392 (match_operand:VMDI 2 "s_register_operand" "w")
3393 (match_operand:VMDI 3 "s_register_operand"
3394 "<scalar_mul_constraint>")
3395 (match_operand:SI 4 "immediate_operand" "i")
3396 (match_operand:SI 5 "immediate_operand" "i")]
3397 UNSPEC_VMLSL_LANE))]
3400 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3401 return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3403 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by lane (UNSPEC_VQDMLSL_LANE).  Operand 1 is the <V_widen>-mode
;; accumulator, tied to the destination.  Operands 2 and 3 are VMDI-mode
;; vectors; operand 4 is the lane index into operand 3, checked by
;; neon_lane_bounds against the element count of <MODE>mode and printed as
;; %P3[%c4].  Operand 5 is not referenced by the output template.
3406 (define_insn "neon_vqdmlsl_lane<mode>"
3407 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3408 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3409 (match_operand:VMDI 2 "s_register_operand" "w")
3410 (match_operand:VMDI 3 "s_register_operand"
3411 "<scalar_mul_constraint>")
3412 (match_operand:SI 4 "immediate_operand" "i")
3413 (match_operand:SI 5 "immediate_operand" "i")]
3414 UNSPEC_VQDMLSL_LANE))]
3417 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3418 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3420 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3423 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3424 ; core register into a temp register, then use a scalar taken from that. This
3425 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3426 ; or extracted from another vector. In the latter case it's currently better to
3427 ; use the "_lane" variant, and the former case can probably be implemented
3428 ; using vld1_lane, but that hasn't been done yet.
;; Multiply a VMD-mode vector (operand 1) by a <V_elem>-mode scalar
;; (operand 2).  A fresh <MODE>mode pseudo receives the scalar through
;; neon_vset_lane<mode> with index const0_rtx, and neon_vmul_lane<mode> is
;; then emitted with lane index const0_rtx.  Operand 3 is not forwarded;
;; const0_rtx is passed as the final argument instead.
3430 (define_expand "neon_vmul_n<mode>"
3431 [(match_operand:VMD 0 "s_register_operand" "")
3432 (match_operand:VMD 1 "s_register_operand" "")
3433 (match_operand:<V_elem> 2 "s_register_operand" "")
3434 (match_operand:SI 3 "immediate_operand" "")]
3437 rtx tmp = gen_reg_rtx (<MODE>mode);
3438 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3439 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3440 const0_rtx, const0_rtx));
;; Multiply a VMQ-mode vector (operand 1) by a <V_elem>-mode scalar
;; (operand 2).  A fresh <V_HALF>mode pseudo receives the scalar through
;; neon_vset_lane<V_half> with index const0_rtx, and neon_vmul_lane<mode> is
;; then emitted with lane index const0_rtx.  Operand 3 is not forwarded;
;; const0_rtx is passed as the final argument instead.
3444 (define_expand "neon_vmul_n<mode>"
3445 [(match_operand:VMQ 0 "s_register_operand" "")
3446 (match_operand:VMQ 1 "s_register_operand" "")
3447 (match_operand:<V_elem> 2 "s_register_operand" "")
3448 (match_operand:SI 3 "immediate_operand" "")]
3451 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3452 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3453 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3454 const0_rtx, const0_rtx));
;; Widening multiply of a VMDI-mode vector (operand 1) by a <V_elem>-mode
;; scalar (operand 2), producing a <V_widen>-mode result.  A fresh <MODE>mode
;; pseudo receives the scalar through neon_vset_lane<mode> with index
;; const0_rtx, and neon_vmull_lane<mode> is then emitted with lane index
;; const0_rtx and operand 3 forwarded as its final argument.
3458 (define_expand "neon_vmull_n<mode>"
3459 [(match_operand:<V_widen> 0 "s_register_operand" "")
3460 (match_operand:VMDI 1 "s_register_operand" "")
3461 (match_operand:<V_elem> 2 "s_register_operand" "")
3462 (match_operand:SI 3 "immediate_operand" "")]
3465 rtx tmp = gen_reg_rtx (<MODE>mode);
3466 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3467 emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1], tmp,
3468 const0_rtx, operands[3]));
;; vqdmull of a VMDI-mode vector (operand 1) by a <V_elem>-mode scalar
;; (operand 2), producing a <V_widen>-mode result.  A fresh <MODE>mode pseudo
;; receives the scalar through neon_vset_lane<mode> with index const0_rtx, and
;; neon_vqdmull_lane<mode> is then emitted with lane index const0_rtx.
;; Operand 3 is not forwarded; const0_rtx is passed as the final argument.
3472 (define_expand "neon_vqdmull_n<mode>"
3473 [(match_operand:<V_widen> 0 "s_register_operand" "")
3474 (match_operand:VMDI 1 "s_register_operand" "")
3475 (match_operand:<V_elem> 2 "s_register_operand" "")
3476 (match_operand:SI 3 "immediate_operand" "")]
3479 rtx tmp = gen_reg_rtx (<MODE>mode);
3480 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3481 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3482 const0_rtx, const0_rtx));
;; vqdmulh of a VMDI-mode vector (operand 1) by a <V_elem>-mode scalar
;; (operand 2).  A fresh <MODE>mode pseudo receives the scalar through
;; neon_vset_lane<mode> with index const0_rtx, and neon_vqdmulh_lane<mode> is
;; then emitted with lane index const0_rtx and operand 3 forwarded as its
;; final argument.
3486 (define_expand "neon_vqdmulh_n<mode>"
3487 [(match_operand:VMDI 0 "s_register_operand" "")
3488 (match_operand:VMDI 1 "s_register_operand" "")
3489 (match_operand:<V_elem> 2 "s_register_operand" "")
3490 (match_operand:SI 3 "immediate_operand" "")]
3493 rtx tmp = gen_reg_rtx (<MODE>mode);
3494 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3495 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3496 const0_rtx, operands[3]));
;; vqdmulh of a VMQI-mode vector (operand 1) by a <V_elem>-mode scalar
;; (operand 2).  A fresh <V_HALF>mode pseudo receives the scalar through
;; neon_vset_lane<V_half> with index const0_rtx, and neon_vqdmulh_lane<mode>
;; is then emitted with lane index const0_rtx and operand 3 forwarded as its
;; final argument.
3500 (define_expand "neon_vqdmulh_n<mode>"
3501 [(match_operand:VMQI 0 "s_register_operand" "")
3502 (match_operand:VMQI 1 "s_register_operand" "")
3503 (match_operand:<V_elem> 2 "s_register_operand" "")
3504 (match_operand:SI 3 "immediate_operand" "")]
3507 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3508 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3509 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3510 const0_rtx, operands[3]));
;; Multiply-accumulate of VMD-mode vectors (operands 1 and 2) with a
;; <V_elem>-mode scalar (operand 3).  A fresh <MODE>mode pseudo receives the
;; scalar through neon_vset_lane<mode> with index const0_rtx, and
;; neon_vmla_lane<mode> is then emitted with lane index const0_rtx and
;; operand 4 forwarded as its final argument.
3514 (define_expand "neon_vmla_n<mode>"
3515 [(match_operand:VMD 0 "s_register_operand" "")
3516 (match_operand:VMD 1 "s_register_operand" "")
3517 (match_operand:VMD 2 "s_register_operand" "")
3518 (match_operand:<V_elem> 3 "s_register_operand" "")
3519 (match_operand:SI 4 "immediate_operand" "")]
3522 rtx tmp = gen_reg_rtx (<MODE>mode);
3523 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3524 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3525 tmp, const0_rtx, operands[4]));
;; Multiply-accumulate of VMQ-mode vectors (operands 1 and 2) with a
;; <V_elem>-mode scalar (operand 3).  A fresh <V_HALF>mode pseudo receives the
;; scalar through neon_vset_lane<V_half> with index const0_rtx, and
;; neon_vmla_lane<mode> is then emitted with lane index const0_rtx and
;; operand 4 forwarded as its final argument.
3529 (define_expand "neon_vmla_n<mode>"
3530 [(match_operand:VMQ 0 "s_register_operand" "")
3531 (match_operand:VMQ 1 "s_register_operand" "")
3532 (match_operand:VMQ 2 "s_register_operand" "")
3533 (match_operand:<V_elem> 3 "s_register_operand" "")
3534 (match_operand:SI 4 "immediate_operand" "")]
3537 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3538 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3539 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3540 tmp, const0_rtx, operands[4]));
;; Widening multiply-accumulate: <V_widen>-mode accumulator (operand 1),
;; VMDI-mode vector (operand 2) and <V_elem>-mode scalar (operand 3).  A fresh
;; <MODE>mode pseudo receives the scalar through neon_vset_lane<mode> with
;; index const0_rtx, and neon_vmlal_lane<mode> is then emitted with lane index
;; const0_rtx and operand 4 forwarded as its final argument.
3544 (define_expand "neon_vmlal_n<mode>"
3545 [(match_operand:<V_widen> 0 "s_register_operand" "")
3546 (match_operand:<V_widen> 1 "s_register_operand" "")
3547 (match_operand:VMDI 2 "s_register_operand" "")
3548 (match_operand:<V_elem> 3 "s_register_operand" "")
3549 (match_operand:SI 4 "immediate_operand" "")]
3552 rtx tmp = gen_reg_rtx (<MODE>mode);
3553 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3554 emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1], operands[2],
3555 tmp, const0_rtx, operands[4]));
;; vqdmlal with a scalar: <V_widen>-mode accumulator (operand 1), VMDI-mode
;; vector (operand 2) and <V_elem>-mode scalar (operand 3).  A fresh
;; <MODE>mode pseudo receives the scalar through neon_vset_lane<mode> with
;; index const0_rtx, and neon_vqdmlal_lane<mode> is then emitted with lane
;; index const0_rtx and operand 4 forwarded as its final argument.
3559 (define_expand "neon_vqdmlal_n<mode>"
3560 [(match_operand:<V_widen> 0 "s_register_operand" "")
3561 (match_operand:<V_widen> 1 "s_register_operand" "")
3562 (match_operand:VMDI 2 "s_register_operand" "")
3563 (match_operand:<V_elem> 3 "s_register_operand" "")
3564 (match_operand:SI 4 "immediate_operand" "")]
3567 rtx tmp = gen_reg_rtx (<MODE>mode);
3568 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3569 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3570 tmp, const0_rtx, operands[4]));
;; Multiply-subtract of VMD-mode vectors (operands 1 and 2) with a
;; <V_elem>-mode scalar (operand 3).  A fresh <MODE>mode pseudo receives the
;; scalar through neon_vset_lane<mode> with index const0_rtx, and
;; neon_vmls_lane<mode> is then emitted with lane index const0_rtx and
;; operand 4 forwarded as its final argument.
3574 (define_expand "neon_vmls_n<mode>"
3575 [(match_operand:VMD 0 "s_register_operand" "")
3576 (match_operand:VMD 1 "s_register_operand" "")
3577 (match_operand:VMD 2 "s_register_operand" "")
3578 (match_operand:<V_elem> 3 "s_register_operand" "")
3579 (match_operand:SI 4 "immediate_operand" "")]
3582 rtx tmp = gen_reg_rtx (<MODE>mode);
3583 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3584 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3585 tmp, const0_rtx, operands[4]));
;; Multiply-subtract of VMQ-mode vectors (operands 1 and 2) with a
;; <V_elem>-mode scalar (operand 3).  A fresh <V_HALF>mode pseudo receives the
;; scalar through neon_vset_lane<V_half> with index const0_rtx, and
;; neon_vmls_lane<mode> is then emitted with lane index const0_rtx and
;; operand 4 forwarded as its final argument.
3589 (define_expand "neon_vmls_n<mode>"
3590 [(match_operand:VMQ 0 "s_register_operand" "")
3591 (match_operand:VMQ 1 "s_register_operand" "")
3592 (match_operand:VMQ 2 "s_register_operand" "")
3593 (match_operand:<V_elem> 3 "s_register_operand" "")
3594 (match_operand:SI 4 "immediate_operand" "")]
3597 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3598 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3599 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3600 tmp, const0_rtx, operands[4]));
;; Widening multiply-subtract: <V_widen>-mode accumulator (operand 1),
;; VMDI-mode vector (operand 2) and <V_elem>-mode scalar (operand 3).  A fresh
;; <MODE>mode pseudo receives the scalar through neon_vset_lane<mode> with
;; index const0_rtx, and neon_vmlsl_lane<mode> is then emitted with lane index
;; const0_rtx and operand 4 forwarded as its final argument.
3604 (define_expand "neon_vmlsl_n<mode>"
3605 [(match_operand:<V_widen> 0 "s_register_operand" "")
3606 (match_operand:<V_widen> 1 "s_register_operand" "")
3607 (match_operand:VMDI 2 "s_register_operand" "")
3608 (match_operand:<V_elem> 3 "s_register_operand" "")
3609 (match_operand:SI 4 "immediate_operand" "")]
3612 rtx tmp = gen_reg_rtx (<MODE>mode);
3613 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3614 emit_insn (gen_neon_vmlsl_lane<mode> (operands[0], operands[1], operands[2],
3615 tmp, const0_rtx, operands[4]));
;; vqdmlsl with a scalar: <V_widen>-mode accumulator (operand 1), VMDI-mode
;; vector (operand 2) and <V_elem>-mode scalar (operand 3).  A fresh
;; <MODE>mode pseudo receives the scalar through neon_vset_lane<mode> with
;; index const0_rtx, and neon_vqdmlsl_lane<mode> is then emitted with lane
;; index const0_rtx and operand 4 forwarded as its final argument.
3619 (define_expand "neon_vqdmlsl_n<mode>"
3620 [(match_operand:<V_widen> 0 "s_register_operand" "")
3621 (match_operand:<V_widen> 1 "s_register_operand" "")
3622 (match_operand:VMDI 2 "s_register_operand" "")
3623 (match_operand:<V_elem> 3 "s_register_operand" "")
3624 (match_operand:SI 4 "immediate_operand" "")]
3627 rtx tmp = gen_reg_rtx (<MODE>mode);
3628 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3629 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3630 tmp, const0_rtx, operands[4]));
;; vext on two VDQX-mode vectors (operands 1 and 2).  Operand 3 is an
;; immediate checked by neon_const_bounds with 0 and the element count of
;; <MODE>mode, then printed as the final assembler operand.
3634 (define_insn "neon_vext<mode>"
3635 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3636 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3637 (match_operand:VDQX 2 "s_register_operand" "w")
3638 (match_operand:SI 3 "immediate_operand" "i")]
3642 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3643 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3645 [(set_attr "type" "neon_ext<q>")]
;; vrev64 on a VDQ-mode vector (operand 1).  Operand 2 is an immediate that
;; the output template does not reference.
3648 (define_insn "neon_vrev64<mode>"
3649 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3650 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
3651 (match_operand:SI 2 "immediate_operand" "i")]
3654 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3655 [(set_attr "type" "neon_rev<q>")]
;; vrev32 on a VX-mode vector (operand 1).  Operand 2 is an immediate that the
;; output template does not reference.
3658 (define_insn "neon_vrev32<mode>"
3659 [(set (match_operand:VX 0 "s_register_operand" "=w")
3660 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
3661 (match_operand:SI 2 "immediate_operand" "i")]
3664 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3665 [(set_attr "type" "neon_rev<q>")]
;; vrev16 on a VE-mode vector (operand 1).  Operand 2 is an immediate that the
;; output template does not reference.
3668 (define_insn "neon_vrev16<mode>"
3669 [(set (match_operand:VE 0 "s_register_operand" "=w")
3670 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
3671 (match_operand:SI 2 "immediate_operand" "i")]
3674 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3675 [(set_attr "type" "neon_rev<q>")]
3678 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3679 ; allocation. For an intrinsic of form:
3680 ; rD = vbsl_* (rS, rN, rM)
3681 ; We can use any of:
3682 ; vbsl rS, rN, rM (if D = S)
3683 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3684 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three constraint alternatives, one per instruction:
;;   alternative 0 ties operand 1 to the destination and emits vbsl;
;;   alternative 1 ties operand 3 to the destination and emits vbit;
;;   alternative 2 ties operand 2 to the destination and emits vbif.
;; All operands share the same VDQX mode.
3686 (define_insn "neon_vbsl<mode>_internal"
3687 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3688 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3689 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3690 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3694 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3695 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3696 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3697 [(set_attr "type" "neon_bsl<q>")]
;; Expander for bitwise select.  Operand 1 arrives in <V_cmp_result> mode
;; while operands 2 and 3 are VDQX; the preparation code rewrites operand 1 as
;; its <MODE>mode lowpart so that all three inputs have the same mode.
3700 (define_expand "neon_vbsl<mode>"
3701 [(set (match_operand:VDQX 0 "s_register_operand" "")
3702 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3703 (match_operand:VDQX 2 "s_register_operand" "")
3704 (match_operand:VDQX 3 "s_register_operand" "")]
3708 /* We can't alias operands together if they have different modes. */
3709 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift with a register shift count: operand 1 is the VDQIX-mode source and
;; operand 2 the VDQIX-mode shift vector.  Operand 3 is an immediate used
;; only through the %O3 and %T3 modifiers that build the mnemonic.
3712 (define_insn "neon_vshl<mode>"
3713 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3714 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3715 (match_operand:VDQIX 2 "s_register_operand" "w")
3716 (match_operand:SI 3 "immediate_operand" "i")]
3719 "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3720 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl with a register shift count: operand 1 is the VDQIX-mode source and
;; operand 2 the VDQIX-mode shift vector.  Operand 3 is an immediate used
;; only through the %O3 and %T3 modifiers that build the mnemonic.
3723 (define_insn "neon_vqshl<mode>"
3724 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3725 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3726 (match_operand:VDQIX 2 "s_register_operand" "w")
3727 (match_operand:SI 3 "immediate_operand" "i")]
3730 "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3731 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift right by immediate.  Operand 2 is the shift amount: it is checked by
;; neon_const_bounds with 1 and neon_element_bits (<MODE>mode) + 1, then
;; printed as the final assembler operand.  Operand 3 is used only through
;; the %O3 and %T3 modifiers that build the mnemonic.
3734 (define_insn "neon_vshr_n<mode>"
3735 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3736 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3737 (match_operand:SI 2 "immediate_operand" "i")
3738 (match_operand:SI 3 "immediate_operand" "i")]
3742 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3743 return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3745 [(set_attr "type" "neon_shift_imm<q>")]
;; Narrowing shift right by immediate: VN-mode source, <V_narrow>-mode result.
;; Operand 2 is the shift amount, checked by neon_const_bounds with 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1, then printed as the final assembler
;; operand.  Operand 3 is used only through the %O3 modifier.
3748 (define_insn "neon_vshrn_n<mode>"
3749 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3750 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3751 (match_operand:SI 2 "immediate_operand" "i")
3752 (match_operand:SI 3 "immediate_operand" "i")]
3756 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3757 return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
3759 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; vqshrn by immediate: VN-mode source, <V_narrow>-mode result.  Operand 2 is
;; the shift amount, checked by neon_const_bounds with 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1, then printed as the final assembler
;; operand.  Operand 3 is used only through the %O3 and %T3 modifiers.
3762 (define_insn "neon_vqshrn_n<mode>"
3763 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3764 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3765 (match_operand:SI 2 "immediate_operand" "i")
3766 (match_operand:SI 3 "immediate_operand" "i")]
3770 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3771 return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3773 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vqshrun by immediate: VN-mode source, <V_narrow>-mode result.  Operand 2 is
;; the shift amount, checked by neon_const_bounds with 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1, then printed as the final assembler
;; operand.  Operand 3 is used only through the %O3 and %T3 modifiers.
3776 (define_insn "neon_vqshrun_n<mode>"
3777 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3778 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3779 (match_operand:SI 2 "immediate_operand" "i")
3780 (match_operand:SI 3 "immediate_operand" "i")]
3784 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3785 return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3787 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Shift left by immediate.  Operand 2 is the shift amount, checked by
;; neon_const_bounds with 0 and neon_element_bits (<MODE>mode), then printed
;; as the final assembler operand.  Operand 3 is not referenced by the output
;; template.
3790 (define_insn "neon_vshl_n<mode>"
3791 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3792 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3793 (match_operand:SI 2 "immediate_operand" "i")
3794 (match_operand:SI 3 "immediate_operand" "i")]
3798 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3799 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3801 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate.  Operand 2 is the shift amount, checked by
;; neon_const_bounds with 0 and neon_element_bits (<MODE>mode), then printed
;; as the final assembler operand.  Operand 3 is used only through the %T3
;; modifier that forms the type suffix.
3804 (define_insn "neon_vqshl_n<mode>"
3805 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3806 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3807 (match_operand:SI 2 "immediate_operand" "i")
3808 (match_operand:SI 3 "immediate_operand" "i")]
3812 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3813 return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3815 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate.  Operand 2 is the shift amount, checked by
;; neon_const_bounds with 0 and neon_element_bits (<MODE>mode), then printed
;; as the final assembler operand.  Operand 3 is used only through the %T3
;; modifier that forms the type suffix.
3818 (define_insn "neon_vqshlu_n<mode>"
3819 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3820 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3821 (match_operand:SI 2 "immediate_operand" "i")
3822 (match_operand:SI 3 "immediate_operand" "i")]
3826 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3827 return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3829 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate: VW-mode source, <V_widen>-mode result.
;; Operand 2 is the shift amount, checked by neon_const_bounds with 0 and
;; neon_element_bits (<MODE>mode) + 1, then printed as the final assembler
;; operand.  Operand 3 is used only through the %T3 modifier.
;; NOTE(review): the in-body comment states "0 < imm" while the call passes 0
;; as the lower bound -- confirm whether neon_const_bounds treats its lower
;; argument as inclusive, and which of the two is intended.
3832 (define_insn "neon_vshll_n<mode>"
3833 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3834 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3835 (match_operand:SI 2 "immediate_operand" "i")
3836 (match_operand:SI 3 "immediate_operand" "i")]
3840 /* The boundaries are: 0 < imm <= size. */
3841 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3842 return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
3844 [(set_attr "type" "neon_shift_imm_long")]
;; Shift right by immediate and accumulate.  Operand 1 is the accumulator,
;; tied to the destination; operand 2 is the vector being shifted.  Operand 3
;; is the shift amount, checked by neon_const_bounds with 1 and
;; neon_element_bits (<MODE>mode) + 1, then printed as the final assembler
;; operand.  Operand 4 is used only through the %O4 and %T4 modifiers.
3847 (define_insn "neon_vsra_n<mode>"
3848 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3849 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3850 (match_operand:VDQIX 2 "s_register_operand" "w")
3851 (match_operand:SI 3 "immediate_operand" "i")
3852 (match_operand:SI 4 "immediate_operand" "i")]
3856 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3857 return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3859 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate.  Operand 1 is tied to the destination; operand 2 is the
;; second source vector.  Operand 3 is the shift amount, checked by
;; neon_const_bounds with 1 and neon_element_bits (<MODE>mode) + 1, then
;; printed as the final assembler operand.
3862 (define_insn "neon_vsri_n<mode>"
3863 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3864 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3865 (match_operand:VDQIX 2 "s_register_operand" "w")
3866 (match_operand:SI 3 "immediate_operand" "i")]
3870 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3871 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3873 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate.  Operand 1 is tied to the destination; operand 2 is the
;; second source vector.  Operand 3 is the shift amount, checked by
;; neon_const_bounds with 0 and neon_element_bits (<MODE>mode), then printed
;; as the final assembler operand.
3876 (define_insn "neon_vsli_n<mode>"
3877 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3878 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3879 (match_operand:VDQIX 2 "s_register_operand" "w")
3880 (match_operand:SI 3 "immediate_operand" "i")]
3884 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3885 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3887 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl.8 with a one-register table: operand 1 is the V8QI table (printed
;; inside the braces) and operand 2 the V8QI index vector.
3890 (define_insn "neon_vtbl1v8qi"
3891 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3892 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3893 (match_operand:V8QI 2 "s_register_operand" "w")]
3896 "vtbl.8\t%P0, {%P1}, %P2"
3897 [(set_attr "type" "neon_tbl1")]
;; vtbl.8 with a two-register table.  Operand 1 is the table, held in a TImode
;; register; operand 2 is the V8QI index vector.  The output code takes the
;; hard register number of operand 1 and builds V8QI registers at that number
;; and at +2 to form the register list.
3900 (define_insn "neon_vtbl2v8qi"
3901 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3902 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3903 (match_operand:V8QI 2 "s_register_operand" "w")]
3908 int tabbase = REGNO (operands[1]);
3910 ops[0] = operands[0];
3911 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3912 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3913 ops[3] = operands[2];
3914 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3918 [(set_attr "type" "neon_tbl2")]
;; vtbl.8 with a three-register table.  Operand 1 is the table, held in an
;; EImode register; operand 2 is the V8QI index vector.  The output code takes
;; the hard register number of operand 1 and builds V8QI registers at that
;; number, +2 and +4 to form the register list.
3921 (define_insn "neon_vtbl3v8qi"
3922 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3923 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3924 (match_operand:V8QI 2 "s_register_operand" "w")]
3929 int tabbase = REGNO (operands[1]);
3931 ops[0] = operands[0];
3932 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3933 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3934 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3935 ops[4] = operands[2];
3936 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3940 [(set_attr "type" "neon_tbl3")]
;; vtbl.8 with a four-register table.  Operand 1 is the table, held in an
;; OImode register; operand 2 is the V8QI index vector.  The output code takes
;; the hard register number of operand 1 and builds V8QI registers at that
;; number, +2, +4 and +6 to form the register list.
3943 (define_insn "neon_vtbl4v8qi"
3944 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3945 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3946 (match_operand:V8QI 2 "s_register_operand" "w")]
3951 int tabbase = REGNO (operands[1]);
3953 ops[0] = operands[0];
3954 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3955 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3956 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3957 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3958 ops[5] = operands[2];
3959 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3963 [(set_attr "type" "neon_tbl4")]
3966 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table (operand 1) and V16QI index vector
;; (operand 2).  The destination is early-clobber ("=&w").  The split
;; condition adds reload_completed; the split code views operand 1 as TImode
;; and emits neon_vtbl2v8qi twice, once on the V8QI lowparts and once on the
;; V8QI highparts of the destination and index vector.
3967 (define_insn_and_split "neon_vtbl1v16qi"
3968 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3969 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3970 (match_operand:V16QI 2 "s_register_operand" "w")]
3974 "&& reload_completed"
3977 rtx op0, op1, op2, part0, part2;
3981 op1 = gen_lowpart (TImode, operands[1]);
3984 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3985 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3986 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3987 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3989 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3990 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3991 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3992 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3995 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table (operand 1) and V16QI index vector
;; (operand 2).  The destination is early-clobber ("=&w").  The split
;; condition adds reload_completed; the split code emits neon_vtbl2v8qi twice,
;; once on the V8QI lowparts and once on the V8QI highparts of the destination
;; and index vector.
3998 (define_insn_and_split "neon_vtbl2v16qi"
3999 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4000 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4001 (match_operand:V16QI 2 "s_register_operand" "w")]
4005 "&& reload_completed"
4008 rtx op0, op1, op2, part0, part2;
4015 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4016 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4017 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4018 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4020 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4021 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4022 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4023 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4026 [(set_attr "type" "multiple")]
4029 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4030 ;; handle quad-word input modes, producing octa-word output modes. But
4031 ;; that requires us to add support for octa-word vector modes in moves.
4032 ;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI vectors (operands 1 and 2) into one OImode value
;; (operand 0).  The split condition adds reload_completed, and the split
;; code delegates to neon_split_vcombine (operands).
4033 (define_insn_and_split "neon_vcombinev16qi"
4034 [(set (match_operand:OI 0 "s_register_operand" "=w")
4035 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4036 (match_operand:V16QI 2 "s_register_operand" "w")]
4040 "&& reload_completed"
4043 neon_split_vcombine (operands);
4046 [(set_attr "type" "multiple")]
;; vtbx.8 with a one-register table.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 is the V8QI table (printed inside the braces)
;; and operand 3 the V8QI index vector.
4049 (define_insn "neon_vtbx1v8qi"
4050 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4051 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4052 (match_operand:V8QI 2 "s_register_operand" "w")
4053 (match_operand:V8QI 3 "s_register_operand" "w")]
4056 "vtbx.8\t%P0, {%P2}, %P3"
4057 [(set_attr "type" "neon_tbl1")]
;; vtbx.8 with a two-register table.  Operand 1 is tied to the destination;
;; operand 2 is the table, held in a TImode register; operand 3 is the V8QI
;; index vector.  The output code takes the hard register number of operand 2
;; and builds V8QI registers at that number and at +2 for the register list.
4060 (define_insn "neon_vtbx2v8qi"
4061 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4062 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4063 (match_operand:TI 2 "s_register_operand" "w")
4064 (match_operand:V8QI 3 "s_register_operand" "w")]
4069 int tabbase = REGNO (operands[2]);
4071 ops[0] = operands[0];
4072 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4073 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4074 ops[3] = operands[3];
4075 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4079 [(set_attr "type" "neon_tbl2")]
;; vtbx.8 with a three-register list.  Operand 2 is one EI-mode register;
;; the braced list is built from V8QImode registers at tabbase, tabbase + 2
;; and tabbase + 4, where tabbase is REGNO (operands[2]).  Operand 1 is tied
;; to the destination; operand 3 becomes the final asm operand (ops[4]).
4082 (define_insn "neon_vtbx3v8qi"
4083 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4084 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4085 (match_operand:EI 2 "s_register_operand" "w")
4086 (match_operand:V8QI 3 "s_register_operand" "w")]
4091 int tabbase = REGNO (operands[2]);
4093 ops[0] = operands[0];
4094 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4095 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4096 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4097 ops[4] = operands[3];
4098 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4102 [(set_attr "type" "neon_tbl3")]
;; vtbx.8 with a four-register list.  Operand 2 is one OI-mode register;
;; the braced list is built from V8QImode registers at tabbase, tabbase + 2,
;; tabbase + 4 and tabbase + 6, where tabbase is REGNO (operands[2]).
;; Operand 1 is tied to the destination; operand 3 becomes ops[5].
4105 (define_insn "neon_vtbx4v8qi"
4106 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4107 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4108 (match_operand:OI 2 "s_register_operand" "w")
4109 (match_operand:V8QI 3 "s_register_operand" "w")]
4114 int tabbase = REGNO (operands[2]);
4116 ops[0] = operands[0];
4117 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4118 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4119 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4120 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4121 ops[5] = operands[3];
4122 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4126 [(set_attr "type" "neon_tbl4")]
;; Expander producing both vtrn results at once: operand 0 and operand 3 are
;; each set from an unspec of the same two inputs (operands 1 and 2, reused
;; via match_dup); the second set uses UNSPEC_VTRN2.  Note the operand
;; numbering here (outputs 0 and 3) differs from the matching insn below.
4129 (define_expand "neon_vtrn<mode>_internal"
4131 [(set (match_operand:VDQW 0 "s_register_operand" "")
4132 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4133 (match_operand:VDQW 2 "s_register_operand" "")]
4135 (set (match_operand:VDQW 3 "s_register_operand" "")
4136 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4141 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2.  Input operand 1 is tied to output 0
;; (constraint "0") and input operand 3 is tied to output 2 (constraint "2"),
;; which is why the asm template only needs to name operands 0 and 2.
4142 (define_insn "*neon_vtrn<mode>_insn"
4143 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4144 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4145 (match_operand:VDQW 3 "s_register_operand" "2")]
4147 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4148 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4151 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4152 [(set_attr "type" "neon_permute<q>")]
;; Expander producing both vzip results at once: operand 0 and operand 3 are
;; each set from an unspec of the same two inputs (operands 1 and 2, reused
;; via match_dup); the second set uses UNSPEC_VZIP2.
4155 (define_expand "neon_vzip<mode>_internal"
4157 [(set (match_operand:VDQW 0 "s_register_operand" "")
4158 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4159 (match_operand:VDQW 2 "s_register_operand" "")]
4161 (set (match_operand:VDQW 3 "s_register_operand" "")
4162 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4167 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2.  Input operand 1 is tied to output 0
;; (constraint "0") and input operand 3 is tied to output 2 (constraint "2"),
;; so the asm template names only operands 0 and 2.
4168 (define_insn "*neon_vzip<mode>_insn"
4169 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4170 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4171 (match_operand:VDQW 3 "s_register_operand" "2")]
4173 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4174 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4177 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4178 [(set_attr "type" "neon_zip<q>")]
;; Expander producing both vuzp results at once: operand 0 and operand 3 are
;; each set from an unspec of the same two inputs (operands 1 and 2, reused
;; via match_dup); the second set uses UNSPEC_VUZP2.
4181 (define_expand "neon_vuzp<mode>_internal"
4183 [(set (match_operand:VDQW 0 "s_register_operand" "")
4184 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4185 (match_operand:VDQW 2 "s_register_operand" "")]
4187 (set (match_operand:VDQW 3 "s_register_operand" "")
4188 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4193 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2.  Input operand 1 is tied to output 0
;; (constraint "0") and input operand 3 is tied to output 2 (constraint "2"),
;; so the asm template names only operands 0 and 2.  The scheduling type is
;; neon_zip<q>, the same value the vzip insn uses.
4194 (define_insn "*neon_vuzp<mode>_insn"
4195 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4196 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4197 (match_operand:VDQW 3 "s_register_operand" "2")]
4199 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4200 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4203 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4204 [(set_attr "type" "neon_zip<q>")]
;; Reinterpret expanders whose source (operand 1) iterates over VDX.  Each
;; one differs only in the fixed mode of the destination register (operand 0)
;; and expands by calling neon_reinterpret (operands[0], operands[1]).

;; Destination mode V8QI.
4207 (define_expand "neon_vreinterpretv8qi<mode>"
4208 [(match_operand:V8QI 0 "s_register_operand" "")
4209 (match_operand:VDX 1 "s_register_operand" "")]
4212 neon_reinterpret (operands[0], operands[1]);
;; Destination mode V4HI.
4216 (define_expand "neon_vreinterpretv4hi<mode>"
4217 [(match_operand:V4HI 0 "s_register_operand" "")
4218 (match_operand:VDX 1 "s_register_operand" "")]
4221 neon_reinterpret (operands[0], operands[1]);
;; Destination mode V2SI.
4225 (define_expand "neon_vreinterpretv2si<mode>"
4226 [(match_operand:V2SI 0 "s_register_operand" "")
4227 (match_operand:VDX 1 "s_register_operand" "")]
4230 neon_reinterpret (operands[0], operands[1]);
;; Destination mode V2SF.
4234 (define_expand "neon_vreinterpretv2sf<mode>"
4235 [(match_operand:V2SF 0 "s_register_operand" "")
4236 (match_operand:VDX 1 "s_register_operand" "")]
4239 neon_reinterpret (operands[0], operands[1]);
;; Destination mode DI.
4243 (define_expand "neon_vreinterpretdi<mode>"
4244 [(match_operand:DI 0 "s_register_operand" "")
4245 (match_operand:VDX 1 "s_register_operand" "")]
4248 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret expanders whose source (operand 1) iterates over VQXMOV.  Each
;; one differs only in the fixed mode of the destination register (operand 0)
;; and expands by calling neon_reinterpret (operands[0], operands[1]).

;; Destination mode TI.
4252 (define_expand "neon_vreinterpretti<mode>"
4253 [(match_operand:TI 0 "s_register_operand" "")
4254 (match_operand:VQXMOV 1 "s_register_operand" "")]
4257 neon_reinterpret (operands[0], operands[1]);
;; Destination mode V16QI.
4262 (define_expand "neon_vreinterpretv16qi<mode>"
4263 [(match_operand:V16QI 0 "s_register_operand" "")
4264 (match_operand:VQXMOV 1 "s_register_operand" "")]
4267 neon_reinterpret (operands[0], operands[1]);
;; Destination mode V8HI.
4271 (define_expand "neon_vreinterpretv8hi<mode>"
4272 [(match_operand:V8HI 0 "s_register_operand" "")
4273 (match_operand:VQXMOV 1 "s_register_operand" "")]
4276 neon_reinterpret (operands[0], operands[1]);
;; Destination mode V4SI.
4280 (define_expand "neon_vreinterpretv4si<mode>"
4281 [(match_operand:V4SI 0 "s_register_operand" "")
4282 (match_operand:VQXMOV 1 "s_register_operand" "")]
4285 neon_reinterpret (operands[0], operands[1]);
;; Destination mode V4SF.
4289 (define_expand "neon_vreinterpretv4sf<mode>"
4290 [(match_operand:V4SF 0 "s_register_operand" "")
4291 (match_operand:VQXMOV 1 "s_register_operand" "")]
4294 neon_reinterpret (operands[0], operands[1]);
;; Destination mode V2DI.
4298 (define_expand "neon_vreinterpretv2di<mode>"
4299 [(match_operand:V2DI 0 "s_register_operand" "")
4300 (match_operand:VQXMOV 1 "s_register_operand" "")]
4303 neon_reinterpret (operands[0], operands[1]);
;; Expander for a single-vector load: a VDQX register (operand 0) is set
;; from an unspec wrapping a neon_struct_operand of the same mode (operand 1).
;; The RTL shape is the same as the neon_vld1<mode> insn that follows.
4307 (define_expand "vec_load_lanes<mode><mode>"
4308 [(set (match_operand:VDQX 0 "s_register_operand")
4309 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-vector vld1: loads the VDQX register operand 0 from the "Um" memory
;; operand 1.  The destination is printed with the %h modifier and the
;; address with %A.
4313 (define_insn "neon_vld1<mode>"
4314 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4315 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4318 "vld1.<V_sz_elem>\t%h0, %A1"
4319 [(set_attr "type" "neon_load1_1reg<q>")]
;; Single-lane vld1 for VDX modes.  Operand 1 is a <V_elem>-mode memory
;; operand, operand 2 is the existing vector (tied to the destination with
;; "0") and operand 3 is the immediate lane number.  The output code reports
;; "lane out of range" when the lane is negative or not below
;; GET_MODE_NUNITS (<MODE>mode), then returns either the whole-register
;; template or the lane-indexed template "{%P0[%c3]}".
4322 (define_insn "neon_vld1_lane<mode>"
4323 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4324 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4325 (match_operand:VDX 2 "s_register_operand" "0")
4326 (match_operand:SI 3 "immediate_operand" "i")]
4330 HOST_WIDE_INT lane = INTVAL (operands[3]);
4331 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4332 if (lane < 0 || lane >= max)
4333 error ("lane out of range");
4335 return "vld1.<V_sz_elem>\t%P0, %A1";
4337 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4339 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Single-lane vld1 for VQX modes.  Same operand layout as the VDX variant.
;; After the "lane out of range" check, lanes in the upper half
;; (lane >= max / 2) take a separate branch in which operand 3 is regenerated
;; with GEN_INT (lane).  Operand 0 is then re-expressed as a <V_HALF>mode
;; register numbered regno before one of the two vld1 templates is returned.
4342 (define_insn "neon_vld1_lane<mode>"
4343 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4344 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4345 (match_operand:VQX 2 "s_register_operand" "0")
4346 (match_operand:SI 3 "immediate_operand" "i")]
4350 HOST_WIDE_INT lane = INTVAL (operands[3]);
4351 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4352 int regno = REGNO (operands[0]);
4353 if (lane < 0 || lane >= max)
4354 error ("lane out of range");
4355 else if (lane >= max / 2)
4359 operands[3] = GEN_INT (lane);
4361 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4363 return "vld1.<V_sz_elem>\t%P0, %A1";
4365 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4367 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load-and-duplicate for VD modes, expressed as a vec_duplicate of the
;; <V_elem>-mode memory operand 1.  Emits vld1 with the all-lanes syntax
;; "{%P0[]}".
4370 (define_insn "neon_vld1_dup<mode>"
4371 [(set (match_operand:VD 0 "s_register_operand" "=w")
4372 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4374 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4375 [(set_attr "type" "neon_load1_all_lanes<q>")]
4378 ;; Special case for DImode. Treat it exactly like a simple load.
;; Operand 0 is a DI register and operand 1 a DI neon_struct_operand wrapped
;; in an unspec; the neon_vld1_dupv2di split calls this expander through
;; gen_neon_vld1_dupdi.
4379 (define_expand "neon_vld1_dupdi"
4380 [(set (match_operand:DI 0 "s_register_operand" "")
4381 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Load-and-duplicate for VQ modes.  A single vld1 with the all-lanes syntax
;; names both the %e0 and %f0 parts of the destination in its register list.
4387 (define_insn "neon_vld1_dup<mode>"
4388 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4389 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4392 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4394 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI load-and-duplicate, split after reload into two steps: load the DI
;; value into the low DImode part of operand 0 via gen_neon_vld1_dupdi, then
;; copy that low part into the high DImode part with emit_move_insn.  The
;; "length" attribute is 8.
4397 (define_insn_and_split "neon_vld1_dupv2di"
4398 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4399 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4402 "&& reload_completed"
4405 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4406 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4407 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4410 [(set_attr "length" "8")
4411 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander for a single-vector store: a VDQX neon_struct_operand (operand 0)
;; is set from an unspec wrapping a VDQX register (operand 1).  The RTL shape
;; is the same as the neon_vst1<mode> insn that follows.
4414 (define_expand "vec_store_lanes<mode><mode>"
4415 [(set (match_operand:VDQX 0 "neon_struct_operand")
4416 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector vst1: stores the VDQX register operand 1 to the "Um" memory
;; operand 0.  The source is printed with the %h modifier and the address
;; with %A.
4420 (define_insn "neon_vst1<mode>"
4421 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4422 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4425 "vst1.<V_sz_elem>\t%h1, %A0"
4426 [(set_attr "type" "neon_store1_1reg<q>")])
;; Single-lane vst1 for VDX modes.  Operand 0 is the <V_elem>-mode memory
;; destination, operand 1 the source vector and operand 2 the immediate lane
;; number.  The output code reports "lane out of range" when the lane is
;; negative or not below GET_MODE_NUNITS (<MODE>mode), then returns either
;; the whole-register template "{%P1}" or the lane-indexed "{%P1[%c2]}".
4428 (define_insn "neon_vst1_lane<mode>"
4429 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4431 [(match_operand:VDX 1 "s_register_operand" "w")
4432 (match_operand:SI 2 "immediate_operand" "i")]
4436 HOST_WIDE_INT lane = INTVAL (operands[2]);
4437 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4438 if (lane < 0 || lane >= max)
4439 error ("lane out of range");
4441 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4443 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4445 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Single-lane vst1 for VQX modes.  Same operand layout as the VDX variant.
;; After the "lane out of range" check, lanes in the upper half
;; (lane >= max / 2) take a separate branch in which operand 2 is regenerated
;; with GEN_INT (lane).  Operand 1 is then re-expressed as a <V_HALF>mode
;; register numbered regno before one of the two vst1 templates is returned.
4448 (define_insn "neon_vst1_lane<mode>"
4449 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4451 [(match_operand:VQX 1 "s_register_operand" "w")
4452 (match_operand:SI 2 "immediate_operand" "i")]
4456 HOST_WIDE_INT lane = INTVAL (operands[2]);
4457 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4458 int regno = REGNO (operands[1]);
4459 if (lane < 0 || lane >= max)
4460 error ("lane out of range");
4461 else if (lane >= max / 2)
4465 operands[2] = GEN_INT (lane);
4467 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4469 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4471 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4473 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander for a two-vector structure load into a TI register from a TI
;; neon_struct_operand.  The inner UNSPEC_VSTRUCTDUMMY unspec over
;; (const_int 0) exists only to carry the VDX mode iterator into the pattern.
4476 (define_expand "vec_load_lanesti<mode>"
4477 [(set (match_operand:TI 0 "s_register_operand")
4478 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4479 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vld2 into a TI register for VDX element modes.  When <V_sz_elem> is 64
;; the output code emits "vld1.64" instead of vld2; the "type" attribute
;; follows the same test, giving neon_load1_2reg<q> for the 64-bit case and
;; neon_load2_2reg<q> otherwise.
4483 (define_insn "neon_vld2<mode>"
4484 [(set (match_operand:TI 0 "s_register_operand" "=w")
4485 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4486 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4490 if (<V_sz_elem> == 64)
4491 return "vld1.64\t%h0, %A1";
4493 return "vld2.<V_sz_elem>\t%h0, %A1";
4496 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4497 (const_string "neon_load1_2reg<q>")
4498 (const_string "neon_load2_2reg<q>")))]
;; Expander for a two-vector structure load into an OI register from an OI
;; neon_struct_operand, iterated over VQ through the dummy
;; UNSPEC_VSTRUCTDUMMY unspec.
4501 (define_expand "vec_load_lanesoi<mode>"
4502 [(set (match_operand:OI 0 "s_register_operand")
4503 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4504 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vld2 into an OI register for VQ element modes.  Always emits
;; "vld2.<V_sz_elem>" with the destination printed via %h and the address
;; via %A.
4508 (define_insn "neon_vld2<mode>"
4509 [(set (match_operand:OI 0 "s_register_operand" "=w")
4510 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4511 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4514 "vld2.<V_sz_elem>\t%h0, %A1"
4515 [(set_attr "type" "neon_load2_2reg_q")])
;; Single-lane vld2 for VD modes.  Operand 1 is the <V_two_elem> memory
;; source, operand 2 the existing TI value (tied to the destination) and
;; operand 3 the immediate lane.  After the "lane out of range" check the
;; output code names DImode registers at regno and regno + 2 (regno being
;; REGNO (operands[0])) and prints the instruction with output_asm_insn.
4517 (define_insn "neon_vld2_lane<mode>"
4518 [(set (match_operand:TI 0 "s_register_operand" "=w")
4519 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4520 (match_operand:TI 2 "s_register_operand" "0")
4521 (match_operand:SI 3 "immediate_operand" "i")
4522 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4526 HOST_WIDE_INT lane = INTVAL (operands[3]);
4527 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4528 int regno = REGNO (operands[0]);
4530 if (lane < 0 || lane >= max)
4531 error ("lane out of range");
4532 ops[0] = gen_rtx_REG (DImode, regno);
4533 ops[1] = gen_rtx_REG (DImode, regno + 2);
4534 ops[2] = operands[1];
4535 ops[3] = operands[3];
4536 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4539 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Single-lane vld2 for VMQ modes, operating on an OI register.  Lanes in the
;; upper half (lane >= max / 2) take a separate branch after the range check.
;; The instruction then names DImode registers at regno and regno + 4 and
;; uses GEN_INT (lane) rather than the original operand 3 as the lane index.
4542 (define_insn "neon_vld2_lane<mode>"
4543 [(set (match_operand:OI 0 "s_register_operand" "=w")
4544 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4545 (match_operand:OI 2 "s_register_operand" "0")
4546 (match_operand:SI 3 "immediate_operand" "i")
4547 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4551 HOST_WIDE_INT lane = INTVAL (operands[3]);
4552 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4553 int regno = REGNO (operands[0]);
4555 if (lane < 0 || lane >= max)
4556 error ("lane out of range");
4557 else if (lane >= max / 2)
4562 ops[0] = gen_rtx_REG (DImode, regno);
4563 ops[1] = gen_rtx_REG (DImode, regno + 4);
4564 ops[2] = operands[1];
4565 ops[3] = GEN_INT (lane);
4566 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4569 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load-and-duplicate form of vld2 for VDX modes.  When the mode has more
;; than one unit the all-lanes vld2 syntax is emitted on %e0 and %f0;
;; otherwise a plain "vld1" on %h0 is returned.  The "type" attribute makes
;; the same distinction using <V_mode_nunits>.
4572 (define_insn "neon_vld2_dup<mode>"
4573 [(set (match_operand:TI 0 "s_register_operand" "=w")
4574 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4575 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4579 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4580 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4582 return "vld1.<V_sz_elem>\t%h0, %A1";
4585 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4586 (const_string "neon_load2_all_lanes<q>")
4587 (const_string "neon_load1_1reg<q>")))]
;; Expander for a two-vector structure store of a TI register to a TI
;; neon_struct_operand, iterated over VDX through the dummy
;; UNSPEC_VSTRUCTDUMMY unspec.
4590 (define_expand "vec_store_lanesti<mode>"
4591 [(set (match_operand:TI 0 "neon_struct_operand")
4592 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4593 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst2 of a TI register for VDX element modes.  When <V_sz_elem> is 64 the
;; output code emits "vst1.64" instead of vst2, and the "type" attribute is
;; neon_store1_2reg<q> for that case.
;; NOTE(review): the non-64-bit type is neon_store2_one_lane<q> although this
;; pattern stores whole registers (the vld2 counterpart uses
;; neon_load2_2reg<q>) -- confirm this scheduling type is intended.
4597 (define_insn "neon_vst2<mode>"
4598 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4599 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4600 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4604 if (<V_sz_elem> == 64)
4605 return "vst1.64\t%h1, %A0";
4607 return "vst2.<V_sz_elem>\t%h1, %A0";
4610 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4611 (const_string "neon_store1_2reg<q>")
4612 (const_string "neon_store2_one_lane<q>")))]
;; Expander for a two-vector structure store of an OI register to an OI
;; neon_struct_operand, iterated over VQ through the dummy
;; UNSPEC_VSTRUCTDUMMY unspec.
4615 (define_expand "vec_store_lanesoi<mode>"
4616 [(set (match_operand:OI 0 "neon_struct_operand")
4617 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4618 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst2 of an OI register for VQ element modes.  Always emits
;; "vst2.<V_sz_elem>" with the source printed via %h and the address via %A.
4622 (define_insn "neon_vst2<mode>"
4623 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4624 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4625 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4628 "vst2.<V_sz_elem>\t%h1, %A0"
4629 [(set_attr "type" "neon_store2_4reg<q>")]
;; Single-lane vst2 for VD modes.  Operand 0 is the <V_two_elem> memory
;; destination, operand 1 the TI source and operand 2 the immediate lane.
;; After the "lane out of range" check the output code names DImode registers
;; at regno and regno + 2 (regno being REGNO (operands[1])) and prints the
;; instruction with output_asm_insn.
4632 (define_insn "neon_vst2_lane<mode>"
4633 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4634 (unspec:<V_two_elem>
4635 [(match_operand:TI 1 "s_register_operand" "w")
4636 (match_operand:SI 2 "immediate_operand" "i")
4637 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4641 HOST_WIDE_INT lane = INTVAL (operands[2]);
4642 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4643 int regno = REGNO (operands[1]);
4645 if (lane < 0 || lane >= max)
4646 error ("lane out of range");
4647 ops[0] = operands[0];
4648 ops[1] = gen_rtx_REG (DImode, regno);
4649 ops[2] = gen_rtx_REG (DImode, regno + 2);
4650 ops[3] = operands[2];
4651 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4654 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Single-lane vst2 for VMQ modes, reading from an OI register.  Lanes in the
;; upper half (lane >= max / 2) take a separate branch after the range check.
;; The instruction then names DImode registers at regno and regno + 4 and
;; uses GEN_INT (lane) rather than the original operand 2 as the lane index.
4657 (define_insn "neon_vst2_lane<mode>"
4658 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4659 (unspec:<V_two_elem>
4660 [(match_operand:OI 1 "s_register_operand" "w")
4661 (match_operand:SI 2 "immediate_operand" "i")
4662 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4666 HOST_WIDE_INT lane = INTVAL (operands[2]);
4667 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4668 int regno = REGNO (operands[1]);
4670 if (lane < 0 || lane >= max)
4671 error ("lane out of range");
4672 else if (lane >= max / 2)
4677 ops[0] = operands[0];
4678 ops[1] = gen_rtx_REG (DImode, regno);
4679 ops[2] = gen_rtx_REG (DImode, regno + 4);
4680 ops[3] = GEN_INT (lane);
4681 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4684 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander for a three-vector structure load into an EI register from an EI
;; neon_struct_operand, iterated over VDX through the dummy
;; UNSPEC_VSTRUCTDUMMY unspec.
4687 (define_expand "vec_load_lanesei<mode>"
4688 [(set (match_operand:EI 0 "s_register_operand")
4689 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4690 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vld3 into an EI register for VDX element modes.  When <V_sz_elem> is 64
;; the output code emits "vld1.64" instead of vld3; the "type" attribute
;; follows the same test, giving neon_load1_3reg<q> for the 64-bit case and
;; neon_load3_3reg<q> otherwise.
4694 (define_insn "neon_vld3<mode>"
4695 [(set (match_operand:EI 0 "s_register_operand" "=w")
4696 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4697 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4701 if (<V_sz_elem> == 64)
4702 return "vld1.64\t%h0, %A1";
4704 return "vld3.<V_sz_elem>\t%h0, %A1";
4707 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4708 (const_string "neon_load1_3reg<q>")
4709 (const_string "neon_load3_3reg<q>")))]
;; Standard-name expander for a three-vector load with VQ element modes.
;; It simply forwards operands 0 (CI register) and 1 (CI memory) to
;; gen_neon_vld3<mode>.
4712 (define_expand "vec_load_lanesci<mode>"
4713 [(match_operand:CI 0 "s_register_operand")
4714 (match_operand:CI 1 "neon_struct_operand")
4715 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4718 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; vld3 for VQ element modes, expanded as two instructions over a CI
;; register.  The CI memory operand is re-addressed as EImode at offset 0 for
;; neon_vld3qa, then as EImode at offset GET_MODE_SIZE (EImode) for
;; neon_vld3qb.  The second insn also receives operands[0] as an input so
;; that the result of the first load is carried through.
4722 (define_expand "neon_vld3<mode>"
4723 [(match_operand:CI 0 "s_register_operand")
4724 (match_operand:CI 1 "neon_struct_operand")
4725 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4730 mem = adjust_address (operands[1], EImode, 0);
4731 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4732 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4733 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of the VQ vld3 sequence.  Loads from the EI memory operand 1
;; into DImode registers at regno, regno + 4 and regno + 8, where regno is
;; REGNO (operands[0]) of the CI destination.
4737 (define_insn "neon_vld3qa<mode>"
4738 [(set (match_operand:CI 0 "s_register_operand" "=w")
4739 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4740 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4744 int regno = REGNO (operands[0]);
4746 ops[0] = gen_rtx_REG (DImode, regno);
4747 ops[1] = gen_rtx_REG (DImode, regno + 4);
4748 ops[2] = gen_rtx_REG (DImode, regno + 8);
4749 ops[3] = operands[1];
4750 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4753 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the VQ vld3 sequence.  Operand 2 is the CI value produced
;; so far and is tied to the destination ("0").  Loads from the EI memory
;; operand 1 into DImode registers at regno + 2, regno + 6 and regno + 10,
;; i.e. the registers interleaved with those written by neon_vld3qa.
4756 (define_insn "neon_vld3qb<mode>"
4757 [(set (match_operand:CI 0 "s_register_operand" "=w")
4758 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4759 (match_operand:CI 2 "s_register_operand" "0")
4760 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4764 int regno = REGNO (operands[0]);
4766 ops[0] = gen_rtx_REG (DImode, regno + 2);
4767 ops[1] = gen_rtx_REG (DImode, regno + 6);
4768 ops[2] = gen_rtx_REG (DImode, regno + 10);
4769 ops[3] = operands[1];
4770 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4773 [(set_attr "type" "neon_load3_3reg<q>")]
;; Single-lane vld3 for VD modes.  Operand 1 is the <V_three_elem> memory
;; source, operand 2 the existing EI value (tied to the destination) and
;; operand 3 the immediate lane.  After the "lane out of range" check the
;; output code names DImode registers at regno, regno + 2 and regno + 4.
;; Note the address is printed with plain "%3", not the %A modifier used by
;; the vld2/vld4 lane patterns -- presumably deliberate; confirm before
;; changing.
4776 (define_insn "neon_vld3_lane<mode>"
4777 [(set (match_operand:EI 0 "s_register_operand" "=w")
4778 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4779 (match_operand:EI 2 "s_register_operand" "0")
4780 (match_operand:SI 3 "immediate_operand" "i")
4781 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4785 HOST_WIDE_INT lane = INTVAL (operands[3]);
4786 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4787 int regno = REGNO (operands[0]);
4789 if (lane < 0 || lane >= max)
4790 error ("lane out of range");
4791 ops[0] = gen_rtx_REG (DImode, regno);
4792 ops[1] = gen_rtx_REG (DImode, regno + 2);
4793 ops[2] = gen_rtx_REG (DImode, regno + 4);
4794 ops[3] = operands[1];
4795 ops[4] = operands[3];
4796 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4800 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Single-lane vld3 for VMQ modes, operating on a CI register.  Lanes in the
;; upper half (lane >= max / 2) take a separate branch after the range check.
;; The instruction then names DImode registers at regno, regno + 4 and
;; regno + 8 and uses GEN_INT (lane) as the lane index.  As in the VD
;; variant, the address is printed with plain "%3".
4803 (define_insn "neon_vld3_lane<mode>"
4804 [(set (match_operand:CI 0 "s_register_operand" "=w")
4805 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4806 (match_operand:CI 2 "s_register_operand" "0")
4807 (match_operand:SI 3 "immediate_operand" "i")
4808 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4812 HOST_WIDE_INT lane = INTVAL (operands[3]);
4813 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4814 int regno = REGNO (operands[0]);
4816 if (lane < 0 || lane >= max)
4817 error ("lane out of range");
4818 else if (lane >= max / 2)
4823 ops[0] = gen_rtx_REG (DImode, regno);
4824 ops[1] = gen_rtx_REG (DImode, regno + 4);
4825 ops[2] = gen_rtx_REG (DImode, regno + 8);
4826 ops[3] = operands[1];
4827 ops[4] = GEN_INT (lane);
4828 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4832 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load-and-duplicate form of vld3 for VDX modes.  When the mode has more
;; than one unit, the all-lanes vld3 syntax is emitted on DImode registers at
;; regno, regno + 2 and regno + 4, with the address printed via plain "%3".
;; Otherwise a plain "vld1" on %h0 is returned.  The "type" attribute makes
;; the same distinction using <V_mode_nunits>.
4835 (define_insn "neon_vld3_dup<mode>"
4836 [(set (match_operand:EI 0 "s_register_operand" "=w")
4837 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4838 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4842 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4844 int regno = REGNO (operands[0]);
4846 ops[0] = gen_rtx_REG (DImode, regno);
4847 ops[1] = gen_rtx_REG (DImode, regno + 2);
4848 ops[2] = gen_rtx_REG (DImode, regno + 4);
4849 ops[3] = operands[1];
4850 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4854 return "vld1.<V_sz_elem>\t%h0, %A1";
4857 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4858 (const_string "neon_load3_all_lanes<q>")
4859 (const_string "neon_load1_1reg<q>")))])
;; Expander for a three-vector structure store of an EI register to an EI
;; neon_struct_operand, iterated over VDX through the dummy
;; UNSPEC_VSTRUCTDUMMY unspec.
4861 (define_expand "vec_store_lanesei<mode>"
4862 [(set (match_operand:EI 0 "neon_struct_operand")
4863 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4864 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst3 of an EI register for VDX element modes.  When <V_sz_elem> is 64 the
;; output code emits "vst1.64" instead of vst3, and the "type" attribute is
;; neon_store1_3reg<q> for that case.
;; NOTE(review): the non-64-bit type is neon_store3_one_lane<q> although this
;; pattern stores whole registers (the vld3 counterpart uses
;; neon_load3_3reg<q>) -- confirm this scheduling type is intended.
4868 (define_insn "neon_vst3<mode>"
4869 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4870 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4871 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4875 if (<V_sz_elem> == 64)
4876 return "vst1.64\t%h1, %A0";
4878 return "vst3.<V_sz_elem>\t%h1, %A0";
4881 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4882 (const_string "neon_store1_3reg<q>")
4883 (const_string "neon_store3_one_lane<q>")))])
;; Standard-name expander for a three-vector store with VQ element modes.
;; It simply forwards operands 0 (CI memory) and 1 (CI register) to
;; gen_neon_vst3<mode>.
4885 (define_expand "vec_store_lanesci<mode>"
4886 [(match_operand:CI 0 "neon_struct_operand")
4887 (match_operand:CI 1 "s_register_operand")
4888 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4891 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; vst3 for VQ element modes, expanded as two instructions reading a CI
;; register.  The CI memory operand is re-addressed as EImode at offset 0 for
;; neon_vst3qa, then as EImode at offset GET_MODE_SIZE (EImode) for
;; neon_vst3qb; both receive the full CI source register.
4895 (define_expand "neon_vst3<mode>"
4896 [(match_operand:CI 0 "neon_struct_operand")
4897 (match_operand:CI 1 "s_register_operand")
4898 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4903 mem = adjust_address (operands[0], EImode, 0);
4904 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4905 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4906 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of the VQ vst3 sequence.  Stores DImode registers at regno,
;; regno + 4 and regno + 8 (regno being REGNO (operands[1]) of the CI source)
;; to the EI memory operand 0.
4910 (define_insn "neon_vst3qa<mode>"
4911 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4912 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4913 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4917 int regno = REGNO (operands[1]);
4919 ops[0] = operands[0];
4920 ops[1] = gen_rtx_REG (DImode, regno);
4921 ops[2] = gen_rtx_REG (DImode, regno + 4);
4922 ops[3] = gen_rtx_REG (DImode, regno + 8);
4923 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4926 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the VQ vst3 sequence.  Stores DImode registers at
;; regno + 2, regno + 6 and regno + 10 (the registers interleaved with those
;; used by neon_vst3qa) to the EI memory operand 0.
4929 (define_insn "neon_vst3qb<mode>"
4930 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4931 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4932 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4936 int regno = REGNO (operands[1]);
4938 ops[0] = operands[0];
4939 ops[1] = gen_rtx_REG (DImode, regno + 2);
4940 ops[2] = gen_rtx_REG (DImode, regno + 6);
4941 ops[3] = gen_rtx_REG (DImode, regno + 10);
4942 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4945 [(set_attr "type" "neon_store3_3reg<q>")]
;; Single-lane vst3 for VD modes.  Operand 0 is the <V_three_elem> memory
;; destination, operand 1 the EI source and operand 2 the immediate lane.
;; After the "lane out of range" check the output code names DImode registers
;; at regno, regno + 2 and regno + 4.  The address is printed with plain
;; "%0" rather than the %A modifier used by the vst2 lane patterns.
4948 (define_insn "neon_vst3_lane<mode>"
4949 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4950 (unspec:<V_three_elem>
4951 [(match_operand:EI 1 "s_register_operand" "w")
4952 (match_operand:SI 2 "immediate_operand" "i")
4953 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4957 HOST_WIDE_INT lane = INTVAL (operands[2]);
4958 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4959 int regno = REGNO (operands[1]);
4961 if (lane < 0 || lane >= max)
4962 error ("lane out of range");
4963 ops[0] = operands[0];
4964 ops[1] = gen_rtx_REG (DImode, regno);
4965 ops[2] = gen_rtx_REG (DImode, regno + 2);
4966 ops[3] = gen_rtx_REG (DImode, regno + 4);
4967 ops[4] = operands[2];
4968 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4972 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Single-lane vst3 for VMQ modes, reading from a CI register.  Lanes in the
;; upper half (lane >= max / 2) take a separate branch after the range check.
;; The instruction then names DImode registers at regno, regno + 4 and
;; regno + 8 and uses GEN_INT (lane) as the lane index.  As in the VD
;; variant, the address is printed with plain "%0".
4975 (define_insn "neon_vst3_lane<mode>"
4976 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4977 (unspec:<V_three_elem>
4978 [(match_operand:CI 1 "s_register_operand" "w")
4979 (match_operand:SI 2 "immediate_operand" "i")
4980 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4984 HOST_WIDE_INT lane = INTVAL (operands[2]);
4985 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4986 int regno = REGNO (operands[1]);
4988 if (lane < 0 || lane >= max)
4989 error ("lane out of range");
4990 else if (lane >= max / 2)
4995 ops[0] = operands[0];
4996 ops[1] = gen_rtx_REG (DImode, regno);
4997 ops[2] = gen_rtx_REG (DImode, regno + 4);
4998 ops[3] = gen_rtx_REG (DImode, regno + 8);
4999 ops[4] = GEN_INT (lane);
5000 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5004 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander for a four-vector structure load into an OI register from an OI
;; neon_struct_operand, iterated over VDX through the dummy
;; UNSPEC_VSTRUCTDUMMY unspec.
5007 (define_expand "vec_load_lanesoi<mode>"
5008 [(set (match_operand:OI 0 "s_register_operand")
5009 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5010 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vld4 into an OI register for VDX element modes.  When <V_sz_elem> is 64
;; the output code emits "vld1.64" instead of vld4; the "type" attribute
;; follows the same test, giving neon_load1_4reg<q> for the 64-bit case and
;; neon_load4_4reg<q> otherwise.
5014 (define_insn "neon_vld4<mode>"
5015 [(set (match_operand:OI 0 "s_register_operand" "=w")
5016 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5017 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5021 if (<V_sz_elem> == 64)
5022 return "vld1.64\t%h0, %A1";
5024 return "vld4.<V_sz_elem>\t%h0, %A1";
5027 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5028 (const_string "neon_load1_4reg<q>")
5029 (const_string "neon_load4_4reg<q>")))]
;; Standard-name expander for a four-vector load with VQ element modes.
;; It simply forwards operands 0 (XI register) and 1 (XI memory) to
;; gen_neon_vld4<mode>.
5032 (define_expand "vec_load_lanesxi<mode>"
5033 [(match_operand:XI 0 "s_register_operand")
5034 (match_operand:XI 1 "neon_struct_operand")
5035 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5038 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; vld4 for VQ element modes, expanded as two instructions over an XI
;; register.  The XI memory operand is re-addressed as OImode at offset 0 for
;; neon_vld4qa, then as OImode at offset GET_MODE_SIZE (OImode) for
;; neon_vld4qb.  The second insn also receives operands[0] as an input so
;; that the result of the first load is carried through.
5042 (define_expand "neon_vld4<mode>"
5043 [(match_operand:XI 0 "s_register_operand")
5044 (match_operand:XI 1 "neon_struct_operand")
5045 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5050 mem = adjust_address (operands[1], OImode, 0);
5051 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5052 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5053 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of the VQ vld4 sequence.  Loads from the OI memory operand 1
;; into DImode registers at regno, regno + 4, regno + 8 and regno + 12,
;; where regno is REGNO (operands[0]) of the XI destination.
5057 (define_insn "neon_vld4qa<mode>"
5058 [(set (match_operand:XI 0 "s_register_operand" "=w")
5059 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5060 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5064 int regno = REGNO (operands[0]);
5066 ops[0] = gen_rtx_REG (DImode, regno);
5067 ops[1] = gen_rtx_REG (DImode, regno + 4);
5068 ops[2] = gen_rtx_REG (DImode, regno + 8);
5069 ops[3] = gen_rtx_REG (DImode, regno + 12);
5070 ops[4] = operands[1];
5071 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5074 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the VQ vld4 sequence.  Operand 2 is the XI value produced
;; so far and is tied to the destination ("0").  Loads from the OI memory
;; operand 1 into DImode registers at regno + 2, regno + 6, regno + 10 and
;; regno + 14, i.e. the registers interleaved with those written by
;; neon_vld4qa.
5077 (define_insn "neon_vld4qb<mode>"
5078 [(set (match_operand:XI 0 "s_register_operand" "=w")
5079 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5080 (match_operand:XI 2 "s_register_operand" "0")
5081 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5085 int regno = REGNO (operands[0]);
5087 ops[0] = gen_rtx_REG (DImode, regno + 2);
5088 ops[1] = gen_rtx_REG (DImode, regno + 6);
5089 ops[2] = gen_rtx_REG (DImode, regno + 10);
5090 ops[3] = gen_rtx_REG (DImode, regno + 14);
5091 ops[4] = operands[1];
5092 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5095 [(set_attr "type" "neon_load4_4reg<q>")]
;; Single-lane vld4 for VD modes.  Operand 1 is the <V_four_elem> memory
;; source, operand 2 the existing OI value (tied to the destination) and
;; operand 3 the immediate lane.  After the "lane out of range" check the
;; output code names DImode registers at regno, regno + 2, regno + 4 and
;; regno + 6, and prints the address with the %A modifier.
5098 (define_insn "neon_vld4_lane<mode>"
5099 [(set (match_operand:OI 0 "s_register_operand" "=w")
5100 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5101 (match_operand:OI 2 "s_register_operand" "0")
5102 (match_operand:SI 3 "immediate_operand" "i")
5103 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5107 HOST_WIDE_INT lane = INTVAL (operands[3]);
5108 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5109 int regno = REGNO (operands[0]);
5111 if (lane < 0 || lane >= max)
5112 error ("lane out of range");
5113 ops[0] = gen_rtx_REG (DImode, regno);
5114 ops[1] = gen_rtx_REG (DImode, regno + 2);
5115 ops[2] = gen_rtx_REG (DImode, regno + 4);
5116 ops[3] = gen_rtx_REG (DImode, regno + 6);
5117 ops[4] = operands[1];
5118 ops[5] = operands[3];
5119 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5123 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Single-lane vld4 for VMQ modes, operating on an XI register.  Lanes in the
;; upper half (lane >= max / 2) take a separate branch after the range check.
;; The instruction then names DImode registers at regno, regno + 4,
;; regno + 8 and regno + 12 and uses GEN_INT (lane) as the lane index.
5126 (define_insn "neon_vld4_lane<mode>"
5127 [(set (match_operand:XI 0 "s_register_operand" "=w")
5128 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5129 (match_operand:XI 2 "s_register_operand" "0")
5130 (match_operand:SI 3 "immediate_operand" "i")
5131 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5135 HOST_WIDE_INT lane = INTVAL (operands[3]);
5136 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5137 int regno = REGNO (operands[0]);
5139 if (lane < 0 || lane >= max)
5140 error ("lane out of range");
5141 else if (lane >= max / 2)
5146 ops[0] = gen_rtx_REG (DImode, regno);
5147 ops[1] = gen_rtx_REG (DImode, regno + 4);
5148 ops[2] = gen_rtx_REG (DImode, regno + 8);
5149 ops[3] = gen_rtx_REG (DImode, regno + 12);
5150 ops[4] = operands[1];
5151 ops[5] = GEN_INT (lane);
5152 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5156 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load-and-duplicate form of vld4 for VDX modes.  When the mode has more
;; than one unit, the all-lanes vld4 syntax is emitted on DImode registers at
;; regno, regno + 2, regno + 4 and regno + 6.  Otherwise a plain "vld1" on
;; %h0 is returned.  The "type" attribute makes the same distinction using
;; <V_mode_nunits>.
5159 (define_insn "neon_vld4_dup<mode>"
5160 [(set (match_operand:OI 0 "s_register_operand" "=w")
5161 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5162 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5166 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5168 int regno = REGNO (operands[0]);
5170 ops[0] = gen_rtx_REG (DImode, regno);
5171 ops[1] = gen_rtx_REG (DImode, regno + 2);
5172 ops[2] = gen_rtx_REG (DImode, regno + 4);
5173 ops[3] = gen_rtx_REG (DImode, regno + 6);
5174 ops[4] = operands[1];
5175 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5180 return "vld1.<V_sz_elem>\t%h0, %A1";
5183 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5184 (const_string "neon_load4_all_lanes<q>")
5185 (const_string "neon_load1_1reg<q>")))]
;; Expander for a four-vector structure store of an OI register to an OI
;; neon_struct_operand, iterated over VDX through the dummy
;; UNSPEC_VSTRUCTDUMMY unspec.
5188 (define_expand "vec_store_lanesoi<mode>"
5189 [(set (match_operand:OI 0 "neon_struct_operand")
5190 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5191 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst4 of an OI register for VDX element modes.  When <V_sz_elem> is 64 the
;; output code emits "vst1.64" instead of vst4; the "type" attribute follows
;; the same test, giving neon_store1_4reg<q> for the 64-bit case and
;; neon_store4_4reg<q> otherwise.
5195 (define_insn "neon_vst4<mode>"
5196 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5197 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5198 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5202 if (<V_sz_elem> == 64)
5203 return "vst1.64\t%h1, %A0";
5205 return "vst4.<V_sz_elem>\t%h1, %A0";
5208 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5209 (const_string "neon_store1_4reg<q>")
5210 (const_string "neon_store4_4reg<q>")))]
;; Standard-name expander for a four-vector store with VQ element modes.
;; It simply forwards operands 0 (XI memory) and 1 (XI register) to
;; gen_neon_vst4<mode>.
5213 (define_expand "vec_store_lanesxi<mode>"
5214 [(match_operand:XI 0 "neon_struct_operand")
5215 (match_operand:XI 1 "s_register_operand")
5216 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5219 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; XI-mode (VQ) vst4 expander.  The XI memory operand is split into two
;; consecutive OI-mode halves: the first half (offset 0) is written by
;; neon_vst4qa<mode>, the second half (offset GET_MODE_SIZE (OImode) from
;; the first) by neon_vst4qb<mode>.  Both insns read the same XI register
;; operand 1.
5223 (define_expand "neon_vst4<mode>"
5224 [(match_operand:XI 0 "neon_struct_operand")
5225 (match_operand:XI 1 "s_register_operand")
5226 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5231 mem = adjust_address (operands[0], OImode, 0);
5232 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5233 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5234 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of an XI-mode vst4: stores to the OI-mode memory operand 0
;; the four DImode registers numbered regno, regno + 4, regno + 8 and
;; regno + 12, where regno is the first register of XI operand 1.
;; neon_vst4qb<mode> stores the registers in between.
5238 (define_insn "neon_vst4qa<mode>"
5239 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5240 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5241 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5245 int regno = REGNO (operands[1]);
5247 ops[0] = operands[0];
5248 ops[1] = gen_rtx_REG (DImode, regno);
5249 ops[2] = gen_rtx_REG (DImode, regno + 4);
5250 ops[3] = gen_rtx_REG (DImode, regno + 8);
5251 ops[4] = gen_rtx_REG (DImode, regno + 12);
5252 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5255 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of an XI-mode vst4: stores to the OI-mode memory operand 0
;; the four DImode registers numbered regno + 2, regno + 6, regno + 10 and
;; regno + 14, where regno is the first register of XI operand 1.  These
;; are the registers skipped by neon_vst4qa<mode>.
5258 (define_insn "neon_vst4qb<mode>"
5259 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5260 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5261 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5265 int regno = REGNO (operands[1]);
5267 ops[0] = operands[0];
5268 ops[1] = gen_rtx_REG (DImode, regno + 2);
5269 ops[2] = gen_rtx_REG (DImode, regno + 6);
5270 ops[3] = gen_rtx_REG (DImode, regno + 10);
5271 ops[4] = gen_rtx_REG (DImode, regno + 14);
5272 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5275 [(set_attr "type" "neon_store4_4reg<q>")]
;; Store a single lane of a four-register OI group (VD modes) to the
;; <V_four_elem> memory operand 0.  Operand 2 is the immediate lane index;
;; it is checked against [0, GET_MODE_NUNITS) and "lane out of range" is
;; reported otherwise.  The lane is taken from the DImode registers
;; numbered regno, regno + 2, regno + 4 and regno + 6 and is printed with
;; %c5 in the vst4 template.
5278 (define_insn "neon_vst4_lane<mode>"
5279 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5280 (unspec:<V_four_elem>
5281 [(match_operand:OI 1 "s_register_operand" "w")
5282 (match_operand:SI 2 "immediate_operand" "i")
5283 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5287 HOST_WIDE_INT lane = INTVAL (operands[2]);
5288 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5289 int regno = REGNO (operands[1]);
5291 if (lane < 0 || lane >= max)
5292 error ("lane out of range");
5293 ops[0] = operands[0];
5294 ops[1] = gen_rtx_REG (DImode, regno);
5295 ops[2] = gen_rtx_REG (DImode, regno + 2);
5296 ops[3] = gen_rtx_REG (DImode, regno + 4);
5297 ops[4] = gen_rtx_REG (DImode, regno + 6);
5298 ops[5] = operands[2];
5299 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5303 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Store a single lane of a four-vector XI group (VMQ modes) to the
;; <V_four_elem> memory operand 0.  Operand 2 is the immediate lane index,
;; range-checked against [0, GET_MODE_NUNITS) with "lane out of range"
;; reported on failure.  Lanes with lane >= max / 2 take a separate
;; branch before the registers are formed; the lane printed is the local
;; `lane' value (GEN_INT (lane)), not operand 2 itself.  The registers
;; used are regno, regno + 4, regno + 8 and regno + 12.
;; NOTE(review): presumably the upper-half branch rebases `lane' and
;; `regno' onto the high D register of each Q register -- confirm.
;; NOTE(review): the "type" here is neon_store4_4reg<q>, whereas the VD
;; variant of this insn uses neon_store4_one_lane<q>; confirm whether the
;; difference is intentional.
5306 (define_insn "neon_vst4_lane<mode>"
5307 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5308 (unspec:<V_four_elem>
5309 [(match_operand:XI 1 "s_register_operand" "w")
5310 (match_operand:SI 2 "immediate_operand" "i")
5311 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5315 HOST_WIDE_INT lane = INTVAL (operands[2]);
5316 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5317 int regno = REGNO (operands[1]);
5319 if (lane < 0 || lane >= max)
5320 error ("lane out of range");
5321 else if (lane >= max / 2)
5326 ops[0] = operands[0];
5327 ops[1] = gen_rtx_REG (DImode, regno);
5328 ops[2] = gen_rtx_REG (DImode, regno + 4);
5329 ops[3] = gen_rtx_REG (DImode, regno + 8);
5330 ops[4] = gen_rtx_REG (DImode, regno + 12);
5331 ops[5] = GEN_INT (lane);
5332 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5336 [(set_attr "type" "neon_store4_4reg<q>")]
;; Widen the low half of VU operand 1: the half is picked by a vec_select
;; whose selector (operand 2) must satisfy vect_par_constant_low, and the
;; result is extended with code SE into the <V_unpack> destination.
;; Emitted as vmovl on %e1.  Little-endian only (!BYTES_BIG_ENDIAN).
5339 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5340 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5341 (SE:<V_unpack> (vec_select:<V_HALF>
5342 (match_operand:VU 1 "register_operand" "w")
5343 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5344 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5345 "vmovl.<US><V_sz_elem> %q0, %e1"
5346 [(set_attr "type" "neon_shift_imm_long")]
;; Widen the high half of VU operand 1: the half is picked by a vec_select
;; whose selector (operand 2) must satisfy vect_par_constant_high, and the
;; result is extended with code SE into the <V_unpack> destination.
;; Emitted as vmovl on %f1.  Little-endian only (!BYTES_BIG_ENDIAN).
5349 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5350 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5351 (SE:<V_unpack> (vec_select:<V_HALF>
5352 (match_operand:VU 1 "register_operand" "w")
5353 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5354 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5355 "vmovl.<US><V_sz_elem> %q0, %f1"
5356 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for unpacking the high half of a VU vector.  It builds a
;; PARALLEL of <V_mode_nunits>/2 lane indices starting at
;; <V_mode_nunits>/2 (the upper lanes) and passes it as the selector to
;; neon_vec_unpack<US>_hi_<mode>.  Little-endian only.
5359 (define_expand "vec_unpack<US>_hi_<mode>"
5360 [(match_operand:<V_unpack> 0 "register_operand" "")
5361 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5362 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5364 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5367 for (i = 0; i < (<V_mode_nunits>/2); i++)
5368 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5370 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5371 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for unpacking the low half of a VU vector.  It builds a
;; PARALLEL of lane indices 0 .. <V_mode_nunits>/2 - 1 and passes it as
;; the selector to neon_vec_unpack<US>_lo_<mode>.  Little-endian only.
5378 (define_expand "vec_unpack<US>_lo_<mode>"
5379 [(match_operand:<V_unpack> 0 "register_operand" "")
5380 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5381 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5383 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5386 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5387 RTVEC_ELT (v, i) = GEN_INT (i);
5388 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5389 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the low halves of VU operands 1 and 3.  Each half
;; is a vec_select (operand 2 is the vect_par_constant_low selector for
;; operand 1) extended with code SE before the multiply.  Emitted as
;; vmull on %e1 and %e3.  Little-endian only.
5396 (define_insn "neon_vec_<US>mult_lo_<mode>"
5397 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5398 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5399 (match_operand:VU 1 "register_operand" "w")
5400 (match_operand:VU 2 "vect_par_constant_low" "")))
5401 (SE:<V_unpack> (vec_select:<V_HALF>
5402 (match_operand:VU 3 "register_operand" "w")
5404 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5405 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5406 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the low halves of two VU vectors.
;; It builds a PARALLEL of lane indices 0 .. <V_mode_nunits>/2 - 1 and
;; hands it to neon_vec_<US>mult_lo_<mode>.  Little-endian only.
5409 (define_expand "vec_widen_<US>mult_lo_<mode>"
5410 [(match_operand:<V_unpack> 0 "register_operand" "")
5411 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5412 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5413 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5415 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5418 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5419 RTVEC_ELT (v, i) = GEN_INT (i);
5420 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5422 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves of VU operands 1 and 3.  Each half
;; is a vec_select (operand 2 is the vect_par_constant_high selector for
;; operand 1) extended with code SE before the multiply.  Emitted as
;; vmull on %f1 and %f3.  Little-endian only.
5430 (define_insn "neon_vec_<US>mult_hi_<mode>"
5431 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5432 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5433 (match_operand:VU 1 "register_operand" "w")
5434 (match_operand:VU 2 "vect_par_constant_high" "")))
5435 (SE:<V_unpack> (vec_select:<V_HALF>
5436 (match_operand:VU 3 "register_operand" "w")
5438 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5439 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5440 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the high halves of two VU vectors.
;; It builds a PARALLEL of <V_mode_nunits>/2 lane indices starting at
;; <V_mode_nunits>/2 and hands it to neon_vec_<US>mult_hi_<mode>.
;; Little-endian only.
5443 (define_expand "vec_widen_<US>mult_hi_<mode>"
5444 [(match_operand:<V_unpack> 0 "register_operand" "")
5445 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5446 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5447 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5449 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5452 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5453 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5454 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5456 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening left shift: VW operand 1 is shifted left by the constant
;; operand 2 (a const_neon_scalar_shift_amount_operand) and the result is
;; extended with code SE into the <V_widen> destination.  Emitted as
;; vshll from D register %P1 into Q register %q0.
5465 (define_insn "neon_vec_<US>shiftl_<mode>"
5466 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5467 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5468 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5471 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5473 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for a widening left shift of the low half of a VU vector.
;; The low half is obtained as a <V_HALF>mode subreg of operand 1 at byte
;; offset 0 and passed to neon_vec_<US>shiftl_<V_half>.
;; Little-endian only.
5476 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5477 [(match_operand:<V_unpack> 0 "register_operand" "")
5478 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5479 (match_operand:SI 2 "immediate_operand" "i")]
5480 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5482 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5483 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for a widening left shift of the high half of a VU vector.
;; The high half is obtained as a <V_HALF>mode subreg of operand 1 at byte
;; offset GET_MODE_SIZE (<V_HALF>mode) and passed to
;; neon_vec_<US>shiftl_<V_half>.  Little-endian only.
5489 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5490 [(match_operand:<V_unpack> 0 "register_operand" "")
5491 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5492 (match_operand:SI 2 "immediate_operand" "i")]
5493 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5495 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5496 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5497 GET_MODE_SIZE (<V_HALF>mode)),
;; Widen a whole VDI vector: operand 1 is extended with code SE into the
;; <V_widen> destination, emitted as vmovl from D register %P1 into Q
;; register %q0.  Used by the VDI vec_unpack expanders that follow.
;; NOTE(review): the "type" is neon_move, whereas the quad-register
;; unpack insns that also emit vmovl use neon_shift_imm_long -- confirm
;; which scheduling type is intended.
5503 ;; Vectorize for non-neon-quad case
5504 (define_insn "neon_unpack<US>_<mode>"
5505 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5506 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5508 "vmovl.<US><V_sz_elem> %q0, %P1"
5509 [(set_attr "type" "neon_move")]
;; Unpack-low expander for VDI vectors.  The whole input is widened into a
;; fresh <V_widen>mode temporary with neon_unpack<US>_<mode>, then the low
;; half of that temporary is copied to operand 0 via neon_vget_low.
5512 (define_expand "vec_unpack<US>_lo_<mode>"
5513 [(match_operand:<V_double_width> 0 "register_operand" "")
5514 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5517 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5518 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5519 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Unpack-high expander for VDI vectors.  The whole input is widened into
;; a fresh <V_widen>mode temporary with neon_unpack<US>_<mode>, then the
;; high half of that temporary is copied to operand 0 via neon_vget_high.
5525 (define_expand "vec_unpack<US>_hi_<mode>"
5526 [(match_operand:<V_double_width> 0 "register_operand" "")
5527 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5530 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5531 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5532 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI vectors (operands 1 and 2), each
;; extended into <V_widen> before the multiply.  Emitted as vmull from D
;; registers %P1 and %P2 into Q register %q0.
5538 (define_insn "neon_vec_<US>mult_<mode>"
5539 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5540 (mult:<V_widen> (SE:<V_widen>
5541 (match_operand:VDI 1 "register_operand" "w"))
5543 (match_operand:VDI 2 "register_operand" "w"))))]
5545 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5546 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Widening-multiply-high expander for VDI vectors.  The full widening
;; product is computed into a fresh <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then its high half is copied to operand 0 via
;; neon_vget_high.
5549 (define_expand "vec_widen_<US>mult_hi_<mode>"
5550 [(match_operand:<V_double_width> 0 "register_operand" "")
5551 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5552 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5555 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5556 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5557 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening-multiply-low expander for VDI vectors.  The full widening
;; product is computed into a fresh <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then its low half is copied to operand 0 via
;; neon_vget_low.
5564 (define_expand "vec_widen_<US>mult_lo_<mode>"
5565 [(match_operand:<V_double_width> 0 "register_operand" "")
5566 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5567 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5570 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5571 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5572 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Widening-shift-left-high expander for VDI vectors.  The full widening
;; shift of operand 1 by operand 2 is computed into a fresh <V_widen>mode
;; temporary with neon_vec_<US>shiftl_<mode>, then its high half is copied
;; to operand 0 via neon_vget_high.
5579 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5580 [(match_operand:<V_double_width> 0 "register_operand" "")
5581 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5582 (match_operand:SI 2 "immediate_operand" "i")]
5585 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5586 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5587 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening-shift-left-low expander for VDI vectors.  The full widening
;; shift of operand 1 by operand 2 is computed into a fresh <V_widen>mode
;; temporary with neon_vec_<US>shiftl_<mode>, then its low half is copied
;; to operand 0 via neon_vget_low.
5593 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5594 [(match_operand:<V_double_width> 0 "register_operand" "")
5595 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5596 (match_operand:SI 2 "immediate_operand" "i")]
5599 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5600 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5601 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Pack two VN vectors into one <V_narrow_pack> vector by truncating each
;; element: operand 1 is narrowed into the low half of the destination
;; (%e0) and operand 2 into the high half (%f0), using two vmovn
;; instructions (hence "length" 8 and type "multiple").  The destination
;; is earlyclobber ("=&w") because its low half is written before
;; operand 2 has been read.
5607 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5608 ; because the ordering of vector elements in Q registers is different from what
5609 ; the semantics of the instructions require.
5611 (define_insn "vec_pack_trunc_<mode>"
5612 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5613 (vec_concat:<V_narrow_pack>
5614 (truncate:<V_narrow>
5615 (match_operand:VN 1 "register_operand" "w"))
5616 (truncate:<V_narrow>
5617 (match_operand:VN 2 "register_operand" "w"))))]
5618 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5619 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5620 [(set_attr "type" "multiple")
5621 (set_attr "length" "8")]
;; Truncate every element of the Q-register VN operand 1 into the
;; <V_narrow> D-register destination with a single vmovn.
;; Little-endian only.
5624 ;; For the non-quad case.
5625 (define_insn "neon_vec_pack_trunc_<mode>"
5626 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5627 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5628 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5629 "vmovn.i<V_sz_elem>\t%P0, %q1"
5630 [(set_attr "type" "neon_move_narrow_q")]
;; vec_pack_trunc expander for VSHFT inputs.  Operands 1 and 2 are first
;; placed in the low and high halves of a fresh <V_DOUBLE>mode temporary
;; (move_lo_quad / move_hi_quad), and that temporary is then narrowed into
;; operand 0 with neon_vec_pack_trunc_<V_double>.  Little-endian only.
5633 (define_expand "vec_pack_trunc_<mode>"
5634 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5635 (match_operand:VSHFT 1 "register_operand" "")
5636 (match_operand:VSHFT 2 "register_operand")]
5637 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5639 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5641 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5642 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5643 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference written as plain RTL: abs (operand 1 - operand 2)
;; over VDQ modes, emitted as vabd.<V_s_elem>.  For floating-point modes
;; the pattern is only enabled under flag_unsafe_math_optimizations.  The
;; "type" is neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
5647 (define_insn "neon_vabd<mode>_2"
5648 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5649 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5650 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5651 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5652 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5654 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5655 (const_string "neon_fp_abd_s<q>")
5656 (const_string "neon_abd<q>")))]
;; Absolute difference where the inner operation is an unspec of operands
;; 1 and 2 rather than a minus: abs of that unspec over VDQ modes, emitted
;; as vabd.<V_if_elem>.  Enabling condition and "type" selection are the
;; same as for neon_vabd<mode>_2.
5659 (define_insn "neon_vabd<mode>_3"
5660 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5661 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5662 (match_operand:VDQ 2 "s_register_operand" "w")]
5664 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5665 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5667 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5668 (const_string "neon_fp_abd_s<q>")
5669 (const_string "neon_abd<q>")))]
5672 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; SImode -> DImode sign extension whose destination is a VFP register,
;; split after reload: operand 1 is duplicated into both lanes of the
;; destination viewed as V2SI (operand 2 is the same hard register in
;; V2SImode), then the DImode destination is arithmetically shifted right
;; by 32.
5675 [(set (match_operand:DI 0 "s_register_operand" "")
5676 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5677 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5678 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5679 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5681 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode sign extension whose destination is a VFP register,
;; split after reload: operand 1 is duplicated into every lane of the
;; destination viewed as V4HI (operand 2 is the same hard register in
;; V4HImode), then the DImode destination is arithmetically shifted right
;; by 48.
5685 [(set (match_operand:DI 0 "s_register_operand" "")
5686 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5687 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5688 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5689 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5691 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode -> DImode sign extension whose destination is a VFP register,
;; split after reload: operand 1 is duplicated into every lane of the
;; destination viewed as V8QI (operand 2 is the same hard register in
;; V8QImode), then the DImode destination is arithmetically shifted right
;; by 56.
5695 [(set (match_operand:DI 0 "s_register_operand" "")
5696 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5697 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5698 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5699 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5701 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SImode -> DImode zero extension whose destination is a VFP register,
;; split after reload: operand 1 is duplicated into both lanes of the
;; destination viewed as V2SI (operand 2 is the same hard register in
;; V2SImode), then the DImode destination is logically shifted right
;; by 32.
5705 [(set (match_operand:DI 0 "s_register_operand" "")
5706 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5707 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5708 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5709 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5711 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode zero extension whose destination is a VFP register,
;; split after reload: operand 1 is duplicated into every lane of the
;; destination viewed as V4HI (operand 2 is the same hard register in
;; V4HImode), then the DImode destination is logically shifted right
;; by 48.
5715 [(set (match_operand:DI 0 "s_register_operand" "")
5716 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5717 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5718 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5719 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5721 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5725 [(set (match_operand:DI 0 "s_register_operand" "")
5726 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5727 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5728 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5729 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5731 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));