1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Accepted values are "vadd", "vmin" and "vmax"; the default is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX vector modes.  There are nine alternatives; the
;; two constraint strings are matched position by position.
;;   alt 0     : plain "vmov" between %P registers.
;;   alts 1, 3 : template produced by output_move_neon.
;;   alt 2     : constant source ("Dn").  The constant is checked with
;;               neon_immediate_valid_for_move (asserted valid); the visible
;;               templates are "vmov.f32" and a "vmov.i<width>" string
;;               formatted into the static buffer templ.
;;   alts 4, 5 : "vmov" between a %P register and a %Q/%R register pair.
;;   others    : template produced by output_move_double.
;; The condition requires at least one of the two operands to be a register.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern for the VQXMOV vector modes; same nine-alternative layout as
;; the VDX move pattern, but operating on %q registers.
;;   alt 0     : plain "vmov" between %q registers.
;;   alts 1, 3 : template produced by output_move_neon.
;;   alt 2     : constant source ("Dn"), validated with
;;               neon_immediate_valid_for_move; the visible templates are
;;               "vmov.f32" and a "vmov.i<width>" string built in templ.
;;   alts 4, 5 : two "vmov" instructions (joined with "\;"), one per
;;               %e / %f half of the vector register.
;;   others    : template produced by output_move_quad.
;; The condition requires at least one of the two operands to be a register.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Expander for TImode moves.  While new pseudos may still be created, a
;; move whose destination is not a REG has its source forced into a TImode
;; register first.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves of the VSTRUCT modes.  While new pseudos may still be
;; created, a move whose destination is not a REG has its source forced into
;; a register of the same mode first.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move pattern for the VSTRUCT modes with three alternatives:
;; register <- register, "Ut" memory <- register, register <- "Ut" memory.
;; Alternatives 1 and 2 take their template from output_move_neon.
;; The insn length is not a constant: it is computed per insn by
;; arm_attr_length_move_neon.
;; The condition requires at least one of the two operands to be a register.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; Post-reload split of an EI register-to-register copy into two moves:
;; a TImode piece at the base register number and a DImode piece at base + 4.
;; The piece lists and the operands array are handed to
;; neon_disambiguate_copy.
;; NOTE(review): presumably that helper fills operands 0-3 in an order that
;; is safe when source and destination overlap -- confirm in its definition.
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
;; Post-reload split of an OI register-to-register copy into two TImode
;; moves, at the base register number and at base + 4.  The piece lists and
;; the operands array are handed to neon_disambiguate_copy.
;; NOTE(review): presumably that helper fills operands 0-3 in an order that
;; is safe when source and destination overlap -- confirm in its definition.
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
;; Post-reload split of a CI register-to-register copy into three TImode
;; moves, at the base register number, base + 4 and base + 8.  The piece
;; lists and the operands array are handed to neon_disambiguate_copy.
;; NOTE(review): presumably that helper fills operands 0-5 in an order that
;; is safe when source and destination overlap -- confirm in its definition.
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
;; Post-reload split of an XI register-to-register copy into four TImode
;; moves, at the base register number, base + 4, base + 8 and base + 12.
;; The piece lists and the operands array are handed to
;; neon_disambiguate_copy.
;; NOTE(review): presumably that helper fills operands 0-7 in an order that
;; is safe when source and destination overlap -- confirm in its definition.
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned moves of the VDQX modes, wrapped in
;; UNSPEC_MISALIGNED_ACCESS.  Enabled only for little-endian NEON targets
;; with unaligned_access.
;; After making sure at least one side is a register, the other operand is
;; taken as the memory reference (adjust_mem); if neon_vector_mem_operand
;; rejects it, its address is forced into a Pmode register.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register (operand 1) to "Um" memory
;; (operand 0), emitted as a single vst1 of the %P register with the
;; mode's element size.  Little-endian with unaligned_access only.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VDX-mode value from memory (operand 1) into a
;; register (operand 0), emitted as a single vld1 into the %P register with
;; the mode's element size.  Little-endian with unaligned_access only.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register (operand 1) to "Um" memory
;; (operand 0), emitted as a single vst1 of the %q register with the
;; mode's element size.  Little-endian with unaligned_access only.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX-mode value from memory (operand 1) into a
;; register (operand 0), emitted as a single vld1 into the %q register with
;; the mode's element size.  Little-endian with unaligned_access only.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of the VD-mode vector operand 3,
;; which is tied to the output by its "0" constraint.
;; Operand 2 is decoded with ffs () - 1 to obtain the lane number; on
;; big-endian the lane number is mirrored within the vector before printing.
;;   alt 0: lane load from "Um" memory with vld1.
;;   alt 1: vmov from a core register.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert scalar operand 1 into one lane of the VQ-mode vector operand 3,
;; which is tied to the output by its "0" constraint.
;; Operand 2 is decoded with ffs () - 1 to obtain the element number.  The
;; element is then located within one half of the vector: elt is its index
;; inside that half (mirrored on big-endian) and hi is the register-number
;; offset of the half (0 or 2).  Operand 0 is rewritten as the <V_HALF>mode
;; register at regno + hi so that the same templates as the VD pattern can
;; be used.
;;   alt 0: lane load from "Um" memory with vld1.
;;   alt 1: vmov from a core register.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert DImode operand 1 into one element of the V2DI vector operand 3,
;; which is tied to the output by its "0" constraint.
;; Operand 2 is decoded with ffs () - 1 to obtain the element number, and
;; operand 0 is rewritten as the DImode register at regno + 2 * elem, so the
;; whole selected 64-bit half is written.
;;   alt 0: vld1.64 from "Um" memory.
;;   alt 1: vmov from the %Q1/%R1 core register pair.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set on the VDQ modes.  The element index in operand 2 is
;; converted to a one-bit mask (1 << index) and passed to
;; vec_set<mode>_internal, with operand 0 supplied both as the destination
;; and as the vector being merged into.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
;; Extract one lane of the VD-mode vector operand 1 into scalar operand 0.
;; Operand 2 is the lane index; on big-endian it is mirrored within the
;; vector before printing.
;;   alt 0: lane store to "Um" memory with vst1.
;;   alt 1: vmov to a core register.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract one lane of the VQ-mode vector operand 1 into scalar operand 0.
;; The lane index in operand 2 is located within one half of the vector:
;; elt is its index inside that half (mirrored on big-endian) and hi is the
;; register-number offset of the half (0 or 2).  Operand 1 is rewritten as
;; the <V_HALF>mode register at regno + hi so that the same templates as the
;; VD pattern can be used.
;;   alt 0: lane store to "Um" memory with vst1.
;;   alt 1: vmov to a core register.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract one DImode element of the V2DI vector operand 1 into operand 0.
;; Operand 1 is rewritten as the DImode register at regno + 2 * index, so
;; the selected 64-bit half is moved as a whole.
;;   alt 0: vst1.64 to "Um" memory.
;;   alt 1: vmov to the %Q0/%R0 core register pair.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vector initialisation of the VDQ modes; all of the work is
;; delegated to neon_expand_vector_init (target, initialiser).
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
;; Element-wise vector addition for the VDQ modes, emitted as vadd.
;; Integer modes always match; floating-point modes match only under
;; flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_addsub_s<q> for floating-point modes and neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
;; 64-bit (DImode) addition with seven alternatives; the pattern clobbers
;; the condition-code register.
;; Alternatives 0 and 3 use NEON registers and emit vadd.i64; per the "arch"
;; attribute, alternative 0 is for neon_for_64bits and alternative 3 for
;; avoid_neon_for_64bits.  The remaining alternatives use core registers,
;; have type "multiple", length 8 and conds "clob".
;; NOTE(review): the output for the core-register alternatives is not
;; visible in this excerpt.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Element-wise vector subtraction (operand 1 - operand 2) for the VDQ
;; modes, emitted as vsub.  Integer modes always match; floating-point modes
;; match only under flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_addsub_s<q> for floating-point modes and neon_sub<q> otherwise.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
;; 64-bit (DImode) subtraction with five alternatives; the pattern clobbers
;; the condition-code register.
;;   alts 0, 4    : NEON registers, emitted as vsub.i64 (alt 0 for
;;                  neon_for_64bits, alt 4 for avoid_neon_for_64bits).
;;   alts 1, 2, 3 : core registers, emitted as a subs on the %Q halves
;;                  followed by an sbc on the %R halves (length 8).
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Element-wise vector multiplication for the VDQW modes, emitted as vmul.
;; Integer modes always match; floating-point modes match only under
;; flag_unsafe_math_optimizations.  The "type" attribute is neon_fp_mul_s<q>
;; for floating-point modes and neon_mul_<V_elem_ch><q> otherwise.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, where the accumulator (operand 1) is tied to the output by
;; its "0" constraint.  Emitted as vmla.  Floating-point modes match only
;; under flag_unsafe_math_optimizations.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, where the accumulator (operand 1) is tied to the output by its
;; "0" constraint.  Emitted as vmls.  Floating-point modes match only under
;; flag_unsafe_math_optimizations.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
580 ;; phase can use it when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations, for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes: operand 1 * operand 2 + operand 3,
;; with the addend (operand 3) tied to the output by its "0" constraint.
;; Emitted as vfma.  This named variant additionally requires
;; flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and vfma template as fma<VCVTF:mode>4, but conditioned only on
;; TARGET_NEON && TARGET_FMA (no flag_unsafe_math_optimizations check).
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: (-operand 1) * operand 2 +
;; operand 3, with the addend (operand 3) tied to the output by its "0"
;; constraint.  Emitted as vfms.  This variant additionally requires
;; flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and vfms template as *fmsub<VCVTF:mode>4, but conditioned only
;; on TARGET_NEON && TARGET_FMA (no flag_unsafe_math_optimizations check).
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Vector rounding of a VCVTF-mode register, expressed as an unspec and
;; emitted as vrint<variant>.f32; one pattern is generated per
;; nvrint_variant.  Requires TARGET_NEON && TARGET_FPU_ARMV8.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Float-to-integer vector conversion: a FIXUORS operation applied to an
;; unspec of the VCVTF-mode operand 1, producing a <V_cmp_result>-mode
;; value.  Emitted as vcvt<variant>.<su>32.f32.  Requires TARGET_NEON &&
;; TARGET_FPU_ARMV8, and the insn is marked as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.
;;   alt 0: register operand 2, emitted as a three-operand vorr.
;;   alt 1: "Dl" constant operand 2 with operand 1 tied to the output; the
;;          template comes from neon_output_logic_immediate ("vorr", ...),
;;          called with 0 as its fourth argument.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
649 switch (which_alternative)
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
657 [(set_attr "type" "neon_logic<q>")]
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.
;;   alt 0: register operand 2, emitted as a three-operand vand.
;;   alt 1: "DL" constant operand 2 with operand 1 tied to the output; the
;;          template comes from neon_output_logic_immediate ("vand", ...),
;;          called with 1 as its fourth argument.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
671 switch (which_alternative)
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
679 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2, emitted as
;; vorn.  Note that the inverted input is operand 2, which appears first in
;; the RTL but last in the assembly template.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
;; 64-bit (DImode) OR-NOT: operand 0 = operand 1 | ~operand 2, with four
;; alternatives (one NEON, three core-register; see the "arch" attribute).
;; When the destination is not a VFP register the insn is split.  The split
;; template performs the operation separately on the SImode low parts
;; (operands 0, 2, 1) and high parts (operands 3, 4, 5), which the
;; preparation code derives with gen_lowpart / gen_highpart.
;; NOTE(review): the preparation code also contains a path that emits
;; one_cmpldi2 followed by iordi3; the condition selecting between the two
;; paths is not visible in this excerpt.
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
;; Bit-clear for the VDQ modes: operand 0 = operand 1 & ~operand 2, emitted
;; as vbic.  Note that the inverted input is operand 2, which appears first
;; in the RTL but last in the assembly template.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
740 ;; Compare to *anddi_notdi_di.
;; 64-bit (DImode) bit-clear: operand 0 = operand 1 & ~operand 2, with one
;; NEON-register alternative and two core-register alternatives (type
;; "multiple", length 8).
;; NOTE(review): the condition and output template are not visible in this
;; excerpt.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
;; Bitwise exclusive-OR for the VDQ modes, emitted as veor on registers.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes, emitted as vmvn.  The insn is
;; given the "neon_move<q>" type.
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
;; Element-wise absolute value for the VDQW modes, emitted as vabs with the
;; signed element suffix.  The "type" attribute is neon_fp_abs_s<q> for
;; floating-point modes and neon_abs<q> otherwise.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
;; Element-wise negation for the VDQW modes, emitted as vneg with the signed
;; element suffix.  The "type" attribute is neon_fp_neg_s<q> for
;; floating-point modes and neon_neg<q> otherwise.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
;; 64-bit (DImode) negation with two NEON-register and two core-register
;; alternatives.  The pattern clobbers the condition-code register and a
;; DImode scratch (operand 2); only alternative 1 asks for a real scratch
;; register ("&w"), the others use "X".  Length is 8 and type "multiple".
;; NOTE(review): the condition and output template are not visible in this
;; excerpt; the splitters that follow match this pattern's shape.
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
804 ; Split negdi2_neon for vfp registers
;; After reload, when the destination is a VFP register, a DImode negation
;; is rewritten as "scratch = 0; dest = scratch - src" (the subtraction
;; keeps the condition-code clobber).  If no scratch register was allocated
;; (operand 2 is not a REG), the destination itself is used as the scratch.
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
820 ; Split negdi2_neon for core registers
;; After reload, when the destination is a general (core) register, the
;; negation is re-emitted as a plain neg:DI with only the condition-code
;; clobber, i.e. the DImode scratch clobber is dropped.
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
;; Element-wise unsigned minimum for the VDQIW modes, emitted as vmin with
;; the unsigned element suffix.
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
;; Element-wise unsigned maximum for the VDQIW modes, emitted as vmax with
;; the unsigned element suffix.
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
;; Element-wise signed minimum for the VDQW modes, emitted as vmin with the
;; signed element suffix.  The "type" attribute is neon_fp_minmax_s<q> for
;; floating-point modes and neon_minmax<q> otherwise.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
;; Element-wise signed maximum for the VDQW modes, emitted as vmax with the
;; signed element suffix.  The "type" attribute is neon_fp_minmax_s<q> for
;; floating-point modes and neon_minmax<q> otherwise.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift for the VDQIW modes.
;;   alt 0: per-element shift counts in a register, emitted as vshl with the
;;          signed element suffix.
;;   alt 1: "Dn" constant shift count; the template comes from
;;          neon_output_shift_immediate ("vshl", 'i', ...).
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
885 switch (which_alternative)
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
890 VALID_NEON_QREG_MODE (<MODE>mode),
892 default: gcc_unreachable ();
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Vector arithmetic right shift by a constant for the VDQIW modes.  The
;; template comes from neon_output_shift_immediate ("vshr", 's', ...).
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
908 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector logical right shift by a constant for the VDQIW modes.  The
;; template comes from neon_output_shift_immediate ("vshr", 'u', ...).
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
921 [(set_attr "type" "neon_shift_imm<q>")]
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
925 ; amount, with signed operands. This is essentially the same as vashl<mode>3
926 ; above, but using an unspec in case GCC tries anything tricky with negative
;; Register-count vector shift for the VDQI modes, wrapped in
;; UNSPEC_ASHIFT_SIGNED and emitted as vshl with the signed element suffix.
;; The vashr<mode>3 expander uses it with a negated shift count.
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
;; Register-count vector shift for the VDQI modes, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED and emitted as vshl with the unsigned element
;; suffix.  The vlshr<mode>3 expander uses it with a negated shift count.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic right shift (VDQIW modes).
;; A register shift count is negated into a fresh pseudo and the shift is
;; done as a signed left shift (ashl<mode>3_signed); the other visible path
;; emits the constant-count pattern vashr<mode>3_imm.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
958 if (s_register_operand (operands[2], <MODE>mode))
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for vector logical right shift (VDQIW modes).
;; A register shift count is negated into a fresh pseudo and the shift is
;; done as an unsigned left shift (ashl<mode>3_unsigned); the other visible
;; path emits the constant-count pattern vlshr<mode>3_imm.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
975 if (s_register_operand (operands[2], <MODE>mode))
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
;; Load an SImode shift count (operand 1) into lane 0 of a DImode NEON
;; register (operand 0); the RTL models this as an unspec.
;;   alt 0: vld1.32 lane load from "Um" memory.
;;   alt 1: vmov.32 from a core register.
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; 64-bit left shift in a NEON register, without any clobbers.  Only matches
;; after reload, and a constant shift count must lie in the range 0..63.
;; Alternative 0 shifts by an immediate, alternative 1 by a count held in a
;; NEON register; both emit vshl.u64.
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; 64-bit left shift whose operands may live in NEON (w) or core (r)
;; registers; it is split once reload has completed.
;;
;; When operand 0 is a VFP/NEON register:
;;   - a constant count below 1 becomes a plain DImode move;
;;   - a constant count above 63 is clamped to 63;
;;   - a non-constant count is first loaded into the DImode scratch
;;     (operand 5) with neon_load_count;
;;   and the shift itself is emitted through ashldi3_neon_noclobber.
;; Otherwise (core registers): a shift by the constant 1 uses
;; arm_ashldi3_1bit when source and destination either do not overlap or are
;; the same register; every other case goes through
;; arm_emit_coreregs_64bit_shift with the two SImode scratches.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1027 "TARGET_NEON && reload_completed"
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1033 if (CONST_INT_P (operands[2]))
1035 if (INTVAL (operands[2]) < 1)
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1071 ; The shift amount needs to be negated for right-shifts
; 64-bit signed VSHL (vshl.s64) by a count held in a NEON register, represented
; with UNSPEC_ASHIFT_SIGNED.  Only matches after reload.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1082 ; The shift amount needs to be negated for right-shifts
; 64-bit unsigned VSHL (vshl.u64) by a count held in a NEON register,
; represented with UNSPEC_ASHIFT_UNSIGNED.  Only matches after reload.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
;; 64-bit arithmetic right shift in a NEON register by an immediate in the
;; range 1..64 (vshr.s64), without clobbers.  Only matches after reload.
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
;; 64-bit logical right shift in a NEON register by an immediate in the
;; range 1..64 (vshr.u64), without clobbers.  Only matches after reload.
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
;; 64-bit right shifts (iterated over rshifts) whose operands may live in
;; NEON (w) or core (r) registers; split once reload has completed.
;;
;; When operand 0 is a VFP/NEON register:
;;   - a constant count below 1 becomes a plain DImode move;
;;   - a constant count above 64 is clamped to 64, and the shift is emitted
;;     through the matching <shift>di3_neon_imm_noclobber pattern;
;;   - a register count is negated into the SImode scratch (operand 3),
;;     loaded into the DImode scratch (operand 5) with neon_load_count, and
;;     used as the count of a <shifttype>_shift_di3_neon left shift.
;; Otherwise (core registers): a shift by the constant 1 uses
;; arm_<shift>di3_1bit when source and destination either do not overlap or
;; are the same register; every other case goes through
;; arm_emit_coreregs_64bit_shift.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1125 "TARGET_NEON && reload_completed"
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1131 if (CONST_INT_P (operands[2]))
1133 if (INTVAL (operands[2]) < 1)
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1175 ;; Widening operations
;; Widening signed sum: each element of operand 1 is sign-extended to the
;; wide element type and added to the wide vector in operand 2 (vaddw).
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "%w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
;; Widening unsigned sum: each element of operand 1 is zero-extended to the
;; wide element type and added to the wide vector in operand 2 (vaddw).
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "%w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1197 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1198 ;; shift-count granularity. That's good enough for the middle-end's current
1201 ;; Note that it's not safe to perform such an operation in big-endian mode,
1202 ;; due to element-ordering issues.
;; Whole-vector shift by a constant multiple of 8 bits, little-endian only.
;; Both operands are viewed as byte vectors (V16QI for 128-bit modes, V8QI
;; otherwise) and combined with a zero vector by VEXT, using
;; num_bits / BITS_PER_UNIT as the byte index: operand 1 is the first VEXT
;; input and the zero vector the second.
1204 (define_expand "vec_shr_<mode>"
1205 [(match_operand:VDQ 0 "s_register_operand" "")
1206 (match_operand:VDQ 1 "s_register_operand" "")
1207 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1208 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1211 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1212 const int width = GET_MODE_BITSIZE (<MODE>mode);
1213 const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1214 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1215 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
/* NOTE(review): a shift by the full vector width copies operand 1 unchanged
   rather than producing an all-zero vector -- confirm this is intended.  */
1217 if (num_bits == width)
1219 emit_move_insn (operands[0], operands[1]);
1223 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1224 operands[0] = gen_lowpart (bvecmode, operands[0]);
1225 operands[1] = gen_lowpart (bvecmode, operands[1]);
1227 emit_insn (gen_ext (operands[0], operands[1], zero_reg,
1228 GEN_INT (num_bits / BITS_PER_UNIT)));
;; Whole-vector shift by a constant multiple of 8 bits, little-endian only.
;; Both operands are viewed as byte vectors (V16QI for 128-bit modes, V8QI
;; otherwise) and combined with a zero vector by VEXT: the zero vector is the
;; first VEXT input, operand 1 the second, and the byte index is
;; (width - num_bits) / BITS_PER_UNIT.  One special case stores an all-zero
;; vector directly into operand 0.
1232 (define_expand "vec_shl_<mode>"
1233 [(match_operand:VDQ 0 "s_register_operand" "")
1234 (match_operand:VDQ 1 "s_register_operand" "")
1235 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1236 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1239 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1240 const int width = GET_MODE_BITSIZE (<MODE>mode);
1241 const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1242 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1243 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1247 emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
1251 num_bits = width - num_bits;
1253 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1254 operands[0] = gen_lowpart (bvecmode, operands[0]);
1255 operands[1] = gen_lowpart (bvecmode, operands[1]);
1257 emit_insn (gen_ext (operands[0], zero_reg, operands[1],
1258 GEN_INT (num_bits / BITS_PER_UNIT)));
1262 ;; Helpers for quad-word reduction operations
1264 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1265 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1266 ; N/2-element vector.
;; V4SI variant: combines elements 0-1 of operand 1 with elements 2-3 of the
;; same register using <VQH_mnem>, producing a V2SI result.
1268 (define_insn "quad_halves_<code>v4si"
1269 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1271 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1272 (parallel [(const_int 0) (const_int 1)]))
1273 (vec_select:V2SI (match_dup 1)
1274 (parallel [(const_int 2) (const_int 3)]))))]
1276 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1277 [(set_attr "vqh_mnem" "<VQH_mnem>")
1278 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF variant: combines elements 0-1 of operand 1 with elements 2-3 of the
;; same register using <VQH_mnem>.f32, producing a V2SF result.  Only enabled
;; with -funsafe-math-optimizations.
1281 (define_insn "quad_halves_<code>v4sf"
1282 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1284 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1285 (parallel [(const_int 0) (const_int 1)]))
1286 (vec_select:V2SF (match_dup 1)
1287 (parallel [(const_int 2) (const_int 3)]))))]
1288 "TARGET_NEON && flag_unsafe_math_optimizations"
1289 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1290 [(set_attr "vqh_mnem" "<VQH_mnem>")
1291 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant: combines elements 0-3 of operand 1 with elements 4-7 of the
;; same register using <VQH_mnem>, producing a V4HI result.
;; Operand 0 is only written by this insn, so it uses the write-only
;; modifier "=w" (as the v4si and v4sf variants do) rather than the
;; read-write modifier "+w".
1294 (define_insn "quad_halves_<code>v8hi"
1295 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1297 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1298 (parallel [(const_int 0) (const_int 1)
1299 (const_int 2) (const_int 3)]))
1300 (vec_select:V4HI (match_dup 1)
1301 (parallel [(const_int 4) (const_int 5)
1302 (const_int 6) (const_int 7)]))))]
1304 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1305 [(set_attr "vqh_mnem" "<VQH_mnem>")
1306 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant: combines elements 0-7 of operand 1 with elements 8-15 of
;; the same register using <VQH_mnem>, producing a V8QI result.
;; Operand 0 is only written by this insn, so it uses the write-only
;; modifier "=w" (as the v4si and v4sf variants do) rather than the
;; read-write modifier "+w".
1309 (define_insn "quad_halves_<code>v16qi"
1310 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1312 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1313 (parallel [(const_int 0) (const_int 1)
1314 (const_int 2) (const_int 3)
1315 (const_int 4) (const_int 5)
1316 (const_int 6) (const_int 7)]))
1317 (vec_select:V8QI (match_dup 1)
1318 (parallel [(const_int 8) (const_int 9)
1319 (const_int 10) (const_int 11)
1320 (const_int 12) (const_int 13)
1321 (const_int 14) (const_int 15)]))))]
1323 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1324 [(set_attr "vqh_mnem" "<VQH_mnem>")
1325 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expander that moves a <V_HALF>mode value into the half of the 128-bit
;; operand 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode).
1328 (define_expand "move_hi_quad_<mode>"
1329 [(match_operand:ANY128 0 "s_register_operand" "")
1330 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1333 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1334 GET_MODE_SIZE (<V_HALF>mode)),
;; Expander that moves into a <V_HALF>mode subreg of the 128-bit operand 0
;; (presumably the low half, going by the pattern name -- confirm).
1339 (define_expand "move_lo_quad_<mode>"
1340 [(match_operand:ANY128 0 "s_register_operand" "")
1341 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1344 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1350 ;; Reduction operations
;; Sum reduction for D-register vector modes, delegated to
;; neon_pairwise_reduce with the pairwise-add generator.  Float modes are
;; only accepted with -funsafe-math-optimizations.
1352 (define_expand "reduc_splus_<mode>"
1353 [(match_operand:VD 0 "s_register_operand" "")
1354 (match_operand:VD 1 "s_register_operand" "")]
1355 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1357 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1358 &gen_neon_vpadd_internal<mode>);
;; Sum reduction for Q-register vector modes (little-endian only; float modes
;; need -funsafe-math-optimizations).  The two halves of operand 1 are added
;; with quad_halves_plus, the half-width result is reduced with the
;; D-register reduc_splus pattern, and the result is placed in operand 0 via
;; move_lo_quad.
1362 (define_expand "reduc_splus_<mode>"
1363 [(match_operand:VQ 0 "s_register_operand" "")
1364 (match_operand:VQ 1 "s_register_operand" "")]
1365 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1366 && !BYTES_BIG_ENDIAN"
1368 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1369 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1371 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1372 emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
1373 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Sum reduction for V2DI (little-endian only): a single vadd.i64 adds the
;; %e1 and %f1 halves of operand 1 and writes the sum to %e0.
1378 (define_insn "reduc_splus_v2di"
1379 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1380 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1382 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1383 "vadd.i64\t%e0, %e1, %f1"
1384 [(set_attr "type" "neon_add_q")]
1387 ;; NEON does not distinguish between signed and unsigned addition except on
1388 ;; widening operations.
;; The unsigned sum reduction therefore just forwards to reduc_splus_<mode>.
;; Q-register modes are only accepted on little-endian targets.
1389 (define_expand "reduc_uplus_<mode>"
1390 [(match_operand:VDQI 0 "s_register_operand" "")
1391 (match_operand:VDQI 1 "s_register_operand" "")]
1392 "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
1394 emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
;; Signed-minimum reduction for D-register vector modes, delegated to
;; neon_pairwise_reduce with the neon_vpsmin generator.  Float modes are only
;; accepted with -funsafe-math-optimizations.
1398 (define_expand "reduc_smin_<mode>"
1399 [(match_operand:VD 0 "s_register_operand" "")
1400 (match_operand:VD 1 "s_register_operand" "")]
1401 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1403 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1404 &gen_neon_vpsmin<mode>);
;; Signed-minimum reduction for Q-register vector modes (little-endian only;
;; float modes need -funsafe-math-optimizations).  The halves of operand 1
;; are combined with quad_halves_smin, the half-width result is reduced with
;; the D-register reduc_smin pattern, and the result is placed in operand 0
;; via move_lo_quad.
1408 (define_expand "reduc_smin_<mode>"
1409 [(match_operand:VQ 0 "s_register_operand" "")
1410 (match_operand:VQ 1 "s_register_operand" "")]
1411 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1412 && !BYTES_BIG_ENDIAN"
1414 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1415 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1417 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1418 emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
1419 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Signed-maximum reduction for D-register vector modes, delegated to
;; neon_pairwise_reduce with the neon_vpsmax generator.  Float modes are only
;; accepted with -funsafe-math-optimizations.
1424 (define_expand "reduc_smax_<mode>"
1425 [(match_operand:VD 0 "s_register_operand" "")
1426 (match_operand:VD 1 "s_register_operand" "")]
1427 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1429 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1430 &gen_neon_vpsmax<mode>);
;; Signed-maximum reduction for Q-register vector modes (little-endian only;
;; float modes need -funsafe-math-optimizations).  The halves of operand 1
;; are combined with quad_halves_smax, the half-width result is reduced with
;; the D-register reduc_smax pattern, and the result is placed in operand 0
;; via move_lo_quad.
1434 (define_expand "reduc_smax_<mode>"
1435 [(match_operand:VQ 0 "s_register_operand" "")
1436 (match_operand:VQ 1 "s_register_operand" "")]
1437 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1438 && !BYTES_BIG_ENDIAN"
1440 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1441 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1443 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1444 emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
1445 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Unsigned-minimum reduction for D-register integer vector modes, delegated
;; to neon_pairwise_reduce with the neon_vpumin generator.
1450 (define_expand "reduc_umin_<mode>"
1451 [(match_operand:VDI 0 "s_register_operand" "")
1452 (match_operand:VDI 1 "s_register_operand" "")]
1455 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1456 &gen_neon_vpumin<mode>);
;; Unsigned-minimum reduction for Q-register integer vector modes
;; (little-endian only).  The halves of operand 1 are combined with
;; quad_halves_umin, the half-width result is reduced with the D-register
;; reduc_umin pattern, and the result is placed in operand 0 via
;; move_lo_quad.
1460 (define_expand "reduc_umin_<mode>"
1461 [(match_operand:VQI 0 "s_register_operand" "")
1462 (match_operand:VQI 1 "s_register_operand" "")]
1463 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1465 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1466 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1468 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1469 emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
1470 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Unsigned-maximum reduction for D-register integer vector modes, delegated
;; to neon_pairwise_reduce with the neon_vpumax generator.
1475 (define_expand "reduc_umax_<mode>"
1476 [(match_operand:VDI 0 "s_register_operand" "")
1477 (match_operand:VDI 1 "s_register_operand" "")]
1480 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1481 &gen_neon_vpumax<mode>);
;; Unsigned-maximum reduction for Q-register integer vector modes
;; (little-endian only).  The halves of operand 1 are combined with
;; quad_halves_umax, the half-width result is reduced with the D-register
;; reduc_umax pattern, and the result is placed in operand 0 via
;; move_lo_quad.
1485 (define_expand "reduc_umax_<mode>"
1486 [(match_operand:VQI 0 "s_register_operand" "")
1487 (match_operand:VQI 1 "s_register_operand" "")]
1488 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1490 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1491 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1493 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1494 emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
1495 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Pairwise add of two D-register vectors (vpadd), expressed as an unspec.
;; The scheduling type is selected by <Is_float_mode>.
1500 (define_insn "neon_vpadd_internal<mode>"
1501 [(set (match_operand:VD 0 "s_register_operand" "=w")
1502 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1503 (match_operand:VD 2 "s_register_operand" "w")]
1506 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1507 ;; Assume this schedules like vadd.
1509 (if_then_else (match_test "<Is_float_mode>")
1510 (const_string "neon_fp_reduc_add_s<q>")
1511 (const_string "neon_reduc_add<q>")))]
;; Pairwise signed minimum of two D-register vectors (vpmin on the signed
;; element type), expressed as an unspec.  The scheduling type is selected by
;; <Is_float_mode>.
1514 (define_insn "neon_vpsmin<mode>"
1515 [(set (match_operand:VD 0 "s_register_operand" "=w")
1516 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1517 (match_operand:VD 2 "s_register_operand" "w")]
1520 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1522 (if_then_else (match_test "<Is_float_mode>")
1523 (const_string "neon_fp_reduc_minmax_s<q>")
1524 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum of two D-register vectors (vpmax on the signed
;; element type), expressed as an unspec.  The scheduling type is selected by
;; <Is_float_mode>.
1527 (define_insn "neon_vpsmax<mode>"
1528 [(set (match_operand:VD 0 "s_register_operand" "=w")
1529 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1530 (match_operand:VD 2 "s_register_operand" "w")]
1533 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1535 (if_then_else (match_test "<Is_float_mode>")
1536 (const_string "neon_fp_reduc_minmax_s<q>")
1537 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum of two D-register integer vectors (vpmin on the
;; unsigned element type), expressed as an unspec.
1540 (define_insn "neon_vpumin<mode>"
1541 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1542 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1543 (match_operand:VDI 2 "s_register_operand" "w")]
1546 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1547 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum of two D-register integer vectors (vpmax on the
;; unsigned element type), expressed as an unspec.
1550 (define_insn "neon_vpumax<mode>"
1551 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1552 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1553 (match_operand:VDI 2 "s_register_operand" "w")]
1556 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1557 [(set_attr "type" "neon_reduc_minmax<q>")]
1560 ;; Saturating arithmetic
1562 ; NOTE: Neon supports many more saturating variants of instructions than the
1563 ; following, but these are all GCC currently understands.
1564 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1565 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) of two D-register vectors: vqadd on the
;; signed element type.
1568 (define_insn "*ss_add<mode>_neon"
1569 [(set (match_operand:VD 0 "s_register_operand" "=w")
1570 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1571 (match_operand:VD 2 "s_register_operand" "w")))]
1573 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1574 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) of two D-register vectors: vqadd on the
;; unsigned element type.
1577 (define_insn "*us_add<mode>_neon"
1578 [(set (match_operand:VD 0 "s_register_operand" "=w")
1579 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1580 (match_operand:VD 2 "s_register_operand" "w")))]
1582 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1583 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) of two D-register vectors: vqsub on
;; the signed element type.
1586 (define_insn "*ss_sub<mode>_neon"
1587 [(set (match_operand:VD 0 "s_register_operand" "=w")
1588 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1589 (match_operand:VD 2 "s_register_operand" "w")))]
1591 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1592 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) of two D-register vectors: vqsub
;; on the unsigned element type.
1595 (define_insn "*us_sub<mode>_neon"
1596 [(set (match_operand:VD 0 "s_register_operand" "=w")
1597 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1598 (match_operand:VD 2 "s_register_operand" "w")))]
1600 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1601 [(set_attr "type" "neon_qsub<q>")]
1604 ;; Conditional instructions. These are comparisons with conditional moves for
1605 ;; vectors. They perform the assignment:
1607 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1609 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select.  A lane mask is built in a <V_cmp_result>mode
;; pseudo using the NEON compare generators (vcge, vcgt, vceq, plus the
;; vclt/vcle compare-with-zero forms), and the result is then selected with
;; vbsl.  When swap_bsl_operands is set, operands 2 and 1 are passed to vbsl
;; in swapped order so that an inverted mask still selects the right lanes.
;; Operand 5 is forced into a register unless it is already one (or the
;; compare-with-zero form applies).  Float modes are only accepted with
;; -funsafe-math-optimizations.
1612 (define_expand "vcond<mode><mode>"
1613 [(set (match_operand:VDQW 0 "s_register_operand" "")
1615 (match_operator 3 "comparison_operator"
1616 [(match_operand:VDQW 4 "s_register_operand" "")
1617 (match_operand:VDQW 5 "nonmemory_operand" "")])
1618 (match_operand:VDQW 1 "s_register_operand" "")
1619 (match_operand:VDQW 2 "s_register_operand" "")))]
1620 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1622 HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
1624 rtx magic_rtx = GEN_INT (magic_word);
1626 int use_zero_form = 0;
1627 int swap_bsl_operands = 0;
1628 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1629 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1631 rtx (*base_comparison) (rtx, rtx, rtx, rtx);
1632 rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
1634 switch (GET_CODE (operands[3]))
1641 if (operands[5] == CONST0_RTX (<MODE>mode))
1648 if (!REG_P (operands[5]))
1649 operands[5] = force_reg (<MODE>mode, operands[5]);
1652 switch (GET_CODE (operands[3]))
1662 base_comparison = gen_neon_vcge<mode>;
1663 complimentary_comparison = gen_neon_vcgt<mode>;
1671 base_comparison = gen_neon_vcgt<mode>;
1672 complimentary_comparison = gen_neon_vcge<mode>;
1677 base_comparison = gen_neon_vceq<mode>;
1678 complimentary_comparison = gen_neon_vceq<mode>;
1684 switch (GET_CODE (operands[3]))
1691 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1692 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1698 Note that there also exist direct comparison against 0 forms,
1699 so catch those as a special case. */
1703 switch (GET_CODE (operands[3]))
1706 base_comparison = gen_neon_vclt<mode>;
1709 base_comparison = gen_neon_vcle<mode>;
1712 /* Do nothing, other zero form cases already have the correct
1719 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1721 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1728 /* Vector compare returns false for lanes which are unordered, so if we use
1729 the inverse of the comparison we actually want to emit, then
1730 swap the operands to BSL, we will end up with the correct result.
1731 Note that a NE NaN and NaN NE b are true for all a, b.
1733 Our transformations are:
1738 a NE b -> !(a EQ b) */
1741 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1743 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1745 swap_bsl_operands = 1;
1748 /* We check (a > b || b > a). Combining these comparisons gives us
1749 true iff (a != b && a ORDERED b); swapping the operands to BSL
1750 will then give us (a == b || a UNORDERED b) as intended. */
1752 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
1753 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
1754 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1755 swap_bsl_operands = 1;
1758 /* Operands are ORDERED iff (a > b || b >= a).
1759 Swapping the operands to BSL will give the UNORDERED case. */
1760 swap_bsl_operands = 1;
1763 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
1764 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
1765 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1771 if (swap_bsl_operands)
1772 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1775 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Vector conditional select for integer modes (VDQIW) using
;; arm_comparison_operator.  A lane mask is built in a <V_cmp_result>mode
;; pseudo with one of the vcge/vcgt/vceq/vcle/vclt generators (some codes
;; swap operands 4 and 5), then vbsl selects between operands 1 and 2.  The
;; two vbsl calls differ only in the order of operands 1 and 2 (presumably
;; chosen by the `inverse' flag -- confirm).  Operand 5 is forced into a
;; register unless it is the zero constant or already a register.
1780 (define_expand "vcondu<mode><mode>"
1781 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1783 (match_operator 3 "arm_comparison_operator"
1784 [(match_operand:VDQIW 4 "s_register_operand" "")
1785 (match_operand:VDQIW 5 "s_register_operand" "")])
1786 (match_operand:VDQIW 1 "s_register_operand" "")
1787 (match_operand:VDQIW 2 "s_register_operand" "")))]
1791 int inverse = 0, immediate_zero = 0;
1793 mask = gen_reg_rtx (<V_cmp_result>mode);
1795 if (operands[5] == CONST0_RTX (<MODE>mode))
1797 else if (!REG_P (operands[5]))
1798 operands[5] = force_reg (<MODE>mode, operands[5]);
1800 switch (GET_CODE (operands[3]))
1803 emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
1808 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
1813 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1819 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
1822 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
1828 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
1831 emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
1836 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1846 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1849 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1855 ;; Patterns for builtins.
1857 ; good for plain vadd, vaddq.
;; Intrinsic expander for vadd on VCVTF modes.  When the mode is not a float
;; mode or -funsafe-math-optimizations is in effect it emits the canonical
;; add<mode>3 pattern; the other path emits neon_vadd<mode>_unspec.
1859 (define_expand "neon_vadd<mode>"
1860 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1861 (match_operand:VCVTF 1 "s_register_operand" "w")
1862 (match_operand:VCVTF 2 "s_register_operand" "w")
1863 (match_operand:SI 3 "immediate_operand" "i")]
1866 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1867 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1869 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1874 ; Note that NEON operations don't support the full IEEE 754 standard: in
1875 ; particular, denormal values are flushed to zero. This means that GCC cannot
1876 ; use those instructions for autovectorization, etc. unless
1877 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1878 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1879 ; header) must work in either case: if -funsafe-math-optimizations is given,
1880 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1881 ; expand to unspecs (which may potentially limit the extent to which they might
1882 ; be optimized by generic code).
1884 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Emits a plain vadd on the <V_if_elem> element type; the addition is wrapped
; in an unspec instead of being expressed as an RTL plus.
1886 (define_insn "neon_vadd<mode>_unspec"
1887 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1888 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1889 (match_operand:VCVTF 2 "s_register_operand" "w")]
1892 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1894 (if_then_else (match_test "<Is_float_mode>")
1895 (const_string "neon_fp_addsub_s<q>")
1896 (const_string "neon_add<q>")))]
1899 ; operand 3 represents in bits:
1900 ; bit 0: signed (vs unsigned).
1901 ; bit 1: rounding (vs none).
;; Long add (vaddl): adds two D-register integer vectors, producing a
;; <V_widen> result in a Q register.  The element-type prefix in the mnemonic
;; is printed from the immediate operand 3 (%T3).
1903 (define_insn "neon_vaddl<mode>"
1904 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1905 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1906 (match_operand:VDI 2 "s_register_operand" "w")
1907 (match_operand:SI 3 "immediate_operand" "i")]
1910 "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
1911 [(set_attr "type" "neon_add_long")]
;; Wide add (vaddw): adds a D-register integer vector (operand 2) to a
;; <V_widen> Q-register vector (operand 1).  The element-type prefix in the
;; mnemonic is printed from the immediate operand 3 (%T3).
1914 (define_insn "neon_vaddw<mode>"
1915 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1916 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1917 (match_operand:VDI 2 "s_register_operand" "w")
1918 (match_operand:SI 3 "immediate_operand" "i")]
1921 "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
1922 [(set_attr "type" "neon_add_widen")]
;; Halving add on VDQIW modes.  Both the mnemonic infix (%O3) and the
;; element-type prefix (%T3) are printed from the immediate operand 3.
1927 (define_insn "neon_vhadd<mode>"
1928 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1929 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1930 (match_operand:VDQIW 2 "s_register_operand" "w")
1931 (match_operand:SI 3 "immediate_operand" "i")]
1934 "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1935 [(set_attr "type" "neon_add_halve_q")]
;; Saturating add intrinsic (vqadd) on VDQIX modes.  The element-type prefix
;; in the mnemonic is printed from the immediate operand 3 (%T3).
1938 (define_insn "neon_vqadd<mode>"
1939 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1940 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1941 (match_operand:VDQIX 2 "s_register_operand" "w")
1942 (match_operand:SI 3 "immediate_operand" "i")]
1945 "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1946 [(set_attr "type" "neon_qadd<q>")]
;; Add-and-narrow (vaddhn): adds two Q-register vectors and produces a
;; <V_narrow> result in a D register.  The mnemonic infix is printed from the
;; immediate operand 3 (%O3).
1949 (define_insn "neon_vaddhn<mode>"
1950 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1951 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1952 (match_operand:VN 2 "s_register_operand" "w")
1953 (match_operand:SI 3 "immediate_operand" "i")]
1956 "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
1957 [(set_attr "type" "neon_add_halve_narrow_q")]
1960 ;; We cannot replace this unspec with mul<mode>3 because of the odd
1961 ;; polynomial multiplication case that can be specified by operand 3.
;; The element-type prefix of the vmul mnemonic is printed from operand 3
;; (%F3); the scheduling type is selected by <Is_float_mode>.
1962 (define_insn "neon_vmul<mode>"
1963 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1964 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
1965 (match_operand:VDQW 2 "s_register_operand" "w")
1966 (match_operand:SI 3 "immediate_operand" "i")]
1969 "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1971 (if_then_else (match_test "<Is_float_mode>")
1972 (const_string "neon_fp_mul_s<q>")
1973 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Intrinsic expander for multiply-accumulate on VDQW modes.  When the mode
;; is not a float mode or -funsafe-math-optimizations is in effect it emits
;; the canonical mul<mode>3add<mode>_neon pattern; the other path emits
;; neon_vmla<mode>_unspec.  Operands 0-3 are forwarded unchanged.
1976 (define_expand "neon_vmla<mode>"
1977 [(match_operand:VDQW 0 "s_register_operand" "=w")
1978 (match_operand:VDQW 1 "s_register_operand" "0")
1979 (match_operand:VDQW 2 "s_register_operand" "w")
1980 (match_operand:VDQW 3 "s_register_operand" "w")
1981 (match_operand:SI 4 "immediate_operand" "i")]
1984 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1985 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1986 operands[2], operands[3]));
1988 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1989 operands[2], operands[3]));
;; Intrinsic expander for fused multiply-add on VCVTF modes; requires
;; TARGET_FMA.  It emits fma<mode>4_intrinsic with operand 0 as destination
;; and operands 2 and 3 as its first two inputs.
1993 (define_expand "neon_vfma<VCVTF:mode>"
1994 [(match_operand:VCVTF 0 "s_register_operand")
1995 (match_operand:VCVTF 1 "s_register_operand")
1996 (match_operand:VCVTF 2 "s_register_operand")
1997 (match_operand:VCVTF 3 "s_register_operand")
1998 (match_operand:SI 4 "immediate_operand")]
1999 "TARGET_NEON && TARGET_FMA"
2001 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Intrinsic expander for fused multiply-subtract on VCVTF modes; requires
;; TARGET_FMA.  It emits fmsub<mode>4_intrinsic with operand 0 as destination
;; and operands 2 and 3 as its first two inputs.
2006 (define_expand "neon_vfms<VCVTF:mode>"
2007 [(match_operand:VCVTF 0 "s_register_operand")
2008 (match_operand:VCVTF 1 "s_register_operand")
2009 (match_operand:VCVTF 2 "s_register_operand")
2010 (match_operand:VCVTF 3 "s_register_operand")
2011 (match_operand:SI 4 "immediate_operand")]
2012 "TARGET_NEON && TARGET_FMA"
2014 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2019 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Emits vmla with operand 1 (the accumulator) tied to the destination by the
; "0" constraint; the multiply-accumulate is wrapped in an unspec.
2021 (define_insn "neon_vmla<mode>_unspec"
2022 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2023 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2024 (match_operand:VDQW 2 "s_register_operand" "w")
2025 (match_operand:VDQW 3 "s_register_operand" "w")]
2028 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2030 (if_then_else (match_test "<Is_float_mode>")
2031 (const_string "neon_fp_mla_s<q>")
2032 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-accumulate (vmlal): multiplies two D-register vectors and
;; accumulates into the <V_widen> operand 1, which is tied to the destination
;; by the "0" constraint.  The element-type prefix is printed from operand 4
;; (%T4).
2035 (define_insn "neon_vmlal<mode>"
2036 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2037 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2038 (match_operand:VW 2 "s_register_operand" "w")
2039 (match_operand:VW 3 "s_register_operand" "w")
2040 (match_operand:SI 4 "immediate_operand" "i")]
2043 "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2044 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Intrinsic expander for multiply-subtract on VDQW modes.  When the mode is
;; not a float mode or -funsafe-math-optimizations is in effect it emits the
;; canonical mul<mode>3neg<mode>add<mode>_neon pattern; the other path emits
;; neon_vmls<mode>_unspec.  Operands 0-3 are forwarded unchanged.
2047 (define_expand "neon_vmls<mode>"
2048 [(match_operand:VDQW 0 "s_register_operand" "=w")
2049 (match_operand:VDQW 1 "s_register_operand" "0")
2050 (match_operand:VDQW 2 "s_register_operand" "w")
2051 (match_operand:VDQW 3 "s_register_operand" "w")
2052 (match_operand:SI 4 "immediate_operand" "i")]
2055 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2056 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2057 operands[1], operands[2], operands[3]));
2059 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2060 operands[2], operands[3]));
2064 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Emits vmls with operand 1 (the accumulator) tied to the destination by the
; "0" constraint; the multiply-subtract is wrapped in an unspec.
2066 (define_insn "neon_vmls<mode>_unspec"
2067 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2068 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2069 (match_operand:VDQW 2 "s_register_operand" "w")
2070 (match_operand:VDQW 3 "s_register_operand" "w")]
2073 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2075 (if_then_else (match_test "<Is_float_mode>")
2076 (const_string "neon_fp_mla_s<q>")
2077 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-subtract (vmlsl): multiplies two D-register vectors and
;; subtracts from the <V_widen> operand 1, which is tied to the destination
;; by the "0" constraint.  The element-type prefix is printed from operand 4
;; (%T4).
2080 (define_insn "neon_vmlsl<mode>"
2081 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2082 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2083 (match_operand:VW 2 "s_register_operand" "w")
2084 (match_operand:VW 3 "s_register_operand" "w")
2085 (match_operand:SI 4 "immediate_operand" "i")]
2088 "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2089 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; neon_vqdmulh<mode>: emits the VQDMULH family for the VMDQI modes.  The
;; immediate operand 3 is used only through the %O3 modifier, whose output is
;; spliced into the mnemonic between "vq" and "dmulh".
2092 (define_insn "neon_vqdmulh<mode>"
2093 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2094 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2095 (match_operand:VMDQI 2 "s_register_operand" "w")
2096 (match_operand:SI 3 "immediate_operand" "i")]
2099 "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2100 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; neon_vqdmlal<mode>: emits VQDMLAL for the VMDI modes with a <V_widen>
;; accumulator (operand 1) tied to the destination.  The immediate operand 4
;; does not appear in the output template.
2103 (define_insn "neon_vqdmlal<mode>"
2104 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2105 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2106 (match_operand:VMDI 2 "s_register_operand" "w")
2107 (match_operand:VMDI 3 "s_register_operand" "w")
2108 (match_operand:SI 4 "immediate_operand" "i")]
2111 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2112 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; neon_vqdmlsl<mode>: emits VQDMLSL for the VMDI modes with a <V_widen>
;; accumulator (operand 1) tied to the destination.  The immediate operand 4
;; does not appear in the output template.
2115 (define_insn "neon_vqdmlsl<mode>"
2116 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2117 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2118 (match_operand:VMDI 2 "s_register_operand" "w")
2119 (match_operand:VMDI 3 "s_register_operand" "w")
2120 (match_operand:SI 4 "immediate_operand" "i")]
2123 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2124 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; neon_vmull<mode>: emits VMULL for two VW sources, producing a <V_widen>
;; result.  The immediate operand 3 is used only through the %T3 modifier in
;; the mnemonic suffix.
2127 (define_insn "neon_vmull<mode>"
2128 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2129 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2130 (match_operand:VW 2 "s_register_operand" "w")
2131 (match_operand:SI 3 "immediate_operand" "i")]
2134 "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2135 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; neon_vqdmull<mode>: emits VQDMULL for two VMDI sources, producing a
;; <V_widen> result.  The immediate operand 3 does not appear in the output
;; template.
2138 (define_insn "neon_vqdmull<mode>"
2139 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2140 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2141 (match_operand:VMDI 2 "s_register_operand" "w")
2142 (match_operand:SI 3 "immediate_operand" "i")]
2145 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2146 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; neon_vsub<mode>: expander for subtraction over the VCVTF modes.  For
;; non-float modes, or when flag_unsafe_math_optimizations is set, it emits
;; the generic sub<mode>3 pattern; the second emit_insn (presumably the else
;; arm, whose keyword is not visible in this excerpt) emits
;; neon_vsub<mode>_unspec.  Operand 3 is not passed to sub<mode>3.
2149 (define_expand "neon_vsub<mode>"
2150 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2151 (match_operand:VCVTF 1 "s_register_operand" "w")
2152 (match_operand:VCVTF 2 "s_register_operand" "w")
2153 (match_operand:SI 3 "immediate_operand" "i")]
2156 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2157 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2159 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2164 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; neon_vsub<mode>_unspec: unspec form of subtraction for the VCVTF modes.
;; Emits vsub.<V_if_elem>; the type attribute is neon_fp_addsub_s<q> for float
;; modes and neon_sub<q> otherwise.
2166 (define_insn "neon_vsub<mode>_unspec"
2167 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2168 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2169 (match_operand:VCVTF 2 "s_register_operand" "w")]
2172 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2174 (if_then_else (match_test "<Is_float_mode>")
2175 (const_string "neon_fp_addsub_s<q>")
2176 (const_string "neon_sub<q>")))]
;; neon_vsubl<mode>: emits VSUBL for two VDI sources, producing a <V_widen>
;; result.  The immediate operand 3 is used only through the %T3 modifier.
2179 (define_insn "neon_vsubl<mode>"
2180 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2181 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2182 (match_operand:VDI 2 "s_register_operand" "w")
2183 (match_operand:SI 3 "immediate_operand" "i")]
2186 "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2187 [(set_attr "type" "neon_sub_long")]
;; neon_vsubw<mode>: emits VSUBW.  Operand 1 is already in the <V_widen> mode
;; and operand 2 is a VDI-mode source; the result is <V_widen>.  The immediate
;; operand 3 is used only through the %T3 modifier.
2190 (define_insn "neon_vsubw<mode>"
2191 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2192 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2193 (match_operand:VDI 2 "s_register_operand" "w")
2194 (match_operand:SI 3 "immediate_operand" "i")]
2197 "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
2198 [(set_attr "type" "neon_sub_widen")]
;; neon_vqsub<mode>: emits VQSUB for the VDQIX modes.  The immediate operand 3
;; is used only through the %T3 modifier in the mnemonic suffix.
2201 (define_insn "neon_vqsub<mode>"
2202 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2203 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2204 (match_operand:VDQIX 2 "s_register_operand" "w")
2205 (match_operand:SI 3 "immediate_operand" "i")]
2208 "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2209 [(set_attr "type" "neon_qsub<q>")]
;; neon_vhsub<mode>: emits VHSUB for the VDQIW modes.  The immediate operand 3
;; is used only through the %T3 modifier in the mnemonic suffix.
2212 (define_insn "neon_vhsub<mode>"
2213 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2214 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2215 (match_operand:VDQIW 2 "s_register_operand" "w")
2216 (match_operand:SI 3 "immediate_operand" "i")]
2219 "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2220 [(set_attr "type" "neon_sub_halve<q>")]
;; neon_vsubhn<mode>: emits the VSUBHN family, taking two VN sources and
;; producing a <V_narrow> result.  The immediate operand 3 is used only
;; through the %O3 modifier, whose output is spliced in between "v" and
;; "subhn" in the mnemonic.
2223 (define_insn "neon_vsubhn<mode>"
2224 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2225 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2226 (match_operand:VN 2 "s_register_operand" "w")
2227 (match_operand:SI 3 "immediate_operand" "i")]
2230 "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
2231 [(set_attr "type" "neon_sub_halve_narrow_q")]
;; neon_vceq<mode>: emits VCEQ for the VDQW modes with a <V_cmp_result>
;; destination.  There are two alternatives: register-register, and
;; register against zero (constraint "Dz", printed as "#0").  The type
;; attribute distinguishes float, compare-with-zero and plain compare.
2234 (define_insn "neon_vceq<mode>"
2235 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2236 (unspec:<V_cmp_result>
2237 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2238 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2239 (match_operand:SI 3 "immediate_operand" "i,i")]
2243 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2244 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2246 (if_then_else (match_test "<Is_float_mode>")
2247 (const_string "neon_fp_compare_s<q>")
2248 (if_then_else (match_operand 2 "zero_operand")
2249 (const_string "neon_compare_zero<q>")
2250 (const_string "neon_compare<q>"))))]
;; neon_vcge<mode>: emits VCGE for the VDQW modes with a <V_cmp_result>
;; destination.  There are two alternatives: register-register, and
;; register against zero (constraint "Dz", printed as "#0").  The immediate
;; operand 3 is used only through the %T3 modifier in the mnemonic suffix.
2253 (define_insn "neon_vcge<mode>"
2254 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2255 (unspec:<V_cmp_result>
2256 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2257 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2258 (match_operand:SI 3 "immediate_operand" "i,i")]
2262 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2263 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2265 (if_then_else (match_test "<Is_float_mode>")
2266 (const_string "neon_fp_compare_s<q>")
2267 (if_then_else (match_operand 2 "zero_operand")
2268 (const_string "neon_compare_zero<q>")
2269 (const_string "neon_compare<q>"))))]
;; neon_vcgeu<mode>: register-register VCGE for the integer VDQIW modes.
;; Unlike neon_vcge<mode> it has no compare-with-zero alternative.  The
;; immediate operand 3 is used only through the %T3 modifier.
2272 (define_insn "neon_vcgeu<mode>"
2273 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2274 (unspec:<V_cmp_result>
2275 [(match_operand:VDQIW 1 "s_register_operand" "w")
2276 (match_operand:VDQIW 2 "s_register_operand" "w")
2277 (match_operand:SI 3 "immediate_operand" "i")]
2280 "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2281 [(set_attr "type" "neon_compare<q>")]
;; neon_vcgt<mode>: emits VCGT for the VDQW modes with a <V_cmp_result>
;; destination.  There are two alternatives: register-register, and
;; register against zero (constraint "Dz", printed as "#0").  The immediate
;; operand 3 is used only through the %T3 modifier in the mnemonic suffix.
2284 (define_insn "neon_vcgt<mode>"
2285 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2286 (unspec:<V_cmp_result>
2287 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2288 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2289 (match_operand:SI 3 "immediate_operand" "i,i")]
2293 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2294 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2296 (if_then_else (match_test "<Is_float_mode>")
2297 (const_string "neon_fp_compare_s<q>")
2298 (if_then_else (match_operand 2 "zero_operand")
2299 (const_string "neon_compare_zero<q>")
2300 (const_string "neon_compare<q>"))))]
;; neon_vcgtu<mode>: register-register VCGT for the integer VDQIW modes.
;; Unlike neon_vcgt<mode> it has no compare-with-zero alternative.  The
;; immediate operand 3 is used only through the %T3 modifier.
2303 (define_insn "neon_vcgtu<mode>"
2304 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2305 (unspec:<V_cmp_result>
2306 [(match_operand:VDQIW 1 "s_register_operand" "w")
2307 (match_operand:VDQIW 2 "s_register_operand" "w")
2308 (match_operand:SI 3 "immediate_operand" "i")]
2311 "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2312 [(set_attr "type" "neon_compare<q>")]
2315 ;; VCLE and VCLT only support comparisons with immediate zero (register
2316 ;; variants are VCGE and VCGT with operands reversed).
;; neon_vcle<mode>: emits VCLE against immediate zero only; operand 2 must
;; satisfy zero_operand and is always printed as "#0".
;; NOTE(review): the inner (match_operand 2 "zero_operand") test in the type
;; attribute repeats the operand's own predicate, so the "neon_compare<q>"
;; arm looks unreachable -- confirm before simplifying.
2318 (define_insn "neon_vcle<mode>"
2319 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2320 (unspec:<V_cmp_result>
2321 [(match_operand:VDQW 1 "s_register_operand" "w")
2322 (match_operand:VDQW 2 "zero_operand" "Dz")
2323 (match_operand:SI 3 "immediate_operand" "i")]
2326 "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2328 (if_then_else (match_test "<Is_float_mode>")
2329 (const_string "neon_fp_compare_s<q>")
2330 (if_then_else (match_operand 2 "zero_operand")
2331 (const_string "neon_compare_zero<q>")
2332 (const_string "neon_compare<q>"))))]
;; neon_vclt<mode>: emits VCLT against immediate zero only; operand 2 must
;; satisfy zero_operand and is always printed as "#0".
;; NOTE(review): the inner (match_operand 2 "zero_operand") test in the type
;; attribute repeats the operand's own predicate, so the "neon_compare<q>"
;; arm looks unreachable -- confirm before simplifying.
2335 (define_insn "neon_vclt<mode>"
2336 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2337 (unspec:<V_cmp_result>
2338 [(match_operand:VDQW 1 "s_register_operand" "w")
2339 (match_operand:VDQW 2 "zero_operand" "Dz")
2340 (match_operand:SI 3 "immediate_operand" "i")]
2343 "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2345 (if_then_else (match_test "<Is_float_mode>")
2346 (const_string "neon_fp_compare_s<q>")
2347 (if_then_else (match_operand 2 "zero_operand")
2348 (const_string "neon_compare_zero<q>")
2349 (const_string "neon_compare<q>"))))]
;; neon_vcage<mode>: emits VACGE for the VCVTF modes with a <V_cmp_result>
;; destination.  The immediate operand 3 does not appear in the template.
2352 (define_insn "neon_vcage<mode>"
2353 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2354 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2355 (match_operand:VCVTF 2 "s_register_operand" "w")
2356 (match_operand:SI 3 "immediate_operand" "i")]
2359 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2360 [(set_attr "type" "neon_fp_compare_s<q>")]
;; neon_vcagt<mode>: emits VACGT for the VCVTF modes with a <V_cmp_result>
;; destination.  The immediate operand 3 does not appear in the template.
2363 (define_insn "neon_vcagt<mode>"
2364 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2365 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2366 (match_operand:VCVTF 2 "s_register_operand" "w")
2367 (match_operand:SI 3 "immediate_operand" "i")]
2370 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2371 [(set_attr "type" "neon_fp_compare_s<q>")]
;; neon_vtst<mode>: emits VTST for the integer VDQIW modes.  The immediate
;; operand 3 does not appear in the output template.
2374 (define_insn "neon_vtst<mode>"
2375 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2376 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2377 (match_operand:VDQIW 2 "s_register_operand" "w")
2378 (match_operand:SI 3 "immediate_operand" "i")]
2381 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2382 [(set_attr "type" "neon_tst<q>")]
;; neon_vabd<mode>: emits VABD for the VDQW modes.  The immediate operand 3
;; is used only through the %T3 modifier; the type attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
2385 (define_insn "neon_vabd<mode>"
2386 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2387 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2388 (match_operand:VDQW 2 "s_register_operand" "w")
2389 (match_operand:SI 3 "immediate_operand" "i")]
2392 "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2394 (if_then_else (match_test "<Is_float_mode>")
2395 (const_string "neon_fp_abd_s<q>")
2396 (const_string "neon_abd<q>")))]
;; neon_vabdl<mode>: emits VABDL for two VW sources, producing a <V_widen>
;; result.  The immediate operand 3 is used only through the %T3 modifier.
2399 (define_insn "neon_vabdl<mode>"
2400 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2401 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2402 (match_operand:VW 2 "s_register_operand" "w")
2403 (match_operand:SI 3 "immediate_operand" "i")]
2406 "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2407 [(set_attr "type" "neon_abd_long")]
;; neon_vaba<mode>: emits VABA for the VDQIW modes.  The RTL is a plus of an
;; unspec over operands 2-4 and the accumulator operand 1, which is tied to
;; the destination by the "0" constraint.  Operand 4 is used only through the
;; %T4 modifier.
2410 (define_insn "neon_vaba<mode>"
2411 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2412 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2413 (match_operand:VDQIW 3 "s_register_operand" "w")
2414 (match_operand:SI 4 "immediate_operand" "i")]
2416 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2418 "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2419 [(set_attr "type" "neon_arith_acc<q>")]
;; neon_vabal<mode>: emits VABAL.  The RTL is a plus of a <V_widen> unspec
;; over the VW operands 2-3 (plus immediate operand 4) and the <V_widen>
;; accumulator operand 1, which is tied to the destination.  Operand 4 is
;; used only through the %T4 modifier.
2422 (define_insn "neon_vabal<mode>"
2423 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2424 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2425 (match_operand:VW 3 "s_register_operand" "w")
2426 (match_operand:SI 4 "immediate_operand" "i")]
2428 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2430 "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2431 [(set_attr "type" "neon_arith_acc<q>")]
;; neon_vmax<mode>: emits VMAX for the VDQW modes.  The immediate operand 3
;; is used only through the %T3 modifier; the type attribute is
;; neon_fp_minmax_s<q> for float modes and neon_minmax<q> otherwise.
2434 (define_insn "neon_vmax<mode>"
2435 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2436 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2437 (match_operand:VDQW 2 "s_register_operand" "w")
2438 (match_operand:SI 3 "immediate_operand" "i")]
2441 "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2443 (if_then_else (match_test "<Is_float_mode>")
2444 (const_string "neon_fp_minmax_s<q>")
2445 (const_string "neon_minmax<q>")))]
;; neon_vmin<mode>: emits VMIN for the VDQW modes.  The immediate operand 3
;; is used only through the %T3 modifier; the type attribute is
;; neon_fp_minmax_s<q> for float modes and neon_minmax<q> otherwise.
2448 (define_insn "neon_vmin<mode>"
2449 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2450 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2451 (match_operand:VDQW 2 "s_register_operand" "w")
2452 (match_operand:SI 3 "immediate_operand" "i")]
2455 "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2457 (if_then_else (match_test "<Is_float_mode>")
2458 (const_string "neon_fp_minmax_s<q>")
2459 (const_string "neon_minmax<q>")))]
;; neon_vpadd<mode>: expander for the VD modes that forwards to
;; neon_vpadd_internal<mode>.  Only the first two arguments of that call are
;; visible in this excerpt.
2462 (define_expand "neon_vpadd<mode>"
2463 [(match_operand:VD 0 "s_register_operand" "=w")
2464 (match_operand:VD 1 "s_register_operand" "w")
2465 (match_operand:VD 2 "s_register_operand" "w")
2466 (match_operand:SI 3 "immediate_operand" "i")]
2469 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; neon_vpaddl<mode>: emits VPADDL for a VDQIW source, producing a
;; <V_double_width> result.  The immediate operand 2 is used only through the
;; %T2 modifier in the mnemonic suffix.
2474 (define_insn "neon_vpaddl<mode>"
2475 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2476 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
2477 (match_operand:SI 2 "immediate_operand" "i")]
2480 "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2481 [(set_attr "type" "neon_reduc_add_long")]
;; neon_vpadal<mode>: emits VPADAL.  Operand 1 is the <V_double_width>
;; accumulator, tied to the destination by the "0" constraint; operand 2 is
;; the VDQIW source.  The immediate operand 3 is used only through %T3.
2484 (define_insn "neon_vpadal<mode>"
2485 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2486 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2487 (match_operand:VDQIW 2 "s_register_operand" "w")
2488 (match_operand:SI 3 "immediate_operand" "i")]
2491 "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2492 [(set_attr "type" "neon_reduc_add_acc")]
;; neon_vpmax<mode>: emits VPMAX for the VD modes.  The immediate operand 3
;; is used only through the %T3 modifier; the type attribute is
;; neon_fp_reduc_minmax_s<q> for float modes and neon_reduc_minmax<q>
;; otherwise.
2495 (define_insn "neon_vpmax<mode>"
2496 [(set (match_operand:VD 0 "s_register_operand" "=w")
2497 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2498 (match_operand:VD 2 "s_register_operand" "w")
2499 (match_operand:SI 3 "immediate_operand" "i")]
2502 "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2504 (if_then_else (match_test "<Is_float_mode>")
2505 (const_string "neon_fp_reduc_minmax_s<q>")
2506 (const_string "neon_reduc_minmax<q>")))]
;; neon_vpmin<mode>: emits VPMIN for the VD modes.  The immediate operand 3
;; is used only through the %T3 modifier; the type attribute is
;; neon_fp_reduc_minmax_s<q> for float modes and neon_reduc_minmax<q>
;; otherwise.
2509 (define_insn "neon_vpmin<mode>"
2510 [(set (match_operand:VD 0 "s_register_operand" "=w")
2511 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2512 (match_operand:VD 2 "s_register_operand" "w")
2513 (match_operand:SI 3 "immediate_operand" "i")]
2516 "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2518 (if_then_else (match_test "<Is_float_mode>")
2519 (const_string "neon_fp_reduc_minmax_s<q>")
2520 (const_string "neon_reduc_minmax<q>")))]
;; neon_vrecps<mode>: emits VRECPS for the VCVTF modes.  The immediate
;; operand 3 does not appear in the output template.
2523 (define_insn "neon_vrecps<mode>"
2524 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2525 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2526 (match_operand:VCVTF 2 "s_register_operand" "w")
2527 (match_operand:SI 3 "immediate_operand" "i")]
2530 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2531 [(set_attr "type" "neon_fp_recps_s<q>")]
;; neon_vrsqrts<mode>: emits VRSQRTS for the VCVTF modes.  The immediate
;; operand 3 does not appear in the output template.
2534 (define_insn "neon_vrsqrts<mode>"
2535 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2536 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2537 (match_operand:VCVTF 2 "s_register_operand" "w")
2538 (match_operand:SI 3 "immediate_operand" "i")]
2541 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2542 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; neon_vabs<mode>: expander that forwards operands 0 and 1 to the generic
;; abs<mode>2 pattern.  The immediate operand 2 is not used.
2545 (define_expand "neon_vabs<mode>"
2546 [(match_operand:VDQW 0 "s_register_operand" "")
2547 (match_operand:VDQW 1 "s_register_operand" "")
2548 (match_operand:SI 2 "immediate_operand" "")]
2551 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; neon_vqabs<mode>: emits VQABS for the integer VDQIW modes.  The immediate
;; operand 2 does not appear in the output template.
2555 (define_insn "neon_vqabs<mode>"
2556 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2557 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2558 (match_operand:SI 2 "immediate_operand" "i")]
2561 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2562 [(set_attr "type" "neon_qabs<q>")]
;; neon_bswap<mode>: implements the bswap RTL code for the VDQHSD modes by
;; emitting vrev<V_sz_elem>.8, i.e. a VREV whose region width is the element
;; size and whose data size is 8 bits.
2565 (define_insn "neon_bswap<mode>"
2566 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2567 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2569 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2570 [(set_attr "type" "neon_rev<q>")]
;; neon_vneg<mode>: expander that forwards operands 0 and 1 to the generic
;; neg<mode>2 pattern.  The immediate operand 2 is not used.
2573 (define_expand "neon_vneg<mode>"
2574 [(match_operand:VDQW 0 "s_register_operand" "")
2575 (match_operand:VDQW 1 "s_register_operand" "")
2576 (match_operand:SI 2 "immediate_operand" "")]
2579 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; neon_copysignf<mode>: expander for the VCVTF modes.  It builds a constant
;; vector in the <V_cmp_result> mode with 0x80000000 in every element, loads
;; it into a fresh register, copies operand 2 into operand 0, reinterprets the
;; mask register in <MODE>mode via a subreg at offset 0, and then emits
;; neon_vbsl<mode> with that mask (the remaining vbsl arguments are not
;; visible in this excerpt).
;; NOTE(review): GEN_INT (0x80000000) produces a CONST_INT that is not
;; sign-extended for a 32-bit element mode; if the element mode is SImode,
;; gen_int_mode (0x80000000, SImode) would give the canonical form -- confirm.
2583 (define_expand "neon_copysignf<mode>"
2584 [(match_operand:VCVTF 0 "register_operand")
2585 (match_operand:VCVTF 1 "register_operand")
2586 (match_operand:VCVTF 2 "register_operand")]
2590 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2591 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2592 rtvec v = rtvec_alloc (n_elt);
2594 /* Create bitmask for vector select. */
2595 for (i = 0; i < n_elt; ++i)
2596 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
2598 emit_move_insn (v_bitmask,
2599 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2600 emit_move_insn (operands[0], operands[2]);
2601 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2602 <VCVTF:V_cmp_result>mode, 0);
2603 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; neon_vqneg<mode>: emits VQNEG for the integer VDQIW modes.  The immediate
;; operand 2 does not appear in the output template.
2610 (define_insn "neon_vqneg<mode>"
2611 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2612 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2613 (match_operand:SI 2 "immediate_operand" "i")]
2616 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2617 [(set_attr "type" "neon_qneg<q>")]
;; neon_vcls<mode>: emits VCLS for the integer VDQIW modes.  The immediate
;; operand 2 does not appear in the output template.
2620 (define_insn "neon_vcls<mode>"
2621 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2622 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2623 (match_operand:SI 2 "immediate_operand" "i")]
2626 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2627 [(set_attr "type" "neon_cls<q>")]
;; clz<mode>2: implements the clz RTL code for the integer VDQIW modes by
;; emitting VCLZ.  neon_vclz<mode> expands to this pattern.
2630 (define_insn "clz<mode>2"
2631 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2632 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2634 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2635 [(set_attr "type" "neon_cnt<q>")]
;; neon_vclz<mode>: expander that forwards operands 0 and 1 to clz<mode>2.
;; The immediate operand 2 is not used.
2638 (define_expand "neon_vclz<mode>"
2639 [(match_operand:VDQIW 0 "s_register_operand" "")
2640 (match_operand:VDQIW 1 "s_register_operand" "")
2641 (match_operand:SI 2 "immediate_operand" "")]
2644 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; popcount<mode>2: implements the popcount RTL code for the VE modes by
;; emitting VCNT.  neon_vcnt<mode> expands to this pattern.
2648 (define_insn "popcount<mode>2"
2649 [(set (match_operand:VE 0 "s_register_operand" "=w")
2650 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2652 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2653 [(set_attr "type" "neon_cnt<q>")]
;; neon_vcnt<mode>: expander that forwards operands 0 and 1 to
;; popcount<mode>2.  The immediate operand 2 is not used.
2656 (define_expand "neon_vcnt<mode>"
2657 [(match_operand:VE 0 "s_register_operand" "=w")
2658 (match_operand:VE 1 "s_register_operand" "w")
2659 (match_operand:SI 2 "immediate_operand" "i")]
2662 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; neon_vrecpe<mode>: emits VRECPE for the V32 modes, using <V_u_elem> as the
;; data-type suffix.  The immediate operand 2 does not appear in the template.
;; NOTE(review): the type attribute is the floating-point one for every V32
;; mode -- confirm this is intended if V32 also contains integer modes.
2666 (define_insn "neon_vrecpe<mode>"
2667 [(set (match_operand:V32 0 "s_register_operand" "=w")
2668 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2669 (match_operand:SI 2 "immediate_operand" "i")]
2672 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2673 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; neon_vrsqrte<mode>: emits VRSQRTE for the V32 modes, using <V_u_elem> as
;; the data-type suffix.  The immediate operand 2 does not appear in the
;; template.
;; NOTE(review): the type attribute is the floating-point one for every V32
;; mode -- confirm this is intended if V32 also contains integer modes.
2676 (define_insn "neon_vrsqrte<mode>"
2677 [(set (match_operand:V32 0 "s_register_operand" "=w")
2678 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2679 (match_operand:SI 2 "immediate_operand" "i")]
2682 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2683 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; neon_vmvn<mode>: expander that forwards operands 0 and 1 to the generic
;; one_cmpl<mode>2 pattern.  The immediate operand 2 is not used.
2686 (define_expand "neon_vmvn<mode>"
2687 [(match_operand:VDQIW 0 "s_register_operand" "")
2688 (match_operand:VDQIW 1 "s_register_operand" "")
2689 (match_operand:SI 2 "immediate_operand" "")]
2692 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; neon_vget_lane<mode>_sext_internal (VD variant): moves one lane of a VD
;; vector into a core register ("=r") with vmov.s<V_sz_elem>.  On
;; BYTES_BIG_ENDIAN targets the lane index is mirrored
;; (nunits - 1 - lane) before printing.  The extension wrapper around the
;; vec_select is not visible in this excerpt.
2696 (define_insn "neon_vget_lane<mode>_sext_internal"
2697 [(set (match_operand:SI 0 "s_register_operand" "=r")
2699 (vec_select:<V_elem>
2700 (match_operand:VD 1 "s_register_operand" "w")
2701 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2704 if (BYTES_BIG_ENDIAN)
2706 int elt = INTVAL (operands[2]);
2707 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2708 operands[2] = GEN_INT (elt);
2710 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2712 [(set_attr "type" "neon_to_gp")]
;; neon_vget_lane<mode>_zext_internal (VD variant): moves one lane of a VD
;; vector into a core register ("=r") with vmov.u<V_sz_elem>.  On
;; BYTES_BIG_ENDIAN targets the lane index is mirrored
;; (nunits - 1 - lane) before printing.  The extension wrapper around the
;; vec_select is not visible in this excerpt.
2715 (define_insn "neon_vget_lane<mode>_zext_internal"
2716 [(set (match_operand:SI 0 "s_register_operand" "=r")
2718 (vec_select:<V_elem>
2719 (match_operand:VD 1 "s_register_operand" "w")
2720 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2723 if (BYTES_BIG_ENDIAN)
2725 int elt = INTVAL (operands[2]);
2726 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2727 operands[2] = GEN_INT (elt);
2729 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2731 [(set_attr "type" "neon_to_gp")]
;; neon_vget_lane<mode>_sext_internal (VQ variant): moves one lane of a VQ
;; vector into a core register with vmov.s<V_sz_elem>.  The lane is split
;; into a half selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored on BYTES_BIG_ENDIAN).  The half is addressed as a
;; <V_HALF>mode register at regno + 2 * half, and the instruction is printed
;; through output_asm_insn on a local operand array.
2734 (define_insn "neon_vget_lane<mode>_sext_internal"
2735 [(set (match_operand:SI 0 "s_register_operand" "=r")
2737 (vec_select:<V_elem>
2738 (match_operand:VQ 1 "s_register_operand" "w")
2739 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2743 int regno = REGNO (operands[1]);
2744 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2745 unsigned int elt = INTVAL (operands[2]);
2746 unsigned int elt_adj = elt % halfelts;
2748 if (BYTES_BIG_ENDIAN)
2749 elt_adj = halfelts - 1 - elt_adj;
2751 ops[0] = operands[0];
2752 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2753 ops[2] = GEN_INT (elt_adj);
2754 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2758 [(set_attr "type" "neon_to_gp_q")]
;; neon_vget_lane<mode>_zext_internal (VQ variant): moves one lane of a VQ
;; vector into a core register with vmov.u<V_sz_elem>.  The lane is split
;; into a half selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored on BYTES_BIG_ENDIAN).  The half is addressed as a
;; <V_HALF>mode register at regno + 2 * half, and the instruction is printed
;; through output_asm_insn on a local operand array.
2761 (define_insn "neon_vget_lane<mode>_zext_internal"
2762 [(set (match_operand:SI 0 "s_register_operand" "=r")
2764 (vec_select:<V_elem>
2765 (match_operand:VQ 1 "s_register_operand" "w")
2766 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2770 int regno = REGNO (operands[1]);
2771 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2772 unsigned int elt = INTVAL (operands[2]);
2773 unsigned int elt_adj = elt % halfelts;
2775 if (BYTES_BIG_ENDIAN)
2776 elt_adj = halfelts - 1 - elt_adj;
2778 ops[0] = operands[0];
2779 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2780 ops[2] = GEN_INT (elt_adj);
2781 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2785 [(set_attr "type" "neon_to_gp_q")]
;; neon_vget_lane<mode>: expander for the lane-extract intrinsic over VDQW.
;; Operand 2 is the lane and operand 3 an info word ("magic").  The lane is
;; passed to neon_lane_bounds with limits 0 and GET_MODE_NUNITS; on
;; BYTES_BIG_ENDIAN it is then XORed with (elements per 64 bits - 1).
;; Dispatch: if (magic & 3) == 3 or the element is 32 bits wide, use
;; vec_extract<mode>; otherwise use the _sext_internal pattern when bit 0 of
;; magic is set and the _zext_internal pattern when it is clear.
2788 (define_expand "neon_vget_lane<mode>"
2789 [(match_operand:<V_ext> 0 "s_register_operand" "")
2790 (match_operand:VDQW 1 "s_register_operand" "")
2791 (match_operand:SI 2 "immediate_operand" "")
2792 (match_operand:SI 3 "immediate_operand" "")]
2795 HOST_WIDE_INT magic = INTVAL (operands[3]);
2798 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2800 if (BYTES_BIG_ENDIAN)
2802 /* The intrinsics are defined in terms of a model where the
2803 element ordering in memory is vldm order, whereas the generic
2804 RTL is defined in terms of a model where the element ordering
2805 in memory is array order. Convert the lane number to conform
2807 unsigned int elt = INTVAL (operands[2]);
2808 unsigned int reg_nelts
2809 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2810 elt ^= reg_nelts - 1;
2811 operands[2] = GEN_INT (elt);
2814 if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2815 insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
2818 if ((magic & 1) != 0)
2819 insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
2822 insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
2829 ; Operand 3 (info word) is ignored because it does nothing useful with 64-bit elements.
;; neon_vget_lanedi: DImode lane extract.  The lane (operand 2) is passed to
;; neon_lane_bounds with limits 0 and 1, after which the expander is a plain
;; move of operand 1 into operand 0.  Operand 3 is not used.
2832 (define_expand "neon_vget_lanedi"
2833 [(match_operand:DI 0 "s_register_operand" "=r")
2834 (match_operand:DI 1 "s_register_operand" "w")
2835 (match_operand:SI 2 "immediate_operand" "i")
2836 (match_operand:SI 3 "immediate_operand" "i")]
2839 neon_lane_bounds (operands[2], 0, 1);
2840 emit_move_insn (operands[0], operands[1]);
;; neon_vget_lanev2di: V2DI lane extract.  Switches on the lane (operand 2)
;; and moves either the DImode low part or the DImode high part of operand 1
;; into operand 0; a third arm calls neon_lane_bounds on the lane.  The case
;; labels themselves are not visible in this excerpt.  Operand 3 is not used.
2844 (define_expand "neon_vget_lanev2di"
2845 [(match_operand:DI 0 "s_register_operand" "")
2846 (match_operand:V2DI 1 "s_register_operand" "")
2847 (match_operand:SI 2 "immediate_operand" "")
2848 (match_operand:SI 3 "immediate_operand" "")]
2851 switch (INTVAL (operands[2]))
2854 emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
2857 emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
2860 neon_lane_bounds (operands[2], 0, 1);
;; neon_vset_lane<mode>: expander for the lane-insert intrinsic over VDQ.
;; Operand 1 is the scalar, operand 2 the input vector and operand 3 the
;; lane.  The lane is passed to neon_lane_bounds; on BYTES_BIG_ENDIAN it is
;; XORed with (elements per 64 bits - 1).  The lane is then handed to
;; vec_set<mode>_internal as a one-hot mask (1 << elt).
2866 (define_expand "neon_vset_lane<mode>"
2867 [(match_operand:VDQ 0 "s_register_operand" "=w")
2868 (match_operand:<V_elem> 1 "s_register_operand" "r")
2869 (match_operand:VDQ 2 "s_register_operand" "0")
2870 (match_operand:SI 3 "immediate_operand" "i")]
2873 unsigned int elt = INTVAL (operands[3]);
2874 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
2876 if (BYTES_BIG_ENDIAN)
2878 unsigned int reg_nelts
2879 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2880 elt ^= reg_nelts - 1;
2883 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2884 GEN_INT (1 << elt), operands[2]));
2888 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; neon_vset_lanedi: DImode lane insert.  The lane (operand 3) is passed to
;; neon_lane_bounds with limits 0 and 1, after which the expander is a plain
;; move of the scalar operand 1 into operand 0.  Operand 2 is not used.
2890 (define_expand "neon_vset_lanedi"
2891 [(match_operand:DI 0 "s_register_operand" "=w")
2892 (match_operand:DI 1 "s_register_operand" "r")
2893 (match_operand:DI 2 "s_register_operand" "0")
2894 (match_operand:SI 3 "immediate_operand" "i")]
2897 neon_lane_bounds (operands[3], 0, 1);
2898 emit_move_insn (operands[0], operands[1]);
;; neon_vcreate<mode>: expander for the VDX modes.  Reinterprets the DImode
;; operand 1 in <MODE>mode via gen_lowpart and moves it into operand 0.
2902 (define_expand "neon_vcreate<mode>"
2903 [(match_operand:VDX 0 "s_register_operand" "")
2904 (match_operand:DI 1 "general_operand" "")]
2907 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2908 emit_move_insn (operands[0], src);
;; neon_vdup_n<mode> (VX variant): vec_duplicate of a <V_elem> scalar held in
;; a core register (constraint "r"), emitted as VDUP.
2912 (define_insn "neon_vdup_n<mode>"
2913 [(set (match_operand:VX 0 "s_register_operand" "=w")
2914 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2916 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2917 [(set_attr "type" "neon_from_gp<q>")]
;; neon_vdup_n<mode> (V32 variant): vec_duplicate of a <V_elem> scalar, with
;; two alternatives.  The first takes the scalar from a core register ("r")
;; and prints it as %1; the second takes it from the "t" register class and
;; prints it with the %y1 modifier.  The type attribute differs per
;; alternative (neon_from_gp<q> and neon_dup<q>).
2920 (define_insn "neon_vdup_n<mode>"
2921 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2922 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2925 vdup.<V_sz_elem>\t%<V_reg>0, %1
2926 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2927 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; neon_vdup_ndi: DImode "duplicate"; there is a single element, so the
;; expander is a plain move of operand 1 into operand 0.
2930 (define_expand "neon_vdup_ndi"
2931 [(match_operand:DI 0 "s_register_operand" "=w")
2932 (match_operand:DI 1 "s_register_operand" "r")]
2935 emit_move_insn (operands[0], operands[1]);
;; neon_vdup_nv2di: vec_duplicate of a DImode scalar into V2DI.  Each
;; alternative emits two VMOVs, one to %e0 and one to %f0 of the destination:
;; from the core register pair %Q1/%R1 ("r" alternative) or from %P1 ("w"
;; alternative).  The length attribute is 8 for the two-instruction sequence.
2940 (define_insn "neon_vdup_nv2di"
2941 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2942 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2945 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2946 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2947 [(set_attr "length" "8")
2948 (set_attr "type" "multiple")]
;; neon_vdup_lane<mode>_internal: duplicates one lane of a
;; <V_double_vector_mode> source (operand 1, lane in operand 2) across a VDQW
;; destination using VDUP.  On BYTES_BIG_ENDIAN the lane is mirrored
;; (nunits - 1 - lane) before printing.  Two templates exist, printing the
;; destination with %P0 or %q0; the test that chooses between them is not
;; visible in this excerpt.
2951 (define_insn "neon_vdup_lane<mode>_internal"
2952 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2954 (vec_select:<V_elem>
2955 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2956 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2959 if (BYTES_BIG_ENDIAN)
2961 int elt = INTVAL (operands[2]);
2962 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2963 operands[2] = GEN_INT (elt);
2966 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2968 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2970 [(set_attr "type" "neon_dup<q>")]
;; neon_vdup_lane<mode>: expander for the lane-duplicate intrinsic.  The lane
;; (operand 2) is passed to neon_lane_bounds with limits 0 and the number of
;; units in <V_double_vector_mode>; on BYTES_BIG_ENDIAN it is XORed with
;; (elements per 64 bits - 1).  The result is forwarded to
;; neon_vdup_lane<mode>_internal (trailing arguments not visible here).
2973 (define_expand "neon_vdup_lane<mode>"
2974 [(match_operand:VDQW 0 "s_register_operand" "=w")
2975 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2976 (match_operand:SI 2 "immediate_operand" "i")]
2979 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
2980 if (BYTES_BIG_ENDIAN)
2982 unsigned int elt = INTVAL (operands[2]);
2983 unsigned int reg_nelts
2984 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
2985 elt ^= reg_nelts - 1;
2986 operands[2] = GEN_INT (elt);
2988 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2993 ; Scalar index is only bounds-checked and otherwise ignored, since only zero is valid here.
;; neon_vdup_lanedi: DImode lane duplicate.  The lane (operand 2) is passed
;; to neon_lane_bounds with limits 0 and 1, after which the expander is a
;; plain move of operand 1 into operand 0.
2994 (define_expand "neon_vdup_lanedi"
2995 [(match_operand:DI 0 "s_register_operand" "=w")
2996 (match_operand:DI 1 "s_register_operand" "w")
2997 (match_operand:SI 2 "immediate_operand" "i")]
3000 neon_lane_bounds (operands[2], 0, 1);
3001 emit_move_insn (operands[0], operands[1]);
3005 ; Likewise for v2di, as the DImode second operand has only a single element.
;; neon_vdup_lanev2di: duplicates a DImode source into V2DI.  The lane
;; (operand 2) is passed to neon_lane_bounds with limits 0 and 1, and the
;; actual work is delegated to neon_vdup_nv2di.
3006 (define_expand "neon_vdup_lanev2di"
3007 [(match_operand:V2DI 0 "s_register_operand" "=w")
3008 (match_operand:DI 1 "s_register_operand" "w")
3009 (match_operand:SI 2 "immediate_operand" "i")]
3012 neon_lane_bounds (operands[2], 0, 1);
3013 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3017 ; Disabled before reload because we don't want combine doing something silly,
3018 ; but used by the post-reload expansion of neon_vcombine.
;; *neon_vswp<mode>: register swap for the VDQX modes, emitted as VSWP.  Both
;; operands are read-write ("+w") and the pattern is a pair of sets that
;; exchange them.  The condition requires reload_completed, so the pattern
;; only matches after register allocation.
3019 (define_insn "*neon_vswp<mode>"
3020 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3021 (match_operand:VDQX 1 "s_register_operand" "+w"))
3022 (set (match_dup 1) (match_dup 0))]
3023 "TARGET_NEON && reload_completed"
3024 "vswp\t%<V_reg>0, %<V_reg>1"
3025 [(set_attr "type" "neon_permute<q>")]
3028 ;; In this insn, operand 1 should be low, and operand 2 the high part of the dest vector.
3030 ;; FIXME: A different implementation of this builtin could make it much
3031 ;; more likely that we wouldn't actually need to output anything (we could make
3032 ;; it so that the reg allocator puts things in the right places magically
3033 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; neon_vcombine<mode>: vec_concat of two VDX operands into a <V_DOUBLE>
;; destination.  It is a define_insn_and_split whose split condition adds
;; reload_completed; the split body calls neon_split_vcombine on the
;; operands, so the real instructions are generated after register
;; allocation.
3035 (define_insn_and_split "neon_vcombine<mode>"
3036 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3037 (vec_concat:<V_DOUBLE>
3038 (match_operand:VDX 1 "s_register_operand" "w")
3039 (match_operand:VDX 2 "s_register_operand" "w")))]
3042 "&& reload_completed"
3045 neon_split_vcombine (operands);
3048 [(set_attr "type" "multiple")]
;; neon_vget_high<mode>: extracts a <V_HALF> piece of a VQX vector by moving
;; a subreg of operand 1 taken at byte offset GET_MODE_SIZE (<V_HALF>mode)
;; into operand 0.
3051 (define_expand "neon_vget_high<mode>"
3052 [(match_operand:<V_HALF> 0 "s_register_operand")
3053 (match_operand:VQX 1 "s_register_operand")]
3056 emit_move_insn (operands[0],
3057 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3058 GET_MODE_SIZE (<V_HALF>mode)));
;; neon_vget_low<mode>: extracts a <V_HALF> piece of a VQX vector by moving a
;; subreg of operand 1 into operand 0.  The subreg's remaining arguments
;; (including the byte offset) are not visible in this excerpt.
3062 (define_expand "neon_vget_low<mode>"
3063 [(match_operand:<V_HALF> 0 "s_register_operand")
3064 (match_operand:VQX 1 "s_register_operand")]
3067 emit_move_insn (operands[0],
3068 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; float<mode><V_cvtto>2: signed integer to float conversion for the VCVTI
;; modes, emitted as vcvt.f32.s32.  Enabled only when flag_rounding_math is
;; off.
3073 (define_insn "float<mode><V_cvtto>2"
3074 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3075 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3076 "TARGET_NEON && !flag_rounding_math"
3077 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3078 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; floatuns<mode><V_cvtto>2: unsigned integer to float conversion for the
;; VCVTI modes, emitted as vcvt.f32.u32.  Enabled only when
;; flag_rounding_math is off.
3081 (define_insn "floatuns<mode><V_cvtto>2"
3082 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3083 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3084 "TARGET_NEON && !flag_rounding_math"
3085 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3086 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; fix_trunc<mode><V_cvtto>2: float to signed integer conversion (RTL "fix")
;; for the VCVTF modes, emitted as vcvt.s32.f32.
3089 (define_insn "fix_trunc<mode><V_cvtto>2"
3090 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3091 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3093 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3094 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; fixuns_trunc<mode><V_cvtto>2: float to unsigned integer conversion (RTL
;; "unsigned_fix") for the VCVTF modes, emitted as vcvt.u32.f32.
3097 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3098 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3099 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3101 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3102 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<mode> (VCVTF variant): intrinsic float-to-integer VCVT.  The
;; destination type prefix comes from the %T2 modifier applied to the
;; immediate operand 2; the source type is fixed at f32.
3105 (define_insn "neon_vcvt<mode>"
3106 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3107 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3108 (match_operand:SI 2 "immediate_operand" "i")]
3111 "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
3112 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic-style 32-bit integer (VCVTI) to f32 conversion.  Operand 2 is an
;; SI immediate that is only used through the %T2 modifier to build the
;; source type suffix of the mnemonic (presumably signed vs. unsigned --
;; confirm against the %T print modifier).
3115 (define_insn "neon_vcvt<mode>"
3116 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3117 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3118 (match_operand:SI 2 "immediate_operand" "i")]
3121 "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
3122 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion V4HF -> V4SF, emitted as vcvt.f32.f16.
;; Requires TARGET_NEON && TARGET_FP16.
3125 (define_insn "neon_vcvtv4sfv4hf"
3126 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3127 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3129 "TARGET_NEON && TARGET_FP16"
3130 "vcvt.f32.f16\t%q0, %P1"
3131 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion V4SF -> V4HF, emitted as vcvt.f16.f32.
;; Requires TARGET_NEON && TARGET_FP16.
3134 (define_insn "neon_vcvtv4hfv4sf"
3135 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3136 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3138 "TARGET_NEON && TARGET_FP16"
3139 "vcvt.f16.f32\t%P0, %q1"
3140 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Float (VCVTF) to 32-bit integer conversion with an extra immediate.
;; Operand 2 is validated with neon_const_bounds (..., 1, 33) and printed as
;; the trailing immediate of the instruction; operand 3 feeds the %T3 type
;; suffix.  NOTE(review): operand 2 is presumably the fixed-point fraction
;; bit count (1..32) -- confirm against neon_const_bounds' upper-bound rule.
3143 (define_insn "neon_vcvt_n<mode>"
3144 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3145 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3146 (match_operand:SI 2 "immediate_operand" "i")
3147 (match_operand:SI 3 "immediate_operand" "i")]
3151 neon_const_bounds (operands[2], 1, 33);
3152 return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3154 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; 32-bit integer (VCVTI) to f32 conversion with an extra immediate.
;; Operand 2 is validated with neon_const_bounds (..., 1, 33) and printed as
;; the trailing immediate of the instruction; operand 3 feeds the %T3 type
;; suffix.  NOTE(review): operand 2 is presumably the fixed-point fraction
;; bit count (1..32) -- confirm against neon_const_bounds' upper-bound rule.
3157 (define_insn "neon_vcvt_n<mode>"
3158 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3159 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3160 (match_operand:SI 2 "immediate_operand" "i")
3161 (match_operand:SI 3 "immediate_operand" "i")]
3165 neon_const_bounds (operands[2], 1, 33);
3166 return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
3168 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Narrowing move: VN-mode operand 1 to <V_narrow> operand 0, emitted as
;; vmovn.<V_if_elem>.  The SI immediate operand 2 is part of the unspec but
;; does not appear in the output template.
3171 (define_insn "neon_vmovn<mode>"
3172 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3173 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3174 (match_operand:SI 2 "immediate_operand" "i")]
3177 "vmovn.<V_if_elem>\t%P0, %q1"
3178 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing move (vqmovn): VN-mode operand 1 to <V_narrow>
;; operand 0.  The SI immediate operand 2 supplies the %T2 type suffix.
3181 (define_insn "neon_vqmovn<mode>"
3182 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3183 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3184 (match_operand:SI 2 "immediate_operand" "i")]
3187 "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
3188 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vqmovun: narrowing move of VN-mode operand 1 to <V_narrow> operand 0,
;; emitted with the fixed <V_s_elem> suffix.  The SI immediate operand 2 is
;; part of the unspec but does not appear in the output template.
3191 (define_insn "neon_vqmovun<mode>"
3192 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3193 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3194 (match_operand:SI 2 "immediate_operand" "i")]
3197 "vqmovun.<V_s_elem>\t%P0, %q1"
3198 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move (vmovl): VW-mode operand 1 to <V_widen> operand 0.  The SI
;; immediate operand 2 supplies the %T2 type suffix.
3201 (define_insn "neon_vmovl<mode>"
3202 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3203 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3204 (match_operand:SI 2 "immediate_operand" "i")]
3207 "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
3208 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by scalar, VMD modes: operand 0 = operand 1 * operand 2[lane].
;; Operand 3 is the lane index, checked against the element count of
;; <MODE>mode (the mode of the scalar-holding operand 2).  Operand 4 is not
;; referenced by the output template.  The "type" attribute picks the fp or
;; the integer variant depending on <Is_float_mode>.
3211 (define_insn "neon_vmul_lane<mode>"
3212 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3213 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3214 (match_operand:VMD 2 "s_register_operand"
3215 "<scalar_mul_constraint>")
3216 (match_operand:SI 3 "immediate_operand" "i")
3217 (match_operand:SI 4 "immediate_operand" "i")]
3221 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3222 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3225 (if_then_else (match_test "<Is_float_mode>")
3226 (const_string "neon_fp_mul_s_scalar<q>")
3227 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by scalar, VMQ modes: operand 0 = operand 1 * operand 2[lane].
;; The scalar lives in a <V_HALF>-mode register (operand 2), so the lane
;; index (operand 3) is checked against the element count of <V_HALF>mode.
;; Operand 4 is not referenced by the output template.  The "type" attribute
;; picks the fp or the integer variant depending on <Is_float_mode>.
3230 (define_insn "neon_vmul_lane<mode>"
3231 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3232 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3233 (match_operand:<V_HALF> 2 "s_register_operand"
3234 "<scalar_mul_constraint>")
3235 (match_operand:SI 3 "immediate_operand" "i")
3236 (match_operand:SI 4 "immediate_operand" "i")]
3240 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3241 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3244 (if_then_else (match_test "<Is_float_mode>")
3245 (const_string "neon_fp_mul_s_scalar<q>")
3246 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Widening multiply by scalar (UNSPEC_VMULL_LANE): <V_widen> operand 0 =
;; operand 1 * operand 2[lane], both inputs in VMDI modes.  Operand 3 is the
;; lane index, checked against the element count of <MODE>mode; operand 4
;; supplies the %T4 type suffix.
3249 (define_insn "neon_vmull_lane<mode>"
3250 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3251 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3252 (match_operand:VMDI 2 "s_register_operand"
3253 "<scalar_mul_constraint>")
3254 (match_operand:SI 3 "immediate_operand" "i")
3255 (match_operand:SI 4 "immediate_operand" "i")]
3256 UNSPEC_VMULL_LANE))]
3259 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3260 return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3262 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by scalar (UNSPEC_VQDMULL_LANE): <V_widen> operand 0 computed from
;; VMDI operand 1 and lane operand 3 of VMDI operand 2.  The lane index is
;; checked against the element count of <MODE>mode.  Operand 4 is not
;; referenced by the output template (the suffix is the fixed <V_s_elem>).
3265 (define_insn "neon_vqdmull_lane<mode>"
3266 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3267 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3268 (match_operand:VMDI 2 "s_register_operand"
3269 "<scalar_mul_constraint>")
3270 (match_operand:SI 3 "immediate_operand" "i")
3271 (match_operand:SI 4 "immediate_operand" "i")]
3272 UNSPEC_VQDMULL_LANE))]
3275 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3276 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3278 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vq[r]dmulh by scalar, VMQI modes (UNSPEC_VQDMULH_LANE): operand 0 computed
;; from operand 1 and lane operand 3 of operand 2.  Operand 4 feeds the %O4
;; and %T4 modifiers of the mnemonic.
;; The scalar lives in a <V_HALF>-mode register (operand 2) and is printed as
;; %P2[%c3], so the lane index is bounds-checked against the element count of
;; <V_HALF>mode -- the same rule neon_vmul_lane<mode> uses for VMQ.  Checking
;; against <MODE>mode would accept indices beyond the <V_HALF> register.
3281 (define_insn "neon_vqdmulh_lane<mode>"
3282 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3283 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3284 (match_operand:<V_HALF> 2 "s_register_operand"
3285 "<scalar_mul_constraint>")
3286 (match_operand:SI 3 "immediate_operand" "i")
3287 (match_operand:SI 4 "immediate_operand" "i")]
3288 UNSPEC_VQDMULH_LANE))]
3291 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3292 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
3294 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vq[r]dmulh by scalar, VMDI modes (UNSPEC_VQDMULH_LANE): operand 0 computed
;; from operand 1 and lane operand 3 of operand 2 (all VMDI).  The lane index
;; is checked against the element count of <MODE>mode; operand 4 feeds the
;; %O4 and %T4 modifiers of the mnemonic.
3297 (define_insn "neon_vqdmulh_lane<mode>"
3298 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3299 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3300 (match_operand:VMDI 2 "s_register_operand"
3301 "<scalar_mul_constraint>")
3302 (match_operand:SI 3 "immediate_operand" "i")
3303 (match_operand:SI 4 "immediate_operand" "i")]
3304 UNSPEC_VQDMULH_LANE))]
3307 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3308 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
3310 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Multiply-accumulate by scalar, VMD modes.  Operand 1 is the accumulator
;; and is tied to the output register (constraint "0"); the instruction
;; multiplies operand 2 by lane operand 4 of operand 3.  The lane index is
;; checked against the element count of <MODE>mode.  Operand 5 is not
;; referenced by the output template.  The "type" attribute picks the fp or
;; the integer variant depending on <Is_float_mode>.
3313 (define_insn "neon_vmla_lane<mode>"
3314 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3315 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3316 (match_operand:VMD 2 "s_register_operand" "w")
3317 (match_operand:VMD 3 "s_register_operand"
3318 "<scalar_mul_constraint>")
3319 (match_operand:SI 4 "immediate_operand" "i")
3320 (match_operand:SI 5 "immediate_operand" "i")]
3324 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3325 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3328 (if_then_else (match_test "<Is_float_mode>")
3329 (const_string "neon_fp_mla_s_scalar<q>")
3330 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by scalar, VMQ modes.  Operand 1 is the accumulator
;; and is tied to the output register (constraint "0"); the instruction
;; multiplies operand 2 by lane operand 4 of operand 3.  Operand 5 is not
;; referenced by the output template.  The "type" attribute picks the fp or
;; the integer variant depending on <Is_float_mode>.
;; The scalar lives in a <V_HALF>-mode register (operand 3) and is printed as
;; %P3[%c4], so the lane index is bounds-checked against the element count of
;; <V_HALF>mode -- the same rule neon_vmul_lane<mode> uses for VMQ.  Checking
;; against <MODE>mode would accept indices beyond the <V_HALF> register.
3333 (define_insn "neon_vmla_lane<mode>"
3334 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3335 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3336 (match_operand:VMQ 2 "s_register_operand" "w")
3337 (match_operand:<V_HALF> 3 "s_register_operand"
3338 "<scalar_mul_constraint>")
3339 (match_operand:SI 4 "immediate_operand" "i")
3340 (match_operand:SI 5 "immediate_operand" "i")]
3344 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3345 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3348 (if_then_else (match_test "<Is_float_mode>")
3349 (const_string "neon_fp_mla_s_scalar<q>")
3350 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by scalar (UNSPEC_VMLAL_LANE).  Operand 1 is
;; the <V_widen> accumulator, tied to the output (constraint "0"); operand 2
;; is multiplied by lane operand 4 of operand 3 (both VMDI).  The lane index
;; is checked against the element count of <MODE>mode; operand 5 supplies the
;; %T5 type suffix.
3353 (define_insn "neon_vmlal_lane<mode>"
3354 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3355 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3356 (match_operand:VMDI 2 "s_register_operand" "w")
3357 (match_operand:VMDI 3 "s_register_operand"
3358 "<scalar_mul_constraint>")
3359 (match_operand:SI 4 "immediate_operand" "i")
3360 (match_operand:SI 5 "immediate_operand" "i")]
3361 UNSPEC_VMLAL_LANE))]
3364 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3365 return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3367 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by scalar (UNSPEC_VQDMLAL_LANE).  Operand 1 is the <V_widen>
;; accumulator, tied to the output (constraint "0"); operand 2 is combined
;; with lane operand 4 of operand 3 (both VMDI).  The lane index is checked
;; against the element count of <MODE>mode.  Operand 5 is not referenced by
;; the output template (the suffix is the fixed <V_s_elem>).
3370 (define_insn "neon_vqdmlal_lane<mode>"
3371 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3372 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3373 (match_operand:VMDI 2 "s_register_operand" "w")
3374 (match_operand:VMDI 3 "s_register_operand"
3375 "<scalar_mul_constraint>")
3376 (match_operand:SI 4 "immediate_operand" "i")
3377 (match_operand:SI 5 "immediate_operand" "i")]
3378 UNSPEC_VQDMLAL_LANE))]
3381 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3382 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3384 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by scalar, VMD modes.  Operand 1 is the accumulator and
;; is tied to the output register (constraint "0"); the instruction
;; multiplies operand 2 by lane operand 4 of operand 3.  The lane index is
;; checked against the element count of <MODE>mode.  Operand 5 is not
;; referenced by the output template.  The "type" attribute reuses the mla
;; type names, choosing fp or integer by <Is_float_mode>.
3387 (define_insn "neon_vmls_lane<mode>"
3388 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3389 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3390 (match_operand:VMD 2 "s_register_operand" "w")
3391 (match_operand:VMD 3 "s_register_operand"
3392 "<scalar_mul_constraint>")
3393 (match_operand:SI 4 "immediate_operand" "i")
3394 (match_operand:SI 5 "immediate_operand" "i")]
3398 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3399 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3402 (if_then_else (match_test "<Is_float_mode>")
3403 (const_string "neon_fp_mla_s_scalar<q>")
3404 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by scalar, VMQ modes.  Operand 1 is the accumulator and
;; is tied to the output register (constraint "0"); the instruction
;; multiplies operand 2 by lane operand 4 of operand 3.  Operand 5 is not
;; referenced by the output template.  The "type" attribute reuses the mla
;; type names, choosing fp or integer by <Is_float_mode>.
;; The scalar lives in a <V_HALF>-mode register (operand 3) and is printed as
;; %P3[%c4], so the lane index is bounds-checked against the element count of
;; <V_HALF>mode -- the same rule neon_vmul_lane<mode> uses for VMQ.  Checking
;; against <MODE>mode would accept indices beyond the <V_HALF> register.
3407 (define_insn "neon_vmls_lane<mode>"
3408 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3409 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3410 (match_operand:VMQ 2 "s_register_operand" "w")
3411 (match_operand:<V_HALF> 3 "s_register_operand"
3412 "<scalar_mul_constraint>")
3413 (match_operand:SI 4 "immediate_operand" "i")
3414 (match_operand:SI 5 "immediate_operand" "i")]
3418 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3419 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3422 (if_then_else (match_test "<Is_float_mode>")
3423 (const_string "neon_fp_mla_s_scalar<q>")
3424 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by scalar (UNSPEC_VMLSL_LANE).  Operand 1 is
;; the <V_widen> accumulator, tied to the output (constraint "0"); operand 2
;; is multiplied by lane operand 4 of operand 3 (both VMDI).  The lane index
;; is checked against the element count of <MODE>mode; operand 5 supplies the
;; %T5 type suffix.
3427 (define_insn "neon_vmlsl_lane<mode>"
3428 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3429 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3430 (match_operand:VMDI 2 "s_register_operand" "w")
3431 (match_operand:VMDI 3 "s_register_operand"
3432 "<scalar_mul_constraint>")
3433 (match_operand:SI 4 "immediate_operand" "i")
3434 (match_operand:SI 5 "immediate_operand" "i")]
3435 UNSPEC_VMLSL_LANE))]
3438 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3439 return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3441 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by scalar (UNSPEC_VQDMLSL_LANE).  Operand 1 is the <V_widen>
;; accumulator, tied to the output (constraint "0"); operand 2 is combined
;; with lane operand 4 of operand 3 (both VMDI).  The lane index is checked
;; against the element count of <MODE>mode.  Operand 5 is not referenced by
;; the output template (the suffix is the fixed <V_s_elem>).
3444 (define_insn "neon_vqdmlsl_lane<mode>"
3445 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3446 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3447 (match_operand:VMDI 2 "s_register_operand" "w")
3448 (match_operand:VMDI 3 "s_register_operand"
3449 "<scalar_mul_constraint>")
3450 (match_operand:SI 4 "immediate_operand" "i")
3451 (match_operand:SI 5 "immediate_operand" "i")]
3452 UNSPEC_VQDMLSL_LANE))]
3455 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3456 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3458 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3461 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3462 ; core register into a temp register, then use a scalar taken from that. This
3463 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3464 ; or extracted from another vector. In the latter case it's currently better to
3465 ; use the "_lane" variant, and the former case can probably be implemented
3466 ; using vld1_lane, but that hasn't been done yet.
;; "_n" multiply, VMD modes: the scalar operand 2 is inserted into lane 0 of a
;; fresh <MODE>mode pseudo via neon_vset_lane<mode> (tmp is passed as both the
;; first and third argument), then neon_vmul_lane<mode> is emitted on lane 0.
;; The final argument passed on is const0_rtx, not operands[3].
3468 (define_expand "neon_vmul_n<mode>"
3469 [(match_operand:VMD 0 "s_register_operand" "")
3470 (match_operand:VMD 1 "s_register_operand" "")
3471 (match_operand:<V_elem> 2 "s_register_operand" "")
3472 (match_operand:SI 3 "immediate_operand" "")]
3475 rtx tmp = gen_reg_rtx (<MODE>mode);
3476 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3477 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3478 const0_rtx, const0_rtx));
;; "_n" multiply, VMQ modes: the scalar operand 2 is inserted into lane 0 of a
;; fresh <V_HALF>mode pseudo via neon_vset_lane<V_half> (tmp is passed as both
;; the first and third argument), then neon_vmul_lane<mode> is emitted on
;; lane 0.  The final argument passed on is const0_rtx, not operands[3].
3482 (define_expand "neon_vmul_n<mode>"
3483 [(match_operand:VMQ 0 "s_register_operand" "")
3484 (match_operand:VMQ 1 "s_register_operand" "")
3485 (match_operand:<V_elem> 2 "s_register_operand" "")
3486 (match_operand:SI 3 "immediate_operand" "")]
3489 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3490 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3491 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3492 const0_rtx, const0_rtx));
;; "_n" widening multiply: the scalar operand 2 is inserted into lane 0 of a
;; fresh <MODE>mode pseudo via neon_vset_lane<mode>, then
;; neon_vmull_lane<mode> is emitted on lane 0, forwarding operands[3] as its
;; final argument.
3496 (define_expand "neon_vmull_n<mode>"
3497 [(match_operand:<V_widen> 0 "s_register_operand" "")
3498 (match_operand:VMDI 1 "s_register_operand" "")
3499 (match_operand:<V_elem> 2 "s_register_operand" "")
3500 (match_operand:SI 3 "immediate_operand" "")]
3503 rtx tmp = gen_reg_rtx (<MODE>mode);
3504 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3505 emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1], tmp,
3506 const0_rtx, operands[3]));
;; "_n" vqdmull: the scalar operand 2 is inserted into lane 0 of a fresh
;; <MODE>mode pseudo via neon_vset_lane<mode>, then neon_vqdmull_lane<mode>
;; is emitted on lane 0.  The final argument passed on is const0_rtx, not
;; operands[3].
3510 (define_expand "neon_vqdmull_n<mode>"
3511 [(match_operand:<V_widen> 0 "s_register_operand" "")
3512 (match_operand:VMDI 1 "s_register_operand" "")
3513 (match_operand:<V_elem> 2 "s_register_operand" "")
3514 (match_operand:SI 3 "immediate_operand" "")]
3517 rtx tmp = gen_reg_rtx (<MODE>mode);
3518 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3519 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3520 const0_rtx, const0_rtx));
;; "_n" vqdmulh, VMDI modes: the scalar operand 2 is inserted into lane 0 of a
;; fresh <MODE>mode pseudo via neon_vset_lane<mode>, then
;; neon_vqdmulh_lane<mode> is emitted on lane 0, forwarding operands[3] as its
;; final argument.
3524 (define_expand "neon_vqdmulh_n<mode>"
3525 [(match_operand:VMDI 0 "s_register_operand" "")
3526 (match_operand:VMDI 1 "s_register_operand" "")
3527 (match_operand:<V_elem> 2 "s_register_operand" "")
3528 (match_operand:SI 3 "immediate_operand" "")]
3531 rtx tmp = gen_reg_rtx (<MODE>mode);
3532 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3533 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3534 const0_rtx, operands[3]));
;; "_n" vqdmulh, VMQI modes: the scalar operand 2 is inserted into lane 0 of a
;; fresh <V_HALF>mode pseudo via neon_vset_lane<V_half>, then
;; neon_vqdmulh_lane<mode> is emitted on lane 0, forwarding operands[3] as its
;; final argument.
3538 (define_expand "neon_vqdmulh_n<mode>"
3539 [(match_operand:VMQI 0 "s_register_operand" "")
3540 (match_operand:VMQI 1 "s_register_operand" "")
3541 (match_operand:<V_elem> 2 "s_register_operand" "")
3542 (match_operand:SI 3 "immediate_operand" "")]
3545 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3546 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3547 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3548 const0_rtx, operands[3]));
;; "_n" multiply-accumulate, VMD modes: the scalar operand 3 is inserted into
;; lane 0 of a fresh <MODE>mode pseudo via neon_vset_lane<mode>, then
;; neon_vmla_lane<mode> is emitted with operands 0-2, that temporary, lane 0
;; and operands[4].
3552 (define_expand "neon_vmla_n<mode>"
3553 [(match_operand:VMD 0 "s_register_operand" "")
3554 (match_operand:VMD 1 "s_register_operand" "")
3555 (match_operand:VMD 2 "s_register_operand" "")
3556 (match_operand:<V_elem> 3 "s_register_operand" "")
3557 (match_operand:SI 4 "immediate_operand" "")]
3560 rtx tmp = gen_reg_rtx (<MODE>mode);
3561 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3562 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3563 tmp, const0_rtx, operands[4]));
;; "_n" multiply-accumulate, VMQ modes: the scalar operand 3 is inserted into
;; lane 0 of a fresh <V_HALF>mode pseudo via neon_vset_lane<V_half>, then
;; neon_vmla_lane<mode> is emitted with operands 0-2, that temporary, lane 0
;; and operands[4].
3567 (define_expand "neon_vmla_n<mode>"
3568 [(match_operand:VMQ 0 "s_register_operand" "")
3569 (match_operand:VMQ 1 "s_register_operand" "")
3570 (match_operand:VMQ 2 "s_register_operand" "")
3571 (match_operand:<V_elem> 3 "s_register_operand" "")
3572 (match_operand:SI 4 "immediate_operand" "")]
3575 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3576 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3577 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3578 tmp, const0_rtx, operands[4]));
;; "_n" widening multiply-accumulate: the scalar operand 3 is inserted into
;; lane 0 of a fresh <MODE>mode pseudo via neon_vset_lane<mode>, then
;; neon_vmlal_lane<mode> is emitted with operands 0-2, that temporary, lane 0
;; and operands[4].
3582 (define_expand "neon_vmlal_n<mode>"
3583 [(match_operand:<V_widen> 0 "s_register_operand" "")
3584 (match_operand:<V_widen> 1 "s_register_operand" "")
3585 (match_operand:VMDI 2 "s_register_operand" "")
3586 (match_operand:<V_elem> 3 "s_register_operand" "")
3587 (match_operand:SI 4 "immediate_operand" "")]
3590 rtx tmp = gen_reg_rtx (<MODE>mode);
3591 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3592 emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1], operands[2],
3593 tmp, const0_rtx, operands[4]));
;; "_n" vqdmlal: the scalar operand 3 is inserted into lane 0 of a fresh
;; <MODE>mode pseudo via neon_vset_lane<mode>, then neon_vqdmlal_lane<mode>
;; is emitted with operands 0-2, that temporary, lane 0 and operands[4].
3597 (define_expand "neon_vqdmlal_n<mode>"
3598 [(match_operand:<V_widen> 0 "s_register_operand" "")
3599 (match_operand:<V_widen> 1 "s_register_operand" "")
3600 (match_operand:VMDI 2 "s_register_operand" "")
3601 (match_operand:<V_elem> 3 "s_register_operand" "")
3602 (match_operand:SI 4 "immediate_operand" "")]
3605 rtx tmp = gen_reg_rtx (<MODE>mode);
3606 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3607 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3608 tmp, const0_rtx, operands[4]));
;; "_n" multiply-subtract, VMD modes: the scalar operand 3 is inserted into
;; lane 0 of a fresh <MODE>mode pseudo via neon_vset_lane<mode>, then
;; neon_vmls_lane<mode> is emitted with operands 0-2, that temporary, lane 0
;; and operands[4].
3612 (define_expand "neon_vmls_n<mode>"
3613 [(match_operand:VMD 0 "s_register_operand" "")
3614 (match_operand:VMD 1 "s_register_operand" "")
3615 (match_operand:VMD 2 "s_register_operand" "")
3616 (match_operand:<V_elem> 3 "s_register_operand" "")
3617 (match_operand:SI 4 "immediate_operand" "")]
3620 rtx tmp = gen_reg_rtx (<MODE>mode);
3621 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3622 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3623 tmp, const0_rtx, operands[4]));
;; "_n" multiply-subtract, VMQ modes: the scalar operand 3 is inserted into
;; lane 0 of a fresh <V_HALF>mode pseudo via neon_vset_lane<V_half>, then
;; neon_vmls_lane<mode> is emitted with operands 0-2, that temporary, lane 0
;; and operands[4].
3627 (define_expand "neon_vmls_n<mode>"
3628 [(match_operand:VMQ 0 "s_register_operand" "")
3629 (match_operand:VMQ 1 "s_register_operand" "")
3630 (match_operand:VMQ 2 "s_register_operand" "")
3631 (match_operand:<V_elem> 3 "s_register_operand" "")
3632 (match_operand:SI 4 "immediate_operand" "")]
3635 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3636 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3637 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3638 tmp, const0_rtx, operands[4]));
;; "_n" widening multiply-subtract: the scalar operand 3 is inserted into
;; lane 0 of a fresh <MODE>mode pseudo via neon_vset_lane<mode>, then
;; neon_vmlsl_lane<mode> is emitted with operands 0-2, that temporary, lane 0
;; and operands[4].
3642 (define_expand "neon_vmlsl_n<mode>"
3643 [(match_operand:<V_widen> 0 "s_register_operand" "")
3644 (match_operand:<V_widen> 1 "s_register_operand" "")
3645 (match_operand:VMDI 2 "s_register_operand" "")
3646 (match_operand:<V_elem> 3 "s_register_operand" "")
3647 (match_operand:SI 4 "immediate_operand" "")]
3650 rtx tmp = gen_reg_rtx (<MODE>mode);
3651 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3652 emit_insn (gen_neon_vmlsl_lane<mode> (operands[0], operands[1], operands[2],
3653 tmp, const0_rtx, operands[4]));
;; "_n" vqdmlsl: the scalar operand 3 is inserted into lane 0 of a fresh
;; <MODE>mode pseudo via neon_vset_lane<mode>, then neon_vqdmlsl_lane<mode>
;; is emitted with operands 0-2, that temporary, lane 0 and operands[4].
3657 (define_expand "neon_vqdmlsl_n<mode>"
3658 [(match_operand:<V_widen> 0 "s_register_operand" "")
3659 (match_operand:<V_widen> 1 "s_register_operand" "")
3660 (match_operand:VMDI 2 "s_register_operand" "")
3661 (match_operand:<V_elem> 3 "s_register_operand" "")
3662 (match_operand:SI 4 "immediate_operand" "")]
3665 rtx tmp = gen_reg_rtx (<MODE>mode);
3666 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3667 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3668 tmp, const0_rtx, operands[4]));
;; vext on two VDQX registers.  Operand 3 is the immediate printed as the
;; last instruction operand; it is validated with neon_const_bounds against
;; the range starting at 0 and limited by GET_MODE_NUNITS (<MODE>mode).
3672 (define_insn "neon_vext<mode>"
3673 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3674 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3675 (match_operand:VDQX 2 "s_register_operand" "w")
3676 (match_operand:SI 3 "immediate_operand" "i")]
3680 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3681 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3683 [(set_attr "type" "neon_ext<q>")]
;; vrev64 on VDQ modes.  The SI immediate operand 2 is part of the unspec but
;; does not appear in the output template.
3686 (define_insn "neon_vrev64<mode>"
3687 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3688 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
3689 (match_operand:SI 2 "immediate_operand" "i")]
3692 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3693 [(set_attr "type" "neon_rev<q>")]
;; vrev32 on VX modes.  The SI immediate operand 2 is part of the unspec but
;; does not appear in the output template.
3696 (define_insn "neon_vrev32<mode>"
3697 [(set (match_operand:VX 0 "s_register_operand" "=w")
3698 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
3699 (match_operand:SI 2 "immediate_operand" "i")]
3702 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3703 [(set_attr "type" "neon_rev<q>")]
;; vrev16 on VE modes.  The SI immediate operand 2 is part of the unspec but
;; does not appear in the output template.
3706 (define_insn "neon_vrev16<mode>"
3707 [(set (match_operand:VE 0 "s_register_operand" "=w")
3708 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
3709 (match_operand:SI 2 "immediate_operand" "i")]
3712 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3713 [(set_attr "type" "neon_rev<q>")]
3716 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3717 ; allocation. For an intrinsic of form:
3718 ; rD = vbsl_* (rS, rN, rM)
3719 ; We can use any of:
3720 ; vbsl rS, rN, rM (if D = S)
3721 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3722 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three register alternatives, one per input that may be
;; tied to the output:
;;   alt 0: operand 1 tied to the output -> vbsl
;;   alt 1: operand 3 tied to the output -> vbit
;;   alt 2: operand 2 tied to the output -> vbif
3724 (define_insn "neon_vbsl<mode>_internal"
3725 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3726 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3727 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3728 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3732 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3733 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3734 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3735 [(set_attr "type" "neon_bsl<q>")]
;; Expander for the vbsl intrinsic.  Operand 1 arrives in <V_cmp_result> mode
;; while the data operands are VDQX, so it is re-expressed in <MODE>mode with
;; gen_lowpart before the pattern is emitted.
3738 (define_expand "neon_vbsl<mode>"
3739 [(set (match_operand:VDQX 0 "s_register_operand" "")
3740 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3741 (match_operand:VDQX 2 "s_register_operand" "")
3742 (match_operand:VDQX 3 "s_register_operand" "")]
3746 /* We can't alias operands together if they have different modes. */
3747 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift of operand 1 by the per-element counts held in register operand 2
;; (VDQIX modes).  The SI immediate operand 3 feeds the %O3 and %T3 modifiers
;; of the mnemonic.
3750 (define_insn "neon_vshl<mode>"
3751 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3752 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3753 (match_operand:VDQIX 2 "s_register_operand" "w")
3754 (match_operand:SI 3 "immediate_operand" "i")]
3757 "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3758 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift of operand 1 by the per-element counts held in register
;; operand 2 (VDQIX modes).  The SI immediate operand 3 feeds the %O3 and %T3
;; modifiers of the mnemonic.
3761 (define_insn "neon_vqshl<mode>"
3762 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3763 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3764 (match_operand:VDQIX 2 "s_register_operand" "w")
3765 (match_operand:SI 3 "immediate_operand" "i")]
3768 "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3769 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Right shift by immediate (VDQIX modes).  Operand 2 is the shift count,
;; validated with bounds 1 and element-bits + 1; operand 3 feeds the %O3 and
;; %T3 modifiers of the mnemonic.
3772 (define_insn "neon_vshr_n<mode>"
3773 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3774 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3775 (match_operand:SI 2 "immediate_operand" "i")
3776 (match_operand:SI 3 "immediate_operand" "i")]
3780 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3781 return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3783 [(set_attr "type" "neon_shift_imm<q>")]
;; Narrowing right shift by immediate: VN operand 1 to <V_narrow> operand 0.
;; Operand 2 is the shift count, validated with bounds 1 and
;; element-bits / 2 + 1; operand 3 feeds the %O3 modifier of the mnemonic.
3786 (define_insn "neon_vshrn_n<mode>"
3787 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3788 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3789 (match_operand:SI 2 "immediate_operand" "i")
3790 (match_operand:SI 3 "immediate_operand" "i")]
3794 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3795 return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
3797 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing right shift by immediate: VN operand 1 to <V_narrow>
;; operand 0.  Operand 2 is the shift count, validated with bounds 1 and
;; element-bits / 2 + 1; operand 3 feeds the %O3 and %T3 modifiers.
3800 (define_insn "neon_vqshrn_n<mode>"
3801 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3802 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3803 (match_operand:SI 2 "immediate_operand" "i")
3804 (match_operand:SI 3 "immediate_operand" "i")]
3808 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3809 return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3811 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vq[r]shrun by immediate: VN operand 1 to <V_narrow> operand 0.  Operand 2
;; is the shift count, validated with bounds 1 and element-bits / 2 + 1;
;; operand 3 feeds the %O3 and %T3 modifiers of the mnemonic.
3814 (define_insn "neon_vqshrun_n<mode>"
3815 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3816 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3817 (match_operand:SI 2 "immediate_operand" "i")
3818 (match_operand:SI 3 "immediate_operand" "i")]
3822 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3823 return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3825 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Left shift by immediate (VDQIX modes).  Operand 2 is the shift count,
;; validated with bounds 0 and element-bits.  Operand 3 is part of the unspec
;; but does not appear in the output template.
3828 (define_insn "neon_vshl_n<mode>"
3829 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3830 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3831 (match_operand:SI 2 "immediate_operand" "i")
3832 (match_operand:SI 3 "immediate_operand" "i")]
3836 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3837 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3839 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating left shift by immediate (VDQIX modes).  Operand 2 is the shift
;; count, validated with bounds 0 and element-bits; operand 3 supplies the
;; %T3 type suffix.
3842 (define_insn "neon_vqshl_n<mode>"
3843 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3844 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3845 (match_operand:SI 2 "immediate_operand" "i")
3846 (match_operand:SI 3 "immediate_operand" "i")]
3850 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3851 return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3853 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate (VDQIX modes).  Operand 2 is the shift count,
;; validated with bounds 0 and element-bits; operand 3 supplies the %T3 type
;; suffix.
3856 (define_insn "neon_vqshlu_n<mode>"
3857 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3858 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3859 (match_operand:SI 2 "immediate_operand" "i")
3860 (match_operand:SI 3 "immediate_operand" "i")]
3864 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3865 return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3867 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening left shift by immediate: VW operand 1 to <V_widen> operand 0.
;; Operand 2 is the shift count, validated with bounds 0 and
;; element-bits + 1; operand 3 supplies the %T3 type suffix.
;; NOTE(review): the in-body comment states "0 < imm", yet the lower bound
;; handed to neon_const_bounds is 0, whereas the right-shift patterns in this
;; file pass 1 for a strictly positive count -- confirm which is intended.
3870 (define_insn "neon_vshll_n<mode>"
3871 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3872 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3873 (match_operand:SI 2 "immediate_operand" "i")
3874 (match_operand:SI 3 "immediate_operand" "i")]
3878 /* The boundaries are: 0 < imm <= size. */
3879 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3880 return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
3882 [(set_attr "type" "neon_shift_imm_long")]
;; Shift right by immediate and accumulate (VDQIX modes).  Operand 1 is the
;; accumulator, tied to the output (constraint "0"); operand 2 is shifted by
;; operand 3, which is validated with bounds 1 and element-bits + 1.
;; Operand 4 feeds the %O4 and %T4 modifiers of the mnemonic.
3885 (define_insn "neon_vsra_n<mode>"
3886 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3887 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3888 (match_operand:VDQIX 2 "s_register_operand" "w")
3889 (match_operand:SI 3 "immediate_operand" "i")
3890 (match_operand:SI 4 "immediate_operand" "i")]
3894 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3895 return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3897 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate (VDQIX modes).  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the other source and operand 3 the shift
;; count, validated with bounds 1 and element-bits + 1.
3900 (define_insn "neon_vsri_n<mode>"
3901 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3902 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3903 (match_operand:VDQIX 2 "s_register_operand" "w")
3904 (match_operand:SI 3 "immediate_operand" "i")]
3908 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3909 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3911 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate (VDQIX modes).  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the other source and operand 3 the shift
;; count, validated with bounds 0 and element-bits.
3914 (define_insn "neon_vsli_n<mode>"
3915 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3916 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3917 (match_operand:VDQIX 2 "s_register_operand" "w")
3918 (match_operand:SI 3 "immediate_operand" "i")]
3922 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3923 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3925 [(set_attr "type" "neon_shift_reg<q>")]
;; Table lookup with a one-register table: operand 1 is the V8QI table
;; (printed inside the braces) and operand 2 the V8QI index vector.
3928 (define_insn "neon_vtbl1v8qi"
3929 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3930 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3931 (match_operand:V8QI 2 "s_register_operand" "w")]
3934 "vtbl.8\t%P0, {%P1}, %P2"
3935 [(set_attr "type" "neon_tbl1")]
;; Table lookup with a two-register table held in the TImode operand 1.
;; The output code rebuilds the table as two V8QImode hard registers at
;; REGNO (operands[1]) and REGNO + 2, places them with the result and the
;; index vector (operand 2) in a local ops[] array, and prints one vtbl.8.
;; NOTE(review): the step of 2 presumably reflects hard register numbers
;; counting in 32-bit units -- confirm against the register layout.
3938 (define_insn "neon_vtbl2v8qi"
3939 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3940 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3941 (match_operand:V8QI 2 "s_register_operand" "w")]
3946 int tabbase = REGNO (operands[1]);
3948 ops[0] = operands[0];
3949 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3950 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3951 ops[3] = operands[2];
3952 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3956 [(set_attr "type" "neon_tbl2")]
;; Table lookup with a three-register table held in the EImode operand 1.
;; The output code rebuilds the table as three V8QImode hard registers at
;; REGNO (operands[1]), + 2 and + 4, places them with the result and the
;; index vector (operand 2) in a local ops[] array, and prints one vtbl.8.
;; NOTE(review): the step of 2 presumably reflects hard register numbers
;; counting in 32-bit units -- confirm against the register layout.
3959 (define_insn "neon_vtbl3v8qi"
3960 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3961 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3962 (match_operand:V8QI 2 "s_register_operand" "w")]
3967 int tabbase = REGNO (operands[1]);
3969 ops[0] = operands[0];
3970 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3971 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3972 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3973 ops[4] = operands[2];
3974 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3978 [(set_attr "type" "neon_tbl3")]
;; Table lookup with a four-register table held in the OImode operand 1.
;; The output code rebuilds the table as four V8QImode hard registers at
;; REGNO (operands[1]), + 2, + 4 and + 6, places them with the result and the
;; index vector (operand 2) in a local ops[] array, and prints one vtbl.8.
;; NOTE(review): the step of 2 presumably reflects hard register numbers
;; counting in 32-bit units -- confirm against the register layout.
3981 (define_insn "neon_vtbl4v8qi"
3982 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3983 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3984 (match_operand:V8QI 2 "s_register_operand" "w")]
3989 int tabbase = REGNO (operands[1]);
3991 ops[0] = operands[0];
3992 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3993 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3994 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3995 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3996 ops[5] = operands[2];
3997 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4001 [(set_attr "type" "neon_tbl4")]
4004 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table, split after reload into two
;; neon_vtbl2v8qi instructions.  The table (operand 1) is viewed as TImode
;; and reused unchanged for both halves; the low and the high V8QI halves of
;; the output and of the index vector are taken with simplify_subreg.
;; The output uses "=&w" (earlyclobber), presumably because the first half
;; of the result is written while the inputs are still needed -- confirm.
4005 (define_insn_and_split "neon_vtbl1v16qi"
4006 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4007 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4008 (match_operand:V16QI 2 "s_register_operand" "w")]
4012 "&& reload_completed"
4015 rtx op0, op1, op2, part0, part2;
4019 op1 = gen_lowpart (TImode, operands[1]);
4022 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4023 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4024 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4025 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4027 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4028 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4029 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4030 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4033 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table (operand 1), split after reload
;; into two instructions: one for the low and one for the high V8QI half of
;; the output and of the index vector, each half taken with simplify_subreg.
;; The same op1 is passed to both halves.
;; The output uses "=&w" (earlyclobber), presumably because the first half
;; of the result is written while the inputs are still needed -- confirm.
4036 (define_insn_and_split "neon_vtbl2v16qi"
4037 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4038 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4039 (match_operand:V16QI 2 "s_register_operand" "w")]
4043 "&& reload_completed"
4046 rtx op0, op1, op2, part0, part2;
4053 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4054 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4055 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4056 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4058 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4059 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4060 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4061 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4064 [(set_attr "type" "multiple")]
4067 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4068 ;; handle quad-word input modes, producing octa-word output modes. But
4069 ;; that requires us to add support for octa-word vector modes in moves.
4070 ;; That seems overkill for this one use in vec_perm.
;; neon_vcombinev16qi: OImode result (operand 0) formed from an unspec of
;; two V16QI registers (operands 1 and 2).  The pattern is split once
;; reload has completed by calling neon_split_vcombine (operands).
4071 (define_insn_and_split "neon_vcombinev16qi"
4072 [(set (match_operand:OI 0 "s_register_operand" "=w")
4073 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4074 (match_operand:V16QI 2 "s_register_operand" "w")]
4078 "&& reload_completed"
4081 neon_split_vcombine (operands);
4084 [(set_attr "type" "multiple")]
;; neon_vtbx1v8qi: emits "vtbx.8" with a one-register list.  Operand 1 is
;; tied to the output (constraint "0"), operand 2 is printed inside the
;; register-list braces and operand 3 is the final source register.
4087 (define_insn "neon_vtbx1v8qi"
4088 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4089 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4090 (match_operand:V8QI 2 "s_register_operand" "w")
4091 (match_operand:V8QI 3 "s_register_operand" "w")]
4094 "vtbx.8\t%P0, {%P2}, %P3"
4095 [(set_attr "type" "neon_tbl1")]
;; neon_vtbx2v8qi: "vtbx.8" with a two-register list.  Operand 2 is a TImode
;; register value; the output code rebuilds it as two V8QI registers at
;; REGNO (operands[2]) and REGNO (operands[2]) + 2 and prints them in the
;; list.  Operand 1 is tied to the output (constraint "0").
4098 (define_insn "neon_vtbx2v8qi"
4099 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4100 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4101 (match_operand:TI 2 "s_register_operand" "w")
4102 (match_operand:V8QI 3 "s_register_operand" "w")]
4107 int tabbase = REGNO (operands[2]);
4109 ops[0] = operands[0];
4110 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4111 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4112 ops[3] = operands[3];
4113 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4117 [(set_attr "type" "neon_tbl2")]
;; neon_vtbx3v8qi: "vtbx.8" with a three-register list.  Operand 2 is an
;; EImode register value; the output code prints V8QI registers at
;; REGNO (operands[2]) + 0, + 2 and + 4 in the list.  Operand 1 is tied to
;; the output (constraint "0").
4120 (define_insn "neon_vtbx3v8qi"
4121 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4122 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4123 (match_operand:EI 2 "s_register_operand" "w")
4124 (match_operand:V8QI 3 "s_register_operand" "w")]
4129 int tabbase = REGNO (operands[2]);
4131 ops[0] = operands[0];
4132 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4133 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4134 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4135 ops[4] = operands[3];
4136 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4140 [(set_attr "type" "neon_tbl3")]
;; neon_vtbx4v8qi: "vtbx.8" with a four-register list.  Operand 2 is an
;; OImode register value; the output code prints V8QI registers at
;; REGNO (operands[2]) + 0, + 2, + 4 and + 6 in the list.  Operand 1 is tied
;; to the output (constraint "0").
4143 (define_insn "neon_vtbx4v8qi"
4144 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4145 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4146 (match_operand:OI 2 "s_register_operand" "w")
4147 (match_operand:V8QI 3 "s_register_operand" "w")]
4152 int tabbase = REGNO (operands[2]);
4154 ops[0] = operands[0];
4155 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4156 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4157 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4158 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4159 ops[5] = operands[3];
4160 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4164 [(set_attr "type" "neon_tbl4")]
;; Expander for the transpose operation over the VDQW modes.  It generates
;; two sets from the same pair of inputs (operands 1 and 2): operand 0
;; receives the first unspec and operand 3 receives UNSPEC_VTRN2.
;; NOTE(review): the unspec code of the first set is not visible in this
;; excerpt (presumably UNSPEC_VTRN1).
4167 (define_expand "neon_vtrn<mode>_internal"
4169 [(set (match_operand:VDQW 0 "s_register_operand" "")
4170 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4171 (match_operand:VDQW 2 "s_register_operand" "")]
4173 (set (match_operand:VDQW 3 "s_register_operand" "")
4174 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4179 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the transpose: outputs are operands 0 and 2; input
;; operand 1 is tied to output 0 and input operand 3 is tied to output 2,
;; so the single "vtrn" instruction names only the two output registers.
4180 (define_insn "*neon_vtrn<mode>_insn"
4181 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4182 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4183 (match_operand:VDQW 3 "s_register_operand" "2")]
4185 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4186 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4189 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4190 [(set_attr "type" "neon_permute<q>")]
;; Expander for the zip (interleave) operation over the VDQW modes.  It
;; generates two sets from the same pair of inputs (operands 1 and 2):
;; operand 0 receives the first unspec and operand 3 receives UNSPEC_VZIP2.
;; NOTE(review): the unspec code of the first set is not visible in this
;; excerpt (presumably UNSPEC_VZIP1).
4193 (define_expand "neon_vzip<mode>_internal"
4195 [(set (match_operand:VDQW 0 "s_register_operand" "")
4196 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4197 (match_operand:VDQW 2 "s_register_operand" "")]
4199 (set (match_operand:VDQW 3 "s_register_operand" "")
4200 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4205 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the zip: outputs are operands 0 and 2; input operand 1
;; is tied to output 0 and input operand 3 is tied to output 2, so the
;; single "vzip" instruction names only the two output registers.
4206 (define_insn "*neon_vzip<mode>_insn"
4207 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4208 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4209 (match_operand:VDQW 3 "s_register_operand" "2")]
4211 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4212 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4215 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4216 [(set_attr "type" "neon_zip<q>")]
;; Expander for the unzip (de-interleave) operation over the VDQW modes.
;; It generates two sets from the same pair of inputs (operands 1 and 2):
;; operand 0 receives the first unspec and operand 3 receives UNSPEC_VUZP2.
;; NOTE(review): the unspec code of the first set is not visible in this
;; excerpt (presumably UNSPEC_VUZP1).
4219 (define_expand "neon_vuzp<mode>_internal"
4221 [(set (match_operand:VDQW 0 "s_register_operand" "")
4222 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4223 (match_operand:VDQW 2 "s_register_operand" "")]
4225 (set (match_operand:VDQW 3 "s_register_operand" "")
4226 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4231 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the unzip: outputs are operands 0 and 2; input operand
;; 1 is tied to output 0 and input operand 3 is tied to output 2, so the
;; single "vuzp" instruction names only the two output registers.
4232 (define_insn "*neon_vuzp<mode>_insn"
4233 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4234 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4235 (match_operand:VDQW 3 "s_register_operand" "2")]
4237 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4238 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4241 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4242 [(set_attr "type" "neon_zip<q>")]
;; Reinterpret any VDX-mode register (operand 1) as V8QI (operand 0).  The
;; expander delegates entirely to neon_reinterpret.
4245 (define_expand "neon_vreinterpretv8qi<mode>"
4246 [(match_operand:V8QI 0 "s_register_operand" "")
4247 (match_operand:VDX 1 "s_register_operand" "")]
4250 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDX-mode register (operand 1) as V4HI (operand 0).  The
;; expander delegates entirely to neon_reinterpret.
4254 (define_expand "neon_vreinterpretv4hi<mode>"
4255 [(match_operand:V4HI 0 "s_register_operand" "")
4256 (match_operand:VDX 1 "s_register_operand" "")]
4259 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDX-mode register (operand 1) as V2SI (operand 0).  The
;; expander delegates entirely to neon_reinterpret.
4263 (define_expand "neon_vreinterpretv2si<mode>"
4264 [(match_operand:V2SI 0 "s_register_operand" "")
4265 (match_operand:VDX 1 "s_register_operand" "")]
4268 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDX-mode register (operand 1) as V2SF (operand 0).  The
;; expander delegates entirely to neon_reinterpret.
4272 (define_expand "neon_vreinterpretv2sf<mode>"
4273 [(match_operand:V2SF 0 "s_register_operand" "")
4274 (match_operand:VDX 1 "s_register_operand" "")]
4277 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDX-mode register (operand 1) as DI (operand 0).  The
;; expander delegates entirely to neon_reinterpret.
4281 (define_expand "neon_vreinterpretdi<mode>"
4282 [(match_operand:DI 0 "s_register_operand" "")
4283 (match_operand:VDX 1 "s_register_operand" "")]
4286 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as TI (operand 0).  The
;; expander delegates entirely to neon_reinterpret.
4290 (define_expand "neon_vreinterpretti<mode>"
4291 [(match_operand:TI 0 "s_register_operand" "")
4292 (match_operand:VQXMOV 1 "s_register_operand" "")]
4295 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V16QI (operand 0).
;; The expander delegates entirely to neon_reinterpret.
4300 (define_expand "neon_vreinterpretv16qi<mode>"
4301 [(match_operand:V16QI 0 "s_register_operand" "")
4302 (match_operand:VQXMOV 1 "s_register_operand" "")]
4305 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V8HI (operand 0).
;; The expander delegates entirely to neon_reinterpret.
4309 (define_expand "neon_vreinterpretv8hi<mode>"
4310 [(match_operand:V8HI 0 "s_register_operand" "")
4311 (match_operand:VQXMOV 1 "s_register_operand" "")]
4314 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SI (operand 0).
;; The expander delegates entirely to neon_reinterpret.
4318 (define_expand "neon_vreinterpretv4si<mode>"
4319 [(match_operand:V4SI 0 "s_register_operand" "")
4320 (match_operand:VQXMOV 1 "s_register_operand" "")]
4323 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SF (operand 0).
;; The expander delegates entirely to neon_reinterpret.
4327 (define_expand "neon_vreinterpretv4sf<mode>"
4328 [(match_operand:V4SF 0 "s_register_operand" "")
4329 (match_operand:VQXMOV 1 "s_register_operand" "")]
4332 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V2DI (operand 0).
;; The expander delegates entirely to neon_reinterpret.
4336 (define_expand "neon_vreinterpretv2di<mode>"
4337 [(match_operand:V2DI 0 "s_register_operand" "")
4338 (match_operand:VQXMOV 1 "s_register_operand" "")]
4341 neon_reinterpret (operands[0], operands[1]);
;; Standard-named expander for a single-vector structure load in the VDQX
;; modes: operand 0 (register) is set from an unspec wrapping operand 1
;; (a neon_struct_operand).
;; NOTE(review): the unspec code and condition are not visible here.
4345 (define_expand "vec_load_lanes<mode><mode>"
4346 [(set (match_operand:VDQX 0 "s_register_operand")
4347 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-vector load for the VDQX modes: emits "vld1.<size>" with the
;; destination printed via the %h modifier and the address via %A.
4351 (define_insn "neon_vld1<mode>"
4352 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4353 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4356 "vld1.<V_sz_elem>\t%h0, %A1"
4357 [(set_attr "type" "neon_load1_1reg<q>")]
;; Load one element from memory (operand 1) into lane operand 3 of a VDX
;; vector; operand 2 is the previous vector value and is tied to the output.
;; The lane number is validated against GET_MODE_NUNITS and reported with
;; error ("lane out of range") when it is negative or too large.  Two
;; templates exist: a whole-register "vld1" and a single-lane "vld1".
;; NOTE(review): the condition choosing between the two templates is not
;; visible in this excerpt (presumably the one-element-vector case).
4360 (define_insn "neon_vld1_lane<mode>"
4361 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4362 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4363 (match_operand:VDX 2 "s_register_operand" "0")
4364 (match_operand:SI 3 "immediate_operand" "i")]
4368 HOST_WIDE_INT lane = INTVAL (operands[3]);
4369 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4370 if (lane < 0 || lane >= max)
4371 error ("lane out of range");
4373 return "vld1.<V_sz_elem>\t%P0, %A1";
4375 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4377 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Quad-register (VQX) variant of the single-lane load.  The lane number is
;; range-checked against GET_MODE_NUNITS; lanes in the upper half
;; (lane >= max / 2) take a separate path, after which operand 3 is replaced
;; by GEN_INT (lane) and operand 0 is rewritten as a <V_HALF>mode register
;; so that the D-register form of "vld1" can be printed.
;; NOTE(review): the statements that adjust `lane` and `regno` for the
;; upper half, and the condition selecting between the two templates, are
;; not visible in this excerpt; confirm against the full source.
4380 (define_insn "neon_vld1_lane<mode>"
4381 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4382 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4383 (match_operand:VQX 2 "s_register_operand" "0")
4384 (match_operand:SI 3 "immediate_operand" "i")]
4388 HOST_WIDE_INT lane = INTVAL (operands[3]);
4389 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4390 int regno = REGNO (operands[0]);
4391 if (lane < 0 || lane >= max)
4392 error ("lane out of range");
4393 else if (lane >= max / 2)
4397 operands[3] = GEN_INT (lane);
4399 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4401 return "vld1.<V_sz_elem>\t%P0, %A1";
4403 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4405 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one element from memory and duplicate it into every lane of a VD
;; vector (expressed as vec_duplicate); emits the all-lanes "vld1" form.
4408 (define_insn "neon_vld1_dup<mode>"
4409 [(set (match_operand:VD 0 "s_register_operand" "=w")
4410 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4412 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4413 [(set_attr "type" "neon_load1_all_lanes<q>")]
4416 ;; Special case for DImode. Treat it exactly like a simple load.
;; DImode "dup" load: operand 0 (DI register) is set from an unspec wrapping
;; the DI memory operand 1.
;; NOTE(review): the unspec code and condition are not visible here.
4417 (define_expand "neon_vld1_dupdi"
4418 [(set (match_operand:DI 0 "s_register_operand" "")
4419 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Quad-register (VQ) duplicate load: one element from memory is replicated
;; into all lanes; the template lists both halves of the destination
;; (%e0 and %f0) in the all-lanes "vld1" form.
4425 (define_insn "neon_vld1_dup<mode>"
4426 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4427 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4430 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4432 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate load.  After reload the pattern is split into two steps:
;; neon_vld1_dupdi loads the DI value into the low DImode part of operand 0,
;; then a move copies that low part into the high DImode part.  The length
;; attribute is 8.
4435 (define_insn_and_split "neon_vld1_dupv2di"
4436 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4437 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4440 "&& reload_completed"
4443 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4444 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4445 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4448 [(set_attr "length" "8")
4449 (set_attr "type" "neon_load1_all_lanes_q")]
;; Standard-named expander for a single-vector structure store in the VDQX
;; modes: operand 0 (a neon_struct_operand) is set from an unspec wrapping
;; register operand 1.
;; NOTE(review): the unspec code and condition are not visible here.
4452 (define_expand "vec_store_lanes<mode><mode>"
4453 [(set (match_operand:VDQX 0 "neon_struct_operand")
4454 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector store for the VDQX modes: emits "vst1.<size>" with the
;; source printed via the %h modifier and the address via %A.
4458 (define_insn "neon_vst1<mode>"
4459 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4460 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4463 "vst1.<V_sz_elem>\t%h1, %A0"
4464 [(set_attr "type" "neon_store1_1reg<q>")])
;; Store lane operand 2 of the VDX vector operand 1 to the element-sized
;; memory operand 0.  The lane number is validated against GET_MODE_NUNITS
;; and reported with error ("lane out of range") when invalid.  Two
;; templates exist: a whole-register "vst1" and a single-lane "vst1".
;; NOTE(review): the condition choosing between the two templates is not
;; visible in this excerpt (presumably the one-element-vector case).
4466 (define_insn "neon_vst1_lane<mode>"
4467 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4469 [(match_operand:VDX 1 "s_register_operand" "w")
4470 (match_operand:SI 2 "immediate_operand" "i")]
4474 HOST_WIDE_INT lane = INTVAL (operands[2]);
4475 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4476 if (lane < 0 || lane >= max)
4477 error ("lane out of range");
4479 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4481 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4483 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Quad-register (VQX) variant of the single-lane store.  The lane number is
;; range-checked against GET_MODE_NUNITS; lanes in the upper half
;; (lane >= max / 2) take a separate path, after which operand 2 is replaced
;; by GEN_INT (lane) and operand 1 is rewritten as a <V_HALF>mode register
;; so that the D-register form of "vst1" can be printed.
;; NOTE(review): the statements that adjust `lane` and `regno` for the
;; upper half, and the condition selecting between the two templates, are
;; not visible in this excerpt; confirm against the full source.
4486 (define_insn "neon_vst1_lane<mode>"
4487 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4489 [(match_operand:VQX 1 "s_register_operand" "w")
4490 (match_operand:SI 2 "immediate_operand" "i")]
4494 HOST_WIDE_INT lane = INTVAL (operands[2]);
4495 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4496 int regno = REGNO (operands[1]);
4497 if (lane < 0 || lane >= max)
4498 error ("lane out of range");
4499 else if (lane >= max / 2)
4503 operands[2] = GEN_INT (lane);
4505 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4507 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4509 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4511 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Standard-named expander for a two-vector structure load of D-register
;; (VDX) vectors: a TImode register is set from an unspec of a TImode
;; memory operand.  The inner UNSPEC_VSTRUCTDUMMY unspec carries only the
;; vector mode so the pattern can be iterated over VDX.
;; NOTE(review): the outer unspec code and condition are not visible here.
4514 (define_expand "vec_load_lanesti<mode>"
4515 [(set (match_operand:TI 0 "s_register_operand")
4516 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4517 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load into a TImode register pair (VDX modes).
;; 64-bit elements are loaded with "vld1.64"; all other element sizes use
;; "vld2".  The "type" attribute follows the same split:
;; neon_load1_2reg<q> for 64-bit elements, neon_load2_2reg<q> otherwise.
4521 (define_insn "neon_vld2<mode>"
4522 [(set (match_operand:TI 0 "s_register_operand" "=w")
4523 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4524 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4528 if (<V_sz_elem> == 64)
4529 return "vld1.64\t%h0, %A1";
4531 return "vld2.<V_sz_elem>\t%h0, %A1";
4534 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4535 (const_string "neon_load1_2reg<q>")
4536 (const_string "neon_load2_2reg<q>")))]
;; Standard-named expander for a two-vector structure load of Q-register
;; (VQ) vectors: an OImode register is set from an unspec of an OImode
;; memory operand; the UNSPEC_VSTRUCTDUMMY unspec carries the vector mode.
;; NOTE(review): the outer unspec code and condition are not visible here.
4539 (define_expand "vec_load_lanesoi<mode>"
4540 [(set (match_operand:OI 0 "s_register_operand")
4541 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4542 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load for Q-register (VQ) vectors into an OImode
;; register group; always emits "vld2" with the register list printed via
;; the %h modifier.
4546 (define_insn "neon_vld2<mode>"
4547 [(set (match_operand:OI 0 "s_register_operand" "=w")
4548 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4549 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4552 "vld2.<V_sz_elem>\t%h0, %A1"
4553 [(set_attr "type" "neon_load2_2reg_q")])
;; Load a two-element structure into lane operand 3 of two D-register (VD)
;; vectors held in a TImode register pair; operand 2 is the previous value
;; and is tied to the output.  The lane is range-checked against
;; GET_MODE_NUNITS (error "lane out of range").  The two destination
;; registers are rebuilt as DImode registers at regno and regno + 2.
;; NOTE(review): no early exit after error () is visible in this excerpt,
;; so the instruction appears to be emitted even for a bad lane.
4555 (define_insn "neon_vld2_lane<mode>"
4556 [(set (match_operand:TI 0 "s_register_operand" "=w")
4557 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4558 (match_operand:TI 2 "s_register_operand" "0")
4559 (match_operand:SI 3 "immediate_operand" "i")
4560 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4564 HOST_WIDE_INT lane = INTVAL (operands[3]);
4565 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4566 int regno = REGNO (operands[0]);
4568 if (lane < 0 || lane >= max)
4569 error ("lane out of range");
4570 ops[0] = gen_rtx_REG (DImode, regno);
4571 ops[1] = gen_rtx_REG (DImode, regno + 2);
4572 ops[2] = operands[1];
4573 ops[3] = operands[3];
4574 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4577 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Quad-register (VMQ) variant of the two-element lane load, operating on
;; an OImode register group.  The lane is range-checked; lanes in the upper
;; half (lane >= max / 2) take a separate path.  The instruction is printed
;; with DImode registers at regno and regno + 4 and the lane taken from the
;; (possibly adjusted) local `lane`.
;; NOTE(review): the statements adjusting `lane` and `regno` for the upper
;; half are not visible in this excerpt; confirm against the full source.
4580 (define_insn "neon_vld2_lane<mode>"
4581 [(set (match_operand:OI 0 "s_register_operand" "=w")
4582 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4583 (match_operand:OI 2 "s_register_operand" "0")
4584 (match_operand:SI 3 "immediate_operand" "i")
4585 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4589 HOST_WIDE_INT lane = INTVAL (operands[3]);
4590 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4591 int regno = REGNO (operands[0]);
4593 if (lane < 0 || lane >= max)
4594 error ("lane out of range");
4595 else if (lane >= max / 2)
4600 ops[0] = gen_rtx_REG (DImode, regno);
4601 ops[1] = gen_rtx_REG (DImode, regno + 4);
4602 ops[2] = operands[1];
4603 ops[3] = GEN_INT (lane);
4604 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4607 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load a two-element structure and replicate it to all lanes of two
;; D-register (VDX) vectors.  Multi-element modes use the all-lanes "vld2"
;; form; single-element modes (GET_MODE_NUNITS == 1) fall back to a plain
;; "vld1".  The "type" attribute mirrors that choice via <V_mode_nunits>.
4610 (define_insn "neon_vld2_dup<mode>"
4611 [(set (match_operand:TI 0 "s_register_operand" "=w")
4612 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4613 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4617 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4618 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4620 return "vld1.<V_sz_elem>\t%h0, %A1";
4623 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4624 (const_string "neon_load2_all_lanes<q>")
4625 (const_string "neon_load1_1reg<q>")))]
;; Standard-named expander for a two-vector structure store of D-register
;; (VDX) vectors: a TImode memory operand is set from an unspec of a TImode
;; register; the UNSPEC_VSTRUCTDUMMY unspec carries the vector mode.
;; NOTE(review): the outer unspec code and condition are not visible here.
4628 (define_expand "vec_store_lanesti<mode>"
4629 [(set (match_operand:TI 0 "neon_struct_operand")
4630 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4631 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store from a TImode register pair (VDX modes).
;; 64-bit elements are stored with "vst1.64"; all other sizes use "vst2".
;; NOTE(review): for the non-64-bit case the "type" attribute is
;; neon_store2_one_lane<q> although this is a whole-structure store (the
;; matching load uses neon_load2_2reg<q>) -- verify that this scheduling
;; type is intended.
4635 (define_insn "neon_vst2<mode>"
4636 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4637 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4638 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4642 if (<V_sz_elem> == 64)
4643 return "vst1.64\t%h1, %A0";
4645 return "vst2.<V_sz_elem>\t%h1, %A0";
4648 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4649 (const_string "neon_store1_2reg<q>")
4650 (const_string "neon_store2_one_lane<q>")))]
;; Standard-named expander for a two-vector structure store of Q-register
;; (VQ) vectors: an OImode memory operand is set from an unspec of an
;; OImode register; the UNSPEC_VSTRUCTDUMMY unspec carries the vector mode.
;; NOTE(review): the outer unspec code and condition are not visible here.
4653 (define_expand "vec_store_lanesoi<mode>"
4654 [(set (match_operand:OI 0 "neon_struct_operand")
4655 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4656 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store for Q-register (VQ) vectors from an OImode
;; register group; always emits "vst2" with the register list printed via
;; the %h modifier.
4660 (define_insn "neon_vst2<mode>"
4661 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4662 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4663 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4666 "vst2.<V_sz_elem>\t%h1, %A0"
4667 [(set_attr "type" "neon_store2_4reg<q>")]
;; Store lane operand 2 of two D-register (VD) vectors, held in the TImode
;; register pair operand 1, as a two-element structure in memory.  The lane
;; is range-checked against GET_MODE_NUNITS (error "lane out of range").
;; The source registers are rebuilt as DImode registers at regno and
;; regno + 2.
;; NOTE(review): no early exit after error () is visible in this excerpt.
4670 (define_insn "neon_vst2_lane<mode>"
4671 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4672 (unspec:<V_two_elem>
4673 [(match_operand:TI 1 "s_register_operand" "w")
4674 (match_operand:SI 2 "immediate_operand" "i")
4675 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4679 HOST_WIDE_INT lane = INTVAL (operands[2]);
4680 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4681 int regno = REGNO (operands[1]);
4683 if (lane < 0 || lane >= max)
4684 error ("lane out of range");
4685 ops[0] = operands[0];
4686 ops[1] = gen_rtx_REG (DImode, regno);
4687 ops[2] = gen_rtx_REG (DImode, regno + 2);
4688 ops[3] = operands[2];
4689 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4692 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Quad-register (VMQ) variant of the two-element lane store, reading from
;; an OImode register group.  The lane is range-checked; lanes in the upper
;; half (lane >= max / 2) take a separate path.  The instruction is printed
;; with DImode registers at regno and regno + 4 and the lane taken from the
;; (possibly adjusted) local `lane`.
;; NOTE(review): the statements adjusting `lane` and `regno` for the upper
;; half are not visible in this excerpt; confirm against the full source.
4695 (define_insn "neon_vst2_lane<mode>"
4696 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4697 (unspec:<V_two_elem>
4698 [(match_operand:OI 1 "s_register_operand" "w")
4699 (match_operand:SI 2 "immediate_operand" "i")
4700 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4704 HOST_WIDE_INT lane = INTVAL (operands[2]);
4705 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4706 int regno = REGNO (operands[1]);
4708 if (lane < 0 || lane >= max)
4709 error ("lane out of range");
4710 else if (lane >= max / 2)
4715 ops[0] = operands[0];
4716 ops[1] = gen_rtx_REG (DImode, regno);
4717 ops[2] = gen_rtx_REG (DImode, regno + 4);
4718 ops[3] = GEN_INT (lane);
4719 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4722 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Standard-named expander for a three-vector structure load of D-register
;; (VDX) vectors: an EImode register is set from an unspec of an EImode
;; memory operand; the UNSPEC_VSTRUCTDUMMY unspec carries the vector mode.
;; NOTE(review): the outer unspec code and condition are not visible here.
4725 (define_expand "vec_load_lanesei<mode>"
4726 [(set (match_operand:EI 0 "s_register_operand")
4727 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4728 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure load into an EImode register group (VDX modes).
;; 64-bit elements are loaded with "vld1.64"; all other element sizes use
;; "vld3".  The "type" attribute follows the same split:
;; neon_load1_3reg<q> for 64-bit elements, neon_load3_3reg<q> otherwise.
4732 (define_insn "neon_vld3<mode>"
4733 [(set (match_operand:EI 0 "s_register_operand" "=w")
4734 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4735 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4739 if (<V_sz_elem> == 64)
4740 return "vld1.64\t%h0, %A1";
4742 return "vld3.<V_sz_elem>\t%h0, %A1";
4745 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4746 (const_string "neon_load1_3reg<q>")
4747 (const_string "neon_load3_3reg<q>")))]
;; Standard-named expander for a three-vector structure load of Q-register
;; (VQ) vectors (CImode register and memory operands).  It simply forwards
;; to the neon_vld3<mode> expander.
4750 (define_expand "vec_load_lanesci<mode>"
4751 [(match_operand:CI 0 "s_register_operand")
4752 (match_operand:CI 1 "neon_struct_operand")
4753 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4756 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Q-register (VQ) three-vector structure load, expanded as two insns.  The
;; CImode memory operand is re-addressed as two consecutive EImode chunks
;; (offset 0 and offset GET_MODE_SIZE (EImode)); neon_vld3qa loads the first
;; chunk and neon_vld3qb loads the second, taking operand 0 as an input as
;; well so the result of the first insn is preserved.
4760 (define_expand "neon_vld3<mode>"
4761 [(match_operand:CI 0 "s_register_operand")
4762 (match_operand:CI 1 "neon_struct_operand")
4763 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4768 mem = adjust_address (operands[1], EImode, 0);
4769 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4770 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4771 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of the Q-register three-vector load: a "vld3" from an EImode
;; memory chunk into the DImode registers at regno, regno + 4 and
;; regno + 8 of the CImode destination group.
4775 (define_insn "neon_vld3qa<mode>"
4776 [(set (match_operand:CI 0 "s_register_operand" "=w")
4777 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4778 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4782 int regno = REGNO (operands[0]);
4784 ops[0] = gen_rtx_REG (DImode, regno);
4785 ops[1] = gen_rtx_REG (DImode, regno + 4);
4786 ops[2] = gen_rtx_REG (DImode, regno + 8);
4787 ops[3] = operands[1];
4788 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4791 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the Q-register three-vector load: a "vld3" from an EImode
;; memory chunk into the DImode registers at regno + 2, regno + 6 and
;; regno + 10.  Operand 2 (the CImode group so far) is tied to the output
;; so the registers written by the first half are kept.
4794 (define_insn "neon_vld3qb<mode>"
4795 [(set (match_operand:CI 0 "s_register_operand" "=w")
4796 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4797 (match_operand:CI 2 "s_register_operand" "0")
4798 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4802 int regno = REGNO (operands[0]);
4804 ops[0] = gen_rtx_REG (DImode, regno + 2);
4805 ops[1] = gen_rtx_REG (DImode, regno + 6);
4806 ops[2] = gen_rtx_REG (DImode, regno + 10);
4807 ops[3] = operands[1];
4808 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4811 [(set_attr "type" "neon_load3_3reg<q>")]
;; Load a three-element structure into lane operand 3 of three D-register
;; (VD) vectors held in an EImode group; operand 2 is the previous value and
;; is tied to the output.  The lane is range-checked against
;; GET_MODE_NUNITS (error "lane out of range").  Destination registers are
;; the DImode registers at regno, regno + 2 and regno + 4.  The memory
;; operand is printed with plain %3, i.e. without the A modifier.
;; NOTE(review): no early exit after error () is visible in this excerpt.
4814 (define_insn "neon_vld3_lane<mode>"
4815 [(set (match_operand:EI 0 "s_register_operand" "=w")
4816 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4817 (match_operand:EI 2 "s_register_operand" "0")
4818 (match_operand:SI 3 "immediate_operand" "i")
4819 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4823 HOST_WIDE_INT lane = INTVAL (operands[3]);
4824 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4825 int regno = REGNO (operands[0]);
4827 if (lane < 0 || lane >= max)
4828 error ("lane out of range");
4829 ops[0] = gen_rtx_REG (DImode, regno);
4830 ops[1] = gen_rtx_REG (DImode, regno + 2);
4831 ops[2] = gen_rtx_REG (DImode, regno + 4);
4832 ops[3] = operands[1];
4833 ops[4] = operands[3];
4834 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4838 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Quad-register (VMQ) variant of the three-element lane load, operating on
;; a CImode register group.  The lane is range-checked; lanes in the upper
;; half (lane >= max / 2) take a separate path.  The instruction is printed
;; with DImode registers at regno, regno + 4 and regno + 8, the lane taken
;; from the (possibly adjusted) local `lane`, and the memory operand printed
;; with plain %3.
;; NOTE(review): the statements adjusting `lane` and `regno` for the upper
;; half are not visible in this excerpt; confirm against the full source.
4841 (define_insn "neon_vld3_lane<mode>"
4842 [(set (match_operand:CI 0 "s_register_operand" "=w")
4843 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4844 (match_operand:CI 2 "s_register_operand" "0")
4845 (match_operand:SI 3 "immediate_operand" "i")
4846 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4850 HOST_WIDE_INT lane = INTVAL (operands[3]);
4851 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4852 int regno = REGNO (operands[0]);
4854 if (lane < 0 || lane >= max)
4855 error ("lane out of range");
4856 else if (lane >= max / 2)
4861 ops[0] = gen_rtx_REG (DImode, regno);
4862 ops[1] = gen_rtx_REG (DImode, regno + 4);
4863 ops[2] = gen_rtx_REG (DImode, regno + 8);
4864 ops[3] = operands[1];
4865 ops[4] = GEN_INT (lane);
4866 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4870 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a three-element structure and replicate it to all lanes of three
;; D-register (VDX) vectors.  Multi-element modes print the all-lanes "vld3"
;; form using DImode registers at regno, regno + 2 and regno + 4 (memory
;; operand printed with plain %3); single-element modes fall back to a
;; plain "vld1".  The "type" attribute mirrors that choice.
4873 (define_insn "neon_vld3_dup<mode>"
4874 [(set (match_operand:EI 0 "s_register_operand" "=w")
4875 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4876 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4880 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4882 int regno = REGNO (operands[0]);
4884 ops[0] = gen_rtx_REG (DImode, regno);
4885 ops[1] = gen_rtx_REG (DImode, regno + 2);
4886 ops[2] = gen_rtx_REG (DImode, regno + 4);
4887 ops[3] = operands[1];
4888 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4892 return "vld1.<V_sz_elem>\t%h0, %A1";
4895 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4896 (const_string "neon_load3_all_lanes<q>")
4897 (const_string "neon_load1_1reg<q>")))])
;; Standard-named expander for a three-vector structure store of D-register
;; (VDX) vectors: an EImode memory operand is set from an unspec of an
;; EImode register; the UNSPEC_VSTRUCTDUMMY unspec carries the vector mode.
;; NOTE(review): the outer unspec code and condition are not visible here.
4899 (define_expand "vec_store_lanesei<mode>"
4900 [(set (match_operand:EI 0 "neon_struct_operand")
4901 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4902 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure store from an EImode register group (VDX modes).
;; 64-bit elements are stored with "vst1.64"; all other sizes use "vst3".
;; NOTE(review): for the non-64-bit case the "type" attribute is
;; neon_store3_one_lane<q> although this is a whole-structure store (the
;; matching load uses neon_load3_3reg<q>) -- verify that this scheduling
;; type is intended.
4906 (define_insn "neon_vst3<mode>"
4907 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4908 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4909 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4913 if (<V_sz_elem> == 64)
4914 return "vst1.64\t%h1, %A0";
4916 return "vst3.<V_sz_elem>\t%h1, %A0";
4919 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4920 (const_string "neon_store1_3reg<q>")
4921 (const_string "neon_store3_one_lane<q>")))])
;; Standard-named expander for a three-vector structure store of Q-register
;; (VQ) vectors (CImode memory and register operands).  It simply forwards
;; to the neon_vst3<mode> expander.
4923 (define_expand "vec_store_lanesci<mode>"
4924 [(match_operand:CI 0 "neon_struct_operand")
4925 (match_operand:CI 1 "s_register_operand")
4926 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4929 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Q-register (VQ) three-vector structure store, expanded as two insns.  The
;; CImode memory operand is re-addressed as two consecutive EImode chunks
;; (offset 0 and offset GET_MODE_SIZE (EImode)); neon_vst3qa stores to the
;; first chunk and neon_vst3qb to the second, both reading operand 1.
4933 (define_expand "neon_vst3<mode>"
4934 [(match_operand:CI 0 "neon_struct_operand")
4935 (match_operand:CI 1 "s_register_operand")
4936 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4941 mem = adjust_address (operands[0], EImode, 0);
4942 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4943 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4944 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of the Q-register three-vector store: a "vst3" to an EImode
;; memory chunk from the DImode registers at regno, regno + 4 and
;; regno + 8 of the CImode source group.
4948 (define_insn "neon_vst3qa<mode>"
4949 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4950 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4951 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4955 int regno = REGNO (operands[1]);
4957 ops[0] = operands[0];
4958 ops[1] = gen_rtx_REG (DImode, regno);
4959 ops[2] = gen_rtx_REG (DImode, regno + 4);
4960 ops[3] = gen_rtx_REG (DImode, regno + 8);
4961 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4964 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the Q-register three-vector store: a "vst3" to an EImode
;; memory chunk from the DImode registers at regno + 2, regno + 6 and
;; regno + 10 of the CImode source group.
4967 (define_insn "neon_vst3qb<mode>"
4968 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4969 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4970 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4974 int regno = REGNO (operands[1]);
4976 ops[0] = operands[0];
4977 ops[1] = gen_rtx_REG (DImode, regno + 2);
4978 ops[2] = gen_rtx_REG (DImode, regno + 6);
4979 ops[3] = gen_rtx_REG (DImode, regno + 10);
4980 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4983 [(set_attr "type" "neon_store3_3reg<q>")]
;; Store lane operand 2 of three D-register (VD) vectors, held in the EImode
;; group operand 1, as a three-element structure in memory.  The lane is
;; range-checked against GET_MODE_NUNITS (error "lane out of range").
;; Source registers are the DImode registers at regno, regno + 2 and
;; regno + 4.  The memory operand is printed with plain %0, i.e. without
;; the A modifier.
;; NOTE(review): no early exit after error () is visible in this excerpt.
4986 (define_insn "neon_vst3_lane<mode>"
4987 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4988 (unspec:<V_three_elem>
4989 [(match_operand:EI 1 "s_register_operand" "w")
4990 (match_operand:SI 2 "immediate_operand" "i")
4991 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4995 HOST_WIDE_INT lane = INTVAL (operands[2]);
4996 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4997 int regno = REGNO (operands[1]);
4999 if (lane < 0 || lane >= max)
5000 error ("lane out of range");
5001 ops[0] = operands[0];
5002 ops[1] = gen_rtx_REG (DImode, regno);
5003 ops[2] = gen_rtx_REG (DImode, regno + 2);
5004 ops[3] = gen_rtx_REG (DImode, regno + 4);
5005 ops[4] = operands[2];
5006 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5010 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Quad-register (VMQ) variant of the three-element lane store, reading from
;; a CImode register group.  The lane is range-checked; lanes in the upper
;; half (lane >= max / 2) take a separate path.  The instruction is printed
;; with DImode registers at regno, regno + 4 and regno + 8, the lane taken
;; from the (possibly adjusted) local `lane`, and the memory operand printed
;; with plain %0.
;; NOTE(review): the statements adjusting `lane` and `regno` for the upper
;; half are not visible in this excerpt; confirm against the full source.
5013 (define_insn "neon_vst3_lane<mode>"
5014 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5015 (unspec:<V_three_elem>
5016 [(match_operand:CI 1 "s_register_operand" "w")
5017 (match_operand:SI 2 "immediate_operand" "i")
5018 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5022 HOST_WIDE_INT lane = INTVAL (operands[2]);
5023 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5024 int regno = REGNO (operands[1]);
5026 if (lane < 0 || lane >= max)
5027 error ("lane out of range");
5028 else if (lane >= max / 2)
5033 ops[0] = operands[0];
5034 ops[1] = gen_rtx_REG (DImode, regno);
5035 ops[2] = gen_rtx_REG (DImode, regno + 4);
5036 ops[3] = gen_rtx_REG (DImode, regno + 8);
5037 ops[4] = GEN_INT (lane);
5038 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5042 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-named expander for a four-vector structure load of D-register
;; (VDX) vectors: an OImode register is set from an unspec of an OImode
;; memory operand; the UNSPEC_VSTRUCTDUMMY unspec carries the vector mode.
;; NOTE(review): the outer unspec code and condition are not visible here.
5045 (define_expand "vec_load_lanesoi<mode>"
5046 [(set (match_operand:OI 0 "s_register_operand")
5047 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5048 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-vector structure load into an OImode register group (VDX modes).
;; 64-bit elements are loaded with "vld1.64"; all other element sizes use
;; "vld4".  The "type" attribute follows the same split:
;; neon_load1_4reg<q> for 64-bit elements, neon_load4_4reg<q> otherwise.
5052 (define_insn "neon_vld4<mode>"
5053 [(set (match_operand:OI 0 "s_register_operand" "=w")
5054 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5055 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5059 if (<V_sz_elem> == 64)
5060 return "vld1.64\t%h0, %A1";
5062 return "vld4.<V_sz_elem>\t%h0, %A1";
5065 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5066 (const_string "neon_load1_4reg<q>")
5067 (const_string "neon_load4_4reg<q>")))]
;; Standard-named expander for a four-vector structure load of Q-register
;; (VQ) vectors (XImode register and memory operands).  It simply forwards
;; to the neon_vld4<mode> expander.
5070 (define_expand "vec_load_lanesxi<mode>"
5071 [(match_operand:XI 0 "s_register_operand")
5072 (match_operand:XI 1 "neon_struct_operand")
5073 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5076 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Q-register (VQ) four-vector structure load, expanded as two insns.  The
;; XImode memory operand is re-addressed as two consecutive OImode chunks
;; (offset 0 and offset GET_MODE_SIZE (OImode)); neon_vld4qa loads the first
;; chunk and neon_vld4qb loads the second, taking operand 0 as an input as
;; well so the result of the first insn is preserved.
5080 (define_expand "neon_vld4<mode>"
5081 [(match_operand:XI 0 "s_register_operand")
5082 (match_operand:XI 1 "neon_struct_operand")
5083 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5088 mem = adjust_address (operands[1], OImode, 0);
5089 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5090 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5091 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of the Q-register four-vector load: a "vld4" from an OImode
;; memory chunk into the DImode registers at regno, regno + 4, regno + 8
;; and regno + 12 of the XImode destination group.
5095 (define_insn "neon_vld4qa<mode>"
5096 [(set (match_operand:XI 0 "s_register_operand" "=w")
5097 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5098 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5102 int regno = REGNO (operands[0]);
5104 ops[0] = gen_rtx_REG (DImode, regno);
5105 ops[1] = gen_rtx_REG (DImode, regno + 4);
5106 ops[2] = gen_rtx_REG (DImode, regno + 8);
5107 ops[3] = gen_rtx_REG (DImode, regno + 12);
5108 ops[4] = operands[1];
5109 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5112 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the Q-register four-vector load: a "vld4" from an OImode
;; memory chunk into the DImode registers at regno + 2, regno + 6,
;; regno + 10 and regno + 14.  Operand 2 (the XImode group so far) is tied
;; to the output so the registers written by the first half are kept.
5115 (define_insn "neon_vld4qb<mode>"
5116 [(set (match_operand:XI 0 "s_register_operand" "=w")
5117 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5118 (match_operand:XI 2 "s_register_operand" "0")
5119 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5123 int regno = REGNO (operands[0]);
5125 ops[0] = gen_rtx_REG (DImode, regno + 2);
5126 ops[1] = gen_rtx_REG (DImode, regno + 6);
5127 ops[2] = gen_rtx_REG (DImode, regno + 10);
5128 ops[3] = gen_rtx_REG (DImode, regno + 14);
5129 ops[4] = operands[1];
5130 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5133 [(set_attr "type" "neon_load4_4reg<q>")]
;; Load a four-element structure into lane operand 3 of four D-register
;; (VD) vectors held in an OImode group; operand 2 is the previous value and
;; is tied to the output.  The lane is range-checked against
;; GET_MODE_NUNITS (error "lane out of range").  Destination registers are
;; the DImode registers at regno, regno + 2, regno + 4 and regno + 6.
;; NOTE(review): no early exit after error () is visible in this excerpt.
5136 (define_insn "neon_vld4_lane<mode>"
5137 [(set (match_operand:OI 0 "s_register_operand" "=w")
5138 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5139 (match_operand:OI 2 "s_register_operand" "0")
5140 (match_operand:SI 3 "immediate_operand" "i")
5141 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5145 HOST_WIDE_INT lane = INTVAL (operands[3]);
5146 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5147 int regno = REGNO (operands[0]);
5149 if (lane < 0 || lane >= max)
5150 error ("lane out of range");
5151 ops[0] = gen_rtx_REG (DImode, regno);
5152 ops[1] = gen_rtx_REG (DImode, regno + 2);
5153 ops[2] = gen_rtx_REG (DImode, regno + 4);
5154 ops[3] = gen_rtx_REG (DImode, regno + 6);
5155 ops[4] = operands[1];
5156 ops[5] = operands[3];
5157 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5161 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Quad-register (VMQ) variant of the four-element lane load, operating on
;; an XImode register group.  The lane is range-checked; lanes in the upper
;; half (lane >= max / 2) take a separate path.  The instruction is printed
;; with DImode registers at regno, regno + 4, regno + 8 and regno + 12 and
;; the lane taken from the (possibly adjusted) local `lane`.
;; NOTE(review): the statements adjusting `lane` and `regno` for the upper
;; half are not visible in this excerpt; confirm against the full source.
5164 (define_insn "neon_vld4_lane<mode>"
5165 [(set (match_operand:XI 0 "s_register_operand" "=w")
5166 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5167 (match_operand:XI 2 "s_register_operand" "0")
5168 (match_operand:SI 3 "immediate_operand" "i")
5169 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5173 HOST_WIDE_INT lane = INTVAL (operands[3]);
5174 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5175 int regno = REGNO (operands[0]);
5177 if (lane < 0 || lane >= max)
5178 error ("lane out of range");
5179 else if (lane >= max / 2)
5184 ops[0] = gen_rtx_REG (DImode, regno);
5185 ops[1] = gen_rtx_REG (DImode, regno + 4);
5186 ops[2] = gen_rtx_REG (DImode, regno + 8);
5187 ops[3] = gen_rtx_REG (DImode, regno + 12);
5188 ops[4] = operands[1];
5189 ops[5] = GEN_INT (lane);
5190 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5194 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load a four-element structure and replicate it to all lanes of four
;; D-register (VDX) vectors.  Multi-element modes print the all-lanes "vld4"
;; form using DImode registers at regno, regno + 2, regno + 4 and
;; regno + 6; single-element modes fall back to a plain "vld1".  The "type"
;; attribute mirrors that choice via <V_mode_nunits>.
5197 (define_insn "neon_vld4_dup<mode>"
5198 [(set (match_operand:OI 0 "s_register_operand" "=w")
5199 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5200 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5204 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5206 int regno = REGNO (operands[0]);
5208 ops[0] = gen_rtx_REG (DImode, regno);
5209 ops[1] = gen_rtx_REG (DImode, regno + 2);
5210 ops[2] = gen_rtx_REG (DImode, regno + 4);
5211 ops[3] = gen_rtx_REG (DImode, regno + 6);
5212 ops[4] = operands[1];
5213 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5218 return "vld1.<V_sz_elem>\t%h0, %A1";
5221 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5222 (const_string "neon_load4_all_lanes<q>")
5223 (const_string "neon_load1_1reg<q>")))]
;; Expander whose pattern sets a struct memory operand (operand 0, OI
;; mode) from an unspec of the OI-mode register operand 1.  The VDX dummy
;; unspec holds only (const_int 0); it presumably exists just to attach
;; the iterator's vector mode to the pattern (TODO confirm).
5226 (define_expand "vec_store_lanesoi<mode>"
5227 [(set (match_operand:OI 0 "neon_struct_operand")
5228 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5229 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store of an OI-mode register group (operand 1) to struct memory
;; (operand 0).  For 64-bit elements the body returns "vst1.64"; for all
;; other element sizes it returns "vst4.<V_sz_elem>".  The "type"
;; attribute follows the same test: neon_store1_4reg<q> for 64-bit
;; elements, neon_store4_4reg<q> otherwise.
5233 (define_insn "neon_vst4<mode>"
5234 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5235 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5236 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5240 if (<V_sz_elem> == 64)
5241 return "vst1.64\t%h1, %A0";
5243 return "vst4.<V_sz_elem>\t%h1, %A0";
5246 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5247 (const_string "neon_store1_4reg<q>")
5248 (const_string "neon_store4_4reg<q>")))]
;; Expander for an XI-mode store: it forwards operands 0 (struct memory)
;; and 1 (register group) unchanged to gen_neon_vst4<mode>.
5251 (define_expand "vec_store_lanesxi<mode>"
5252 [(match_operand:XI 0 "neon_struct_operand")
5253 (match_operand:XI 1 "s_register_operand")
5254 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5257 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; Expander that splits an XI-mode store into two OI-mode stores.  The
;; memory operand is re-addressed as OImode at offset 0 for
;; neon_vst4qa<mode>, then advanced by GET_MODE_SIZE (OImode) for
;; neon_vst4qb<mode>.  Both insns receive the whole XI register group
;; (operand 1).
5261 (define_expand "neon_vst4<mode>"
5262 [(match_operand:XI 0 "neon_struct_operand")
5263 (match_operand:XI 1 "s_register_operand")
5264 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5269 mem = adjust_address (operands[0], OImode, 0);
5270 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5271 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5272 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of the split XI-mode vst4.  Prints one vst4.<V_sz_elem> on
;; the DImode registers at regno, regno + 4, regno + 8 and regno + 12 of
;; the XI register group (operand 1), storing to the OI-mode memory
;; operand 0.
5276 (define_insn "neon_vst4qa<mode>"
5277 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5278 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5279 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5283 int regno = REGNO (operands[1]);
5285 ops[0] = operands[0];
5286 ops[1] = gen_rtx_REG (DImode, regno);
5287 ops[2] = gen_rtx_REG (DImode, regno + 4);
5288 ops[3] = gen_rtx_REG (DImode, regno + 8);
5289 ops[4] = gen_rtx_REG (DImode, regno + 12);
5290 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5293 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of the split XI-mode vst4.  Same shape as
;; neon_vst4qa<mode>, but the DImode registers used are at regno + 2,
;; regno + 6, regno + 10 and regno + 14 of the XI register group
;; (operand 1).
5296 (define_insn "neon_vst4qb<mode>"
5297 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5298 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5299 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5303 int regno = REGNO (operands[1]);
5305 ops[0] = operands[0];
5306 ops[1] = gen_rtx_REG (DImode, regno + 2);
5307 ops[2] = gen_rtx_REG (DImode, regno + 6);
5308 ops[3] = gen_rtx_REG (DImode, regno + 10);
5309 ops[4] = gen_rtx_REG (DImode, regno + 14);
5310 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5313 [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane vst4 from an OI-mode register group (operand 1) to the
;; struct memory operand 0.  The body reports "lane out of range" when
;; operand 2 is negative or >= GET_MODE_NUNITS, then prints the
;; instruction on the DImode registers at regno, regno + 2, regno + 4 and
;; regno + 6 with operand 2 as the lane subscript.
5316 (define_insn "neon_vst4_lane<mode>"
5317 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5318 (unspec:<V_four_elem>
5319 [(match_operand:OI 1 "s_register_operand" "w")
5320 (match_operand:SI 2 "immediate_operand" "i")
5321 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5325 HOST_WIDE_INT lane = INTVAL (operands[2]);
5326 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5327 int regno = REGNO (operands[1]);
5329 if (lane < 0 || lane >= max)
5330 error ("lane out of range");
5331 ops[0] = operands[0];
5332 ops[1] = gen_rtx_REG (DImode, regno);
5333 ops[2] = gen_rtx_REG (DImode, regno + 2);
5334 ops[3] = gen_rtx_REG (DImode, regno + 4);
5335 ops[4] = gen_rtx_REG (DImode, regno + 6);
5336 ops[5] = operands[2];
5337 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5341 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Single-lane vst4 from an XI-mode register group (operand 1) to the
;; struct memory operand 0.  The body reports "lane out of range" when
;; operand 2 is negative or >= GET_MODE_NUNITS, has a separate branch for
;; lanes in the upper half (lane >= max / 2), and prints the instruction
;; on the DImode registers at regno, regno + 4, regno + 8 and regno + 12,
;; using the local lane value as the subscript.
;; The "type" attribute is the one-lane store class, matching the
;; D-register variant of this pattern and the corresponding lane load.
5344 (define_insn "neon_vst4_lane<mode>"
5345 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5346 (unspec:<V_four_elem>
5347 [(match_operand:XI 1 "s_register_operand" "w")
5348 (match_operand:SI 2 "immediate_operand" "i")
5349 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5353 HOST_WIDE_INT lane = INTVAL (operands[2]);
5354 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5355 int regno = REGNO (operands[1]);
5357 if (lane < 0 || lane >= max)
5358 error ("lane out of range");
5359 else if (lane >= max / 2)
5364 ops[0] = operands[0];
5365 ops[1] = gen_rtx_REG (DImode, regno);
5366 ops[2] = gen_rtx_REG (DImode, regno + 4);
5367 ops[3] = gen_rtx_REG (DImode, regno + 8);
5368 ops[4] = gen_rtx_REG (DImode, regno + 12);
5369 ops[5] = GEN_INT (lane);
5370 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5374 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Extends (code iterator SE) the half of operand 1 selected by a
;; vect_par_constant_low parallel (operand 2) into <V_unpack> mode,
;; emitting vmovl.<US><V_sz_elem> with the %e half of operand 1.
;; Only enabled for little-endian NEON.
5377 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5378 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5379 (SE:<V_unpack> (vec_select:<V_HALF>
5380 (match_operand:VU 1 "register_operand" "w")
5381 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5382 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5383 "vmovl.<US><V_sz_elem> %q0, %e1"
5384 [(set_attr "type" "neon_shift_imm_long")]
;; Extends (code iterator SE) the half of operand 1 selected by a
;; vect_par_constant_high parallel (operand 2) into <V_unpack> mode,
;; emitting vmovl.<US><V_sz_elem> with the %f half of operand 1.
;; Only enabled for little-endian NEON.
5387 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5388 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5389 (SE:<V_unpack> (vec_select:<V_HALF>
5390 (match_operand:VU 1 "register_operand" "w")
5391 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5392 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5393 "vmovl.<US><V_sz_elem> %q0, %f1"
5394 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the high-half unpack.  It builds a PARALLEL holding the
;; element indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and passes it,
;; together with the operands, to gen_neon_vec_unpack<US>_hi_<mode>.
;; Only enabled for little-endian NEON.
5397 (define_expand "vec_unpack<US>_hi_<mode>"
5398 [(match_operand:<V_unpack> 0 "register_operand" "")
5399 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5400 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5402 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5405 for (i = 0; i < (<V_mode_nunits>/2); i++)
5406 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5408 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5409 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for the low-half unpack.  It builds a PARALLEL holding the
;; element indices 0 .. <V_mode_nunits>/2-1 and passes it, together with
;; the operands, to gen_neon_vec_unpack<US>_lo_<mode>.
;; Only enabled for little-endian NEON.
5416 (define_expand "vec_unpack<US>_lo_<mode>"
5417 [(match_operand:<V_unpack> 0 "register_operand" "")
5418 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5419 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5421 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5424 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5425 RTVEC_ELT (v, i) = GEN_INT (i);
5426 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5427 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of vector halves: both inputs (operands 1 and 3) are
;; half-selected and extended to <V_unpack> before the mult.  Operand 2
;; is a vect_par_constant_low parallel; the selector of the second
;; vec_select is not visible here (presumably the same parallel).
;; Emits vmull.<US><V_sz_elem> on the %e halves; little-endian only.
5434 (define_insn "neon_vec_<US>mult_lo_<mode>"
5435 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5436 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5437 (match_operand:VU 1 "register_operand" "w")
5438 (match_operand:VU 2 "vect_par_constant_low" "")))
5439 (SE:<V_unpack> (vec_select:<V_HALF>
5440 (match_operand:VU 3 "register_operand" "w")
5442 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5443 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5444 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the low-half widening multiply.  It builds a PARALLEL of
;; the element indices 0 .. <V_mode_nunits>/2-1 and emits
;; gen_neon_vec_<US>mult_lo_<mode>.  Only enabled for little-endian NEON.
5447 (define_expand "vec_widen_<US>mult_lo_<mode>"
5448 [(match_operand:<V_unpack> 0 "register_operand" "")
5449 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5450 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5451 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5453 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5456 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5457 RTVEC_ELT (v, i) = GEN_INT (i);
5458 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5460 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of vector halves: both inputs (operands 1 and 3) are
;; half-selected and extended to <V_unpack> before the mult.  Operand 2
;; is a vect_par_constant_high parallel; the selector of the second
;; vec_select is not visible here (presumably the same parallel).
;; Emits vmull.<US><V_sz_elem> on the %f halves; little-endian only.
5468 (define_insn "neon_vec_<US>mult_hi_<mode>"
5469 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5470 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5471 (match_operand:VU 1 "register_operand" "w")
5472 (match_operand:VU 2 "vect_par_constant_high" "")))
5473 (SE:<V_unpack> (vec_select:<V_HALF>
5474 (match_operand:VU 3 "register_operand" "w")
5476 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5477 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5478 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the high-half widening multiply.  It builds a PARALLEL of
;; the element indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; gen_neon_vec_<US>mult_hi_<mode>.  Only enabled for little-endian NEON.
5481 (define_expand "vec_widen_<US>mult_hi_<mode>"
5482 [(match_operand:<V_unpack> 0 "register_operand" "")
5483 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5484 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5485 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5487 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5490 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5491 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5492 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5494 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left: operand 1 is shifted left by the constant in
;; operand 2 (const_neon_scalar_shift_amount_operand) and the result is
;; extended (SE) to <V_widen>.  Emits vshll.<US><V_sz_elem>.
5503 (define_insn "neon_vec_<US>shiftl_<mode>"
5504 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5505 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5506 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5509 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5511 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the low-half widening shift.  It takes the <V_HALF>mode
;; subreg of operand 1 at byte offset 0 and hands it to
;; gen_neon_vec_<US>shiftl_<V_half>.  Only enabled for little-endian NEON.
5514 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5515 [(match_operand:<V_unpack> 0 "register_operand" "")
5516 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5517 (match_operand:SI 2 "immediate_operand" "i")]
5518 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5520 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5521 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for the high-half widening shift.  It takes the <V_HALF>mode
;; subreg of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) and
;; hands it to gen_neon_vec_<US>shiftl_<V_half>.  Only enabled for
;; little-endian NEON.
5527 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5528 [(match_operand:<V_unpack> 0 "register_operand" "")
5529 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5530 (match_operand:SI 2 "immediate_operand" "i")]
5531 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5533 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5534 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5535 GET_MODE_SIZE (<V_HALF>mode)),
5541 ;; Vectorize for non-neon-quad case
;; Extends (SE) the whole VDI-mode operand 1 to <V_widen> mode with a
;; single vmovl.<US><V_sz_elem>, printing operand 1 with %P and the
;; result with %q.
5542 (define_insn "neon_unpack<US>_<mode>"
5543 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5544 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5546 "vmovl.<US><V_sz_elem> %q0, %P1"
5547 [(set_attr "type" "neon_move")]
;; Low-half unpack for the VDI modes.  It widens the whole of operand 1
;; into a fresh <V_widen>mode pseudo with gen_neon_unpack<US>_<mode>, then
;; extracts the low half into operand 0 with gen_neon_vget_low<V_widen_l>.
5550 (define_expand "vec_unpack<US>_lo_<mode>"
5551 [(match_operand:<V_double_width> 0 "register_operand" "")
5552 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5555 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5556 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5557 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; High-half unpack for the VDI modes.  It widens the whole of operand 1
;; into a fresh <V_widen>mode pseudo with gen_neon_unpack<US>_<mode>, then
;; extracts the high half into operand 0 with gen_neon_vget_high<V_widen_l>.
5563 (define_expand "vec_unpack<US>_hi_<mode>"
5564 [(match_operand:<V_double_width> 0 "register_operand" "")
5565 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5568 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5569 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5570 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI-mode operands: the pattern is a
;; mult in <V_widen> mode of extended inputs, emitted as
;; vmull.<US><V_sz_elem> with %P source operands and a %q destination.
5576 (define_insn "neon_vec_<US>mult_<mode>"
5577 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5578 (mult:<V_widen> (SE:<V_widen>
5579 (match_operand:VDI 1 "register_operand" "w"))
5581 (match_operand:VDI 2 "register_operand" "w"))))]
5583 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5584 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; High-half widening multiply for the VDI modes.  It multiplies the full
;; operands into a fresh <V_widen>mode pseudo with
;; gen_neon_vec_<US>mult_<mode>, then extracts the high half into
;; operand 0 with gen_neon_vget_high<V_widen_l>.
5587 (define_expand "vec_widen_<US>mult_hi_<mode>"
5588 [(match_operand:<V_double_width> 0 "register_operand" "")
5589 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5590 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5593 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5594 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5595 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Low-half widening multiply for the VDI modes.  It multiplies the full
;; operands into a fresh <V_widen>mode pseudo with
;; gen_neon_vec_<US>mult_<mode>, then extracts the low half into
;; operand 0 with gen_neon_vget_low<V_widen_l>.
5602 (define_expand "vec_widen_<US>mult_lo_<mode>"
5603 [(match_operand:<V_double_width> 0 "register_operand" "")
5604 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5605 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5608 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5609 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5610 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; High-half widening shift for the VDI modes.  It shifts the full
;; operand 1 by operand 2 into a fresh <V_widen>mode pseudo with
;; gen_neon_vec_<US>shiftl_<mode>, then extracts the high half into
;; operand 0 with gen_neon_vget_high<V_widen_l>.
5617 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5618 [(match_operand:<V_double_width> 0 "register_operand" "")
5619 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5620 (match_operand:SI 2 "immediate_operand" "i")]
5623 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5624 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5625 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Low-half widening shift for the VDI modes.  It shifts the full
;; operand 1 by operand 2 into a fresh <V_widen>mode pseudo with
;; gen_neon_vec_<US>shiftl_<mode>, then extracts the low half into
;; operand 0 with gen_neon_vget_low<V_widen_l>.
5631 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5632 [(match_operand:<V_double_width> 0 "register_operand" "")
5633 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5634 (match_operand:SI 2 "immediate_operand" "i")]
5637 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5638 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5639 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5645 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5646 ; because the ordering of vector elements in Q registers is different from what
5647 ; the semantics of the instructions require.
;; Truncates operands 1 and 2 to <V_narrow> and concatenates them into
;; operand 0.  Emitted as two vmovn.i<V_sz_elem> instructions (length 8,
;; type "multiple"): the first writes the %e half of operand 0 from
;; operand 1, the second the %f half from operand 2.  Operand 0 carries
;; the "&" earlyclobber, presumably because the first vmovn writes the
;; output before operand 2 has been read.  Little-endian only.
5649 (define_insn "vec_pack_trunc_<mode>"
5650 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5651 (vec_concat:<V_narrow_pack>
5652 (truncate:<V_narrow>
5653 (match_operand:VN 1 "register_operand" "w"))
5654 (truncate:<V_narrow>
5655 (match_operand:VN 2 "register_operand" "w"))))]
5656 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5657 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5658 [(set_attr "type" "multiple")
5659 (set_attr "length" "8")]
5662 ;; For the non-quad case.
;; Truncates the single VN-mode operand 1 to <V_narrow> with one
;; vmovn.i<V_sz_elem>, printing the destination with %P and the source
;; with %q.  Little-endian only.
5663 (define_insn "neon_vec_pack_trunc_<mode>"
5664 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5665 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5666 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5667 "vmovn.i<V_sz_elem>\t%P0, %q1"
5668 [(set_attr "type" "neon_move_narrow_q")]
;; Pack-and-truncate expander for the VSHFT modes.  It assembles a
;; <V_DOUBLE>mode pseudo from operand 1 (via gen_move_lo_quad_<V_double>)
;; and operand 2 (via gen_move_hi_quad_<V_double>), then narrows it into
;; operand 0 with gen_neon_vec_pack_trunc_<V_double>.  Little-endian only.
5671 (define_expand "vec_pack_trunc_<mode>"
5672 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5673 (match_operand:VSHFT 1 "register_operand" "")
5674 (match_operand:VSHFT 2 "register_operand")]
5675 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5677 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5679 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5680 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5681 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference written as abs (minus op1 op2), emitted as
;; vabd.<V_s_elem>.  Enabled for non-float modes, and for float modes
;; only under flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
5685 (define_insn "neon_vabd<mode>_2"
5686 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5687 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5688 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5689 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5690 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5692 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5693 (const_string "neon_fp_abd_s<q>")
5694 (const_string "neon_abd<q>")))]
;; Absolute difference where the inner subtraction is an unspec of
;; operands 1 and 2 (the unspec name is not visible here), emitted as
;; vabd.<V_if_elem>.  Same enabling condition and "type" selection as the
;; abs (minus ...) form: non-float modes, or float modes under
;; flag_unsafe_math_optimizations.
5697 (define_insn "neon_vabd<mode>_3"
5698 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5699 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5700 (match_operand:VDQ 2 "s_register_operand" "w")]
5702 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5703 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5705 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5706 (const_string "neon_fp_abd_s<q>")
5707 (const_string "neon_abd<q>")))]
5710 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; SI -> DI sign extension.  Applies after reload when the destination
;; is a VFP register.  The replacement duplicates operand 1 across a V2SI
;; view of the destination register (operand 2), then arithmetic-shifts
;; the DI destination right by 32.
5713 [(set (match_operand:DI 0 "s_register_operand" "")
5714 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5715 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5716 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5717 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5719 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HI -> DI sign extension.  Applies after reload when the destination
;; is a VFP register.  The replacement duplicates operand 1 across a V4HI
;; view of the destination register (operand 2), then arithmetic-shifts
;; the DI destination right by 48.
5723 [(set (match_operand:DI 0 "s_register_operand" "")
5724 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5725 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5726 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5727 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5729 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QI -> DI sign extension.  Applies after reload when the destination
;; is a VFP register.  The replacement duplicates operand 1 across a V8QI
;; view of the destination register (operand 2), then arithmetic-shifts
;; the DI destination right by 56.
5733 [(set (match_operand:DI 0 "s_register_operand" "")
5734 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5735 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5736 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5737 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5739 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SI -> DI zero extension.  Applies after reload when the destination
;; is a VFP register.  The replacement duplicates operand 1 across a V2SI
;; view of the destination register (operand 2), then logical-shifts the
;; DI destination right by 32.
5743 [(set (match_operand:DI 0 "s_register_operand" "")
5744 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5745 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5746 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5747 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5749 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HI -> DI zero extension.  Applies after reload when the destination
;; is a VFP register.  The replacement duplicates operand 1 across a V4HI
;; view of the destination register (operand 2), then logical-shifts the
;; DI destination right by 48.
5753 [(set (match_operand:DI 0 "s_register_operand" "")
5754 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5755 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5756 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5757 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5759 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QI -> DI zero extension.  Applies after reload when the destination
;; is a VFP register.  The replacement duplicates operand 1 across a V8QI
;; view of the destination register (operand 2), then logical-shifts the
;; DI destination right by 56.
5763 [(set (match_operand:DI 0 "s_register_operand" "")
5764 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5765 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5766 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5767 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5769 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));