1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Its value is one of vadd, vmin or vmax; the default is vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX modes; at least one operand must be a register.
;; Alternative 0 is a plain register vmov, alternatives 1 and 3 are emitted
;; by output_move_neon, and alternative 2 is an immediate move whose operand
;; is checked by neon_immediate_valid_for_move (the assert requires it to be
;; valid).  Alternatives 4 and 5 emit a vmov using the %Q/%R halves of one
;; operand; the remaining alternatives go through output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern for the VQXMOV modes; at least one operand must be a
;; register.  The alternatives mirror the VDX move pattern, except that
;; alternatives 4 and 5 each emit two vmov instructions (one per %e/%f half
;; of the vector operand) and the remaining alternatives go through
;; output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Expander for TImode moves.  While new pseudos can still be created, the
;; source is forced into a register whenever the destination is not a
;; register.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves in the VSTRUCT modes.  While new pseudos can still be
;; created, the source is forced into a register whenever the destination is
;; not a register.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move pattern for the VSTRUCT modes; at least one operand must be a
;; register.  Alternatives 1 and 2 (the store and load alternatives, per the
;; "type" attribute) are emitted by output_move_neon.  The insn length is
;; computed at run time by arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload, an EI-mode register copy is replaced by two sets: a TImode
;; piece at the base register number and a DImode piece at register offset 4.
;; The pieces are handed to neon_disambiguate_copy together with the operand
;; array used by the replacement template.
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, an OI-mode register copy is replaced by two TImode sets, at
;; register offsets 0 and 4.  The pieces are handed to
;; neon_disambiguate_copy together with the operand array used by the
;; replacement template.
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, a CI-mode register copy is replaced by three TImode sets, at
;; register offsets 0, 4 and 8.  The pieces are handed to
;; neon_disambiguate_copy together with the operand array used by the
;; replacement template.
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
;; After reload, an XI-mode register copy is replaced by four TImode sets, at
;; register offsets 0, 4, 8 and 12.  The pieces are handed to
;; neon_disambiguate_copy together with the operand array used by the
;; replacement template.
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned moves in the VDQX modes, available only for
;; little-endian NEON targets with unaligned_access enabled.  The move is
;; wrapped in UNSPEC_MISALIGNED_ACCESS.  The non-register operand is taken as
;; the memory reference; if neon_vector_mem_operand rejects it, its address
;; is forced into a Pmode register.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register (operand 1) to memory (operand 0),
;; emitted as a single vst1 with the element size of the mode.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VDX-mode register (operand 0) from memory
;; (operand 1), emitted as a single vld1 with the element size of the mode.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register (operand 1) to memory (operand 0),
;; emitted as a single vst1 with the element size of the mode.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX-mode register (operand 0) from memory
;; (operand 1), emitted as a single vld1 with the element size of the mode.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of VD-mode vector operand 3 (tied to
;; the destination).  Operand 2 is a bit mask: the lane number is recovered
;; as ffs (mask) - 1 and is mirrored on big-endian targets.  Alternative 0
;; loads the lane from memory with vld1; alternative 1 moves it from the "r"
;; operand with vmov.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert scalar operand 1 into one lane of VQ-mode vector operand 3 (tied to
;; the destination).  Operand 2 is a bit mask whose lane number is recovered
;; as ffs (mask) - 1.  The lane is split into a half selector (register
;; offset "hi", either 0 or 2) and a lane within that half (mirrored on
;; big-endian); operand 0 is then rewritten as the <V_HALF>-mode register
;; holding the lane before the vld1/vmov template is returned.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert DImode operand 1 into one element of V2DI operand 3 (tied to the
;; destination).  Operand 2 is a bit mask whose element number is recovered
;; as ffs (mask) - 1; operand 0 is rewritten as the DImode register at
;; REGNO + 2 * element.  Alternative 0 loads it with vld1.64; alternative 1
;; uses vmov from the %Q/%R halves of operand 1.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set in the VDQ modes.  The lane index in operand 2 is
;; converted to the bit mask 1 << index and passed to the matching
;; vec_set<mode>_internal pattern, with operand 0 supplied both as the
;; destination and as the vector being updated.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
;; Extract lane operand 2 of VD-mode vector operand 1.  On big-endian targets
;; the lane number is mirrored first.  Alternative 0 stores the lane to
;; memory with vst1; alternative 1 moves it to the "r" operand with vmov.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract lane operand 2 of VQ-mode vector operand 1.  The lane is split
;; into a half selector (register offset "hi", either 0 or 2) and a lane
;; within that half (mirrored on big-endian); operand 1 is rewritten as the
;; <V_HALF>-mode register holding the lane before the vst1/vmov template is
;; returned.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract element operand 2 of V2DI operand 1.  Operand 1 is rewritten as
;; the DImode register at REGNO + 2 * index.  Alternative 0 stores it with
;; vst1.64; alternative 1 uses vmov into the %Q/%R halves of operand 0.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vec_init in the VDQ modes; all the work is delegated to
;; neon_expand_vector_init, called with the target and the initializer.
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition for the VDQ modes (vadd).  Float modes are matched only
;; under flag_unsafe_math_optimizations.  The "type" attribute depends on
;; whether the mode is a float mode.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
;; DImode addition.  Alternatives 0 and 3 emit vadd.i64 and are selected by
;; the neon_for_64bits / avoid_neon_for_64bits "arch" values respectively.
;; The other alternatives are typed "multiple", have length 8 and clobber
;; the condition codes.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtraction for the VDQ modes (vsub).  Float modes are matched only
;; under flag_unsafe_math_optimizations.  The "type" attribute depends on
;; whether the mode is a float mode.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
;; DImode subtraction.  Alternatives 0 and 4 emit vsub.i64 and are selected
;; by the neon_for_64bits / avoid_neon_for_64bits "arch" values respectively.
;; Alternatives 1 to 3 emit a subs/sbc pair on the low and high words, which
;; is why they clobber the condition codes and have length 8.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiplication for the VDQW modes (vmul).  Float modes are matched
;; only under flag_unsafe_math_optimizations.  The "type" attribute depends
;; on whether the mode is a float mode.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with operand 1 tied to the destination (vmla).  Float modes
;; are matched only under flag_unsafe_math_optimizations.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with operand 1 tied to the destination (vmls).  Float modes are
;; matched only under flag_unsafe_math_optimizations.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes (vfma), with the addend (operand 3)
;; tied to the destination.  This variant additionally requires
;; flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add for the VCVTF modes (vfma), with the addend (operand 3)
;; tied to the destination.  This variant does not depend on
;; flag_unsafe_math_optimizations.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes (vfms): the first multiplicand
;; is negated and the addend (operand 3) is tied to the destination.  This
;; variant additionally requires flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes (vfms): the first multiplicand
;; is negated and the addend (operand 3) is tied to the destination.  This
;; variant does not depend on flag_unsafe_math_optimizations.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vrint<variant>.f32 on a VCVTF-mode register, one pattern per NEON_VRINT
;; variant.  Requires TARGET_FPU_ARMV8 in addition to TARGET_NEON.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; vcvt<variant>.<su>32.f32 from a VCVTF-mode register to the corresponding
;; <V_cmp_result> mode, one pattern per NEON_VCVT variant and FIXUORS code.
;; Requires TARGET_FPU_ARMV8 and is marked as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 is the register form (vorr);
;; alternative 1 is the immediate form (constraint Dl, operand 1 tied to the
;; destination), whose text is produced by neon_output_logic_immediate.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
649 switch (which_alternative)
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
657 [(set_attr "type" "neon_logic<q>")]
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 is the register form (vand);
;; alternative 1 is the immediate form (constraint DL, operand 1 tied to the
;; destination), whose text is produced by neon_output_logic_immediate with
;; its fourth argument set to 1.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
671 switch (which_alternative)
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
679 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2 (vorn).
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  The split applies only
;; when operand 0 is not a VFP register.  Its template performs the operation
;; on the SImode low and high parts of the operands; the preparation code
;; also contains a path that emits one_cmpldi2 followed by iordi3.
;; NOTE(review): the condition choosing between the two paths is not visible
;; here -- confirm against the full pattern.
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
;; Bit clear for the VDQ modes: operand 0 = operand 1 & ~operand 2 (vbic).
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
740 ;; Compare to *anddi_notdi_di.
;; DImode bit clear: operand 0 = operand 1 & ~operand 2.  Alternative 0 is
;; typed neon_logic; alternatives 1 and 2 are typed "multiple" with length 8.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
;; Bitwise exclusive OR for the VDQ modes (veor).
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes (vmvn).
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
;; Absolute value for the VDQW modes (vabs).  The "type" attribute depends on
;; whether the mode is a float mode.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
;; Negation for the VDQW modes (vneg).  The "type" attribute depends on
;; whether the mode is a float mode.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
;; DImode negation.  The pattern clobbers the condition codes and a DImode
;; scratch; only alternative 1 asks for a real scratch register (&w), the
;; other alternatives use X.  Length is 8 and the type is "multiple".
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
804 ; Split negdi2_neon for vfp registers
;; After reload, when the destination is a VFP register, the negation becomes
;; "operand 2 = 0" followed by "operand 0 = operand 2 - operand 1".  If no
;; scratch register was allocated (operand 2 is not a REG), the destination
;; itself is used as the zeroed temporary.
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
820 ; Split negdi2_neon for core registers
;; After reload, when the destination satisfies arm_general_register_operand,
;; the pattern is rewritten as a plain DImode negation that clobbers only the
;; condition codes (the scratch clobber is dropped).
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
;; Unsigned minimum for the VDQIW modes (vmin with the unsigned element
;; suffix).
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned maximum for the VDQIW modes (vmax with the unsigned element
;; suffix).
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
;; Signed minimum for the VDQW modes (vmin with the signed element suffix).
;; The "type" attribute depends on whether the mode is a float mode.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
;; Signed maximum for the VDQW modes (vmax with the signed element suffix).
;; The "type" attribute depends on whether the mode is a float mode.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a vector
;; register (vshl); alternative 1 shifts by an immediate (constraint Dn) and
;; its text is produced by neon_output_shift_immediate.
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
885 switch (which_alternative)
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
890 VALID_NEON_QREG_MODE (<MODE>mode),
892 default: gcc_unreachable ();
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift by an immediate for the VDQIW modes.  The text is
;; produced by neon_output_shift_immediate, called with "vshr" and 's'.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
908 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift by an immediate for the VDQIW modes.  The text is
;; produced by neon_output_shift_immediate, called with "vshr" and 'u'.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
921 [(set_attr "type" "neon_shift_imm<q>")]
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
925 ; amount, with signed operands. This is essentially the same as vashl<mode>3
926 ; above, but using an unspec in case GCC tries anything tricky with negative
;; vshl with the signed element suffix, wrapped in UNSPEC_ASHIFT_SIGNED.
;; The vashr<mode>3 expander emits this with a negated shift count when the
;; count is in a register.
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
;; vshl with the unsigned element suffix, wrapped in UNSPEC_ASHIFT_UNSIGNED.
;; The vlshr<mode>3 expander emits this with a negated shift count when the
;; count is in a register.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic right shift in the VDQIW modes.  A register
;; shift count is negated into a fresh pseudo and the shift is emitted as
;; ashl<mode>3_signed; otherwise the immediate pattern vashr<mode>3_imm is
;; used.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
958 if (s_register_operand (operands[2], <MODE>mode))
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for vector logical right shift in the VDQIW modes.  A register
;; shift count is negated into a fresh pseudo and the shift is emitted as
;; ashl<mode>3_unsigned; otherwise the immediate pattern vlshr<mode>3_imm is
;; used.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
975 if (s_register_operand (operands[2], <MODE>mode))
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
;; Alternative 0 loads the SImode count from memory into lane 0 with vld1.32;
;; alternative 1 moves it from the "r" operand into lane 0 with vmov.32.
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift in a NEON register with no clobbers.  Only matches after
;; reload.  Alternative 0 shifts by an immediate, which the insn condition
;; restricts to 0..63; alternative 1 shifts by a count held in a register.
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift that is split once reload has completed.
;; If operand 0 is a VFP/NEON register (IS_VFP_REGNUM), the split emits
;; ashldi3_neon_noclobber: a constant count below 1 becomes a plain DImode
;; move, a constant count above 63 is replaced by 63, and a non-constant
;; count is first loaded into scratch operand 5 with neon_load_count.
;; Otherwise the shift is done in core registers, using arm_ashldi3_1bit for
;; a shift by exactly 1 (when the registers do not partially overlap) or
;; arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1027 "TARGET_NEON && reload_completed"
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1033 if (CONST_INT_P (operands[2]))
1035 if (INTVAL (operands[2]) < 1)
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1071 ; The shift amount needs to be negated for right-shifts
;; Emits vshl.s64 with the count taken from NEON register operand 2.  Only
;; matches after reload.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1082 ; The shift amount needs to be negated for right-shifts
;; Emits vshl.u64 with the count taken from NEON register operand 2.  Only
;; matches after reload.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic shift right by a constant in a NEON register, with no
;; clobbers.  The insn condition accepts counts 1..64 and only after reload.
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
;; DImode logical shift right by a constant in a NEON register, with no
;; clobbers.  The insn condition accepts counts 1..64 and only after reload.
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (iterated over RSHIFTS), split once reload has
;; completed.
;; If operand 0 is a VFP/NEON register: a constant count below 1 becomes a
;; plain DImode move, a constant count above 64 is replaced by 64, and the
;; immediate insn <shift>di3_neon_imm_noclobber is emitted.  For a register
;; count, the count is negated into operand 3, loaded into scratch operand 5
;; with neon_load_count, and <shifttype>_shift_di3_neon is emitted.
;; Otherwise the shift is done in core registers, using arm_<shift>di3_1bit
;; for a shift by exactly 1 (when the registers do not partially overlap) or
;; arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1125 "TARGET_NEON && reload_completed"
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1131 if (CONST_INT_P (operands[2]))
1133 if (INTVAL (operands[2]) < 1)
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1175 ;; Widening operations
;; Adds the sign-extended narrow vector (operand 1) to the wide accumulator
;; (operand 2).  Note the template operand order: vaddw takes the wide
;; operand 2 first and the narrow operand 1 last.
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "%w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
;; Adds the zero-extended narrow vector (operand 1) to the wide accumulator
;; (operand 2).  Note the template operand order: vaddw takes the wide
;; operand 2 first and the narrow operand 1 last.
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "%w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1197 ;; Helpers for quad-word reduction operations
1199 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1200 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1201 ; N/2-element vector.
;; V4SI variant: elements 0-1 are combined with elements 2-3, giving a V2SI
;; result.  The template passes %e1 and %f1 (presumably the two halves of
;; operand 1) to <VQH_mnem>.
1203 (define_insn "quad_halves_<code>v4si"
1204 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1206 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1207 (parallel [(const_int 0) (const_int 1)]))
1208 (vec_select:V2SI (match_dup 1)
1209 (parallel [(const_int 2) (const_int 3)]))))]
1211 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1212 [(set_attr "vqh_mnem" "<VQH_mnem>")
1213 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF variant: elements 0-1 are combined with elements 2-3, giving a V2SF
;; result.  Only enabled with flag_unsafe_math_optimizations.
1216 (define_insn "quad_halves_<code>v4sf"
1217 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1219 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1220 (parallel [(const_int 0) (const_int 1)]))
1221 (vec_select:V2SF (match_dup 1)
1222 (parallel [(const_int 2) (const_int 3)]))))]
1223 "TARGET_NEON && flag_unsafe_math_optimizations"
1224 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1225 [(set_attr "vqh_mnem" "<VQH_mnem>")
1226 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant: elements 0-3 are combined with elements 4-7, giving a V4HI
;; result.  Operand 0 is only written (the pattern never reads it), so it
;; uses the "=" constraint modifier, as the V4SI and V4SF variants do.
1229 (define_insn "quad_halves_<code>v8hi"
1230 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1232 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1233 (parallel [(const_int 0) (const_int 1)
1234 (const_int 2) (const_int 3)]))
1235 (vec_select:V4HI (match_dup 1)
1236 (parallel [(const_int 4) (const_int 5)
1237 (const_int 6) (const_int 7)]))))]
1239 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1240 [(set_attr "vqh_mnem" "<VQH_mnem>")
1241 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant: elements 0-7 are combined with elements 8-15, giving a V8QI
;; result.  Operand 0 is only written (the pattern never reads it), so it
;; uses the "=" constraint modifier, as the V4SI and V4SF variants do.
1244 (define_insn "quad_halves_<code>v16qi"
1245 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1247 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1248 (parallel [(const_int 0) (const_int 1)
1249 (const_int 2) (const_int 3)
1250 (const_int 4) (const_int 5)
1251 (const_int 6) (const_int 7)]))
1252 (vec_select:V8QI (match_dup 1)
1253 (parallel [(const_int 8) (const_int 9)
1254 (const_int 10) (const_int 11)
1255 (const_int 12) (const_int 13)
1256 (const_int 14) (const_int 15)]))))]
1258 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1259 [(set_attr "vqh_mnem" "<VQH_mnem>")
1260 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Emits a move whose destination is a <V_HALF>mode subreg of the 128-bit
;; operand 0 at byte offset GET_MODE_SIZE (<V_HALF>mode).
1263 (define_expand "move_hi_quad_<mode>"
1264 [(match_operand:ANY128 0 "s_register_operand" "")
1265 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1268 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1269 GET_MODE_SIZE (<V_HALF>mode)),
;; Emits a move whose destination is a <V_HALF>mode subreg of the 128-bit
;; operand 0.
1274 (define_expand "move_lo_quad_<mode>"
1275 [(match_operand:ANY128 0 "s_register_operand" "")
1276 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1279 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1285 ;; Reduction operations
;; Sum reduction of a VD-mode vector to a scalar: neon_pairwise_reduce is
;; driven with the neon_vpadd_internal<mode> generator, then element 0 of the
;; result is extracted.  Float modes require flag_unsafe_math_optimizations.
1287 (define_expand "reduc_plus_scal_<mode>"
1288 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1289 (match_operand:VD 1 "s_register_operand" "")]
1290 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1292 rtx vec = gen_reg_rtx (<MODE>mode);
1293 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1294 &gen_neon_vpadd_internal<mode>);
1295 /* The same result is actually computed into every element. */
1296 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Sum reduction of a VQ-mode vector: the two halves are first added with
;; quad_halves_plus<mode> into a <V_HALF>mode pseudo, which is then reduced
;; by reduc_plus_scal_<V_half>.  Disabled when BYTES_BIG_ENDIAN.
1300 (define_expand "reduc_plus_scal_<mode>"
1301 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1302 (match_operand:VQ 1 "s_register_operand" "")]
1303 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1304 && !BYTES_BIG_ENDIAN"
1306 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1308 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1309 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector: arm_reduc_plus_internal_v2di computes the
;; sum into a V2DI pseudo and element 0 is then extracted into operand 0.
;; Disabled when BYTES_BIG_ENDIAN.
1314 (define_expand "reduc_plus_scal_v2di"
1315 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1316 (match_operand:V2DI 1 "s_register_operand" "")]
1317 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1319 rtx vec = gen_reg_rtx (V2DImode);
1321 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1322 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
;; Helper insn for reduc_plus_scal_v2di.  The template adds %e1 and %f1 with
;; vadd.i64 and writes the sum to %e0 only.
1327 (define_insn "arm_reduc_plus_internal_v2di"
1328 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1329 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1332 "vadd.i64\t%e0, %e1, %f1"
1333 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a VD-mode vector: neon_pairwise_reduce is
;; driven with the neon_vpsmin<mode> generator, then element 0 is extracted.
;; Float modes require flag_unsafe_math_optimizations.
1336 (define_expand "reduc_smin_scal_<mode>"
1337 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1338 (match_operand:VD 1 "s_register_operand" "")]
1339 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1341 rtx vec = gen_reg_rtx (<MODE>mode);
1343 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1344 &gen_neon_vpsmin<mode>);
1345 /* The result is computed into every element of the vector. */
1346 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction of a VQ-mode vector: the halves are combined with
;; quad_halves_smin<mode>, then reduced by reduc_smin_scal_<V_half>.
;; Disabled when BYTES_BIG_ENDIAN.
1350 (define_expand "reduc_smin_scal_<mode>"
1351 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1352 (match_operand:VQ 1 "s_register_operand" "")]
1353 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1354 && !BYTES_BIG_ENDIAN"
1356 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1358 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1359 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of a VD-mode vector: neon_pairwise_reduce is
;; driven with the neon_vpsmax<mode> generator, then element 0 is extracted.
;; Float modes require flag_unsafe_math_optimizations.
1364 (define_expand "reduc_smax_scal_<mode>"
1365 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1366 (match_operand:VD 1 "s_register_operand" "")]
1367 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1369 rtx vec = gen_reg_rtx (<MODE>mode);
1370 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1371 &gen_neon_vpsmax<mode>);
1372 /* The result is computed into every element of the vector. */
1373 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction of a VQ-mode vector: the halves are combined with
;; quad_halves_smax<mode>, then reduced by reduc_smax_scal_<V_half>.
;; Disabled when BYTES_BIG_ENDIAN.
1377 (define_expand "reduc_smax_scal_<mode>"
1378 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1379 (match_operand:VQ 1 "s_register_operand" "")]
1380 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1381 && !BYTES_BIG_ENDIAN"
1383 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1385 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1386 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of a VDI-mode vector: neon_pairwise_reduce is
;; driven with the neon_vpumin<mode> generator, then element 0 is extracted.
1391 (define_expand "reduc_umin_scal_<mode>"
1392 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1393 (match_operand:VDI 1 "s_register_operand" "")]
1396 rtx vec = gen_reg_rtx (<MODE>mode);
1397 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1398 &gen_neon_vpumin<mode>);
1399 /* The result is computed into every element of the vector. */
1400 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction of a VQI-mode vector: the halves are combined
;; with quad_halves_umin<mode>, then reduced by reduc_umin_scal_<V_half>.
;; Disabled when BYTES_BIG_ENDIAN.
1404 (define_expand "reduc_umin_scal_<mode>"
1405 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1406 (match_operand:VQI 1 "s_register_operand" "")]
1407 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1409 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1411 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1412 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of a VDI-mode vector: neon_pairwise_reduce is
;; driven with the neon_vpumax<mode> generator, then element 0 is extracted.
1417 (define_expand "reduc_umax_scal_<mode>"
1418 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1419 (match_operand:VDI 1 "s_register_operand" "")]
1422 rtx vec = gen_reg_rtx (<MODE>mode);
1423 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1424 &gen_neon_vpumax<mode>);
1425 /* The result is computed into every element of the vector. */
1426 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction of a VQI-mode vector: the halves are combined
;; with quad_halves_umax<mode>, then reduced by reduc_umax_scal_<V_half>.
;; Disabled when BYTES_BIG_ENDIAN.
1430 (define_expand "reduc_umax_scal_<mode>"
1431 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1432 (match_operand:VQI 1 "s_register_operand" "")]
1433 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1435 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1437 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1438 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add (vpadd) on VD-mode registers, modelled as an unspec.  The
;; type attribute is chosen by <Is_float_mode>.
1443 (define_insn "neon_vpadd_internal<mode>"
1444 [(set (match_operand:VD 0 "s_register_operand" "=w")
1445 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1446 (match_operand:VD 2 "s_register_operand" "w")]
1449 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1450 ;; Assume this schedules like vadd.
1452 (if_then_else (match_test "<Is_float_mode>")
1453 (const_string "neon_fp_reduc_add_s<q>")
1454 (const_string "neon_reduc_add<q>")))]
;; Pairwise signed minimum (vpmin.<V_s_elem>) on VD-mode registers, modelled
;; as an unspec.  The type attribute is chosen by <Is_float_mode>.
1457 (define_insn "neon_vpsmin<mode>"
1458 [(set (match_operand:VD 0 "s_register_operand" "=w")
1459 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1460 (match_operand:VD 2 "s_register_operand" "w")]
1463 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1465 (if_then_else (match_test "<Is_float_mode>")
1466 (const_string "neon_fp_reduc_minmax_s<q>")
1467 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum (vpmax.<V_s_elem>) on VD-mode registers, modelled
;; as an unspec.  The type attribute is chosen by <Is_float_mode>.
1470 (define_insn "neon_vpsmax<mode>"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")]
1476 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1478 (if_then_else (match_test "<Is_float_mode>")
1479 (const_string "neon_fp_reduc_minmax_s<q>")
1480 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum (vpmin.<V_u_elem>) on VDI-mode registers,
;; modelled as an unspec.
1483 (define_insn "neon_vpumin<mode>"
1484 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1485 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1486 (match_operand:VDI 2 "s_register_operand" "w")]
1489 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1490 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum (vpmax.<V_u_elem>) on VDI-mode registers,
;; modelled as an unspec.
1493 (define_insn "neon_vpumax<mode>"
1494 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1495 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1496 (match_operand:VDI 2 "s_register_operand" "w")]
1499 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1500 [(set_attr "type" "neon_reduc_minmax<q>")]
1503 ;; Saturating arithmetic
1505 ; NOTE: Neon supports many more saturating variants of instructions than the
1506 ; following, but these are all GCC currently understands.
1507 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1508 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) on VD modes, emitted as vqadd.<V_s_elem>.
1511 (define_insn "*ss_add<mode>_neon"
1512 [(set (match_operand:VD 0 "s_register_operand" "=w")
1513 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1514 (match_operand:VD 2 "s_register_operand" "w")))]
1516 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1517 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) on VD modes, emitted as vqadd.<V_u_elem>.
1520 (define_insn "*us_add<mode>_neon"
1521 [(set (match_operand:VD 0 "s_register_operand" "=w")
1522 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1523 (match_operand:VD 2 "s_register_operand" "w")))]
1525 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1526 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) on VD modes, emitted as
;; vqsub.<V_s_elem>.
1529 (define_insn "*ss_sub<mode>_neon"
1530 [(set (match_operand:VD 0 "s_register_operand" "=w")
1531 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1532 (match_operand:VD 2 "s_register_operand" "w")))]
1534 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1535 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) on VD modes, emitted as
;; vqsub.<V_u_elem>.
1538 (define_insn "*us_sub<mode>_neon"
1539 [(set (match_operand:VD 0 "s_register_operand" "=w")
1540 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1541 (match_operand:VD 2 "s_register_operand" "w")))]
1543 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1544 [(set_attr "type" "neon_qsub<q>")]
1547 ;; Conditional instructions. These are comparisons with conditional moves for
1548 ;; vectors. They perform the assignment:
1550 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1552 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Implementation outline: the expander computes a comparison mask in
;; <V_cmp_result>mode (using a second temporary where two comparisons must be
;; OR-ed together) and then selects between operands 1 and 2 with
;; neon_vbsl<mode>.  When swap_bsl_operands is set, operand 2 is passed to
;; neon_vbsl<mode> ahead of operand 1.  Float modes require
;; flag_unsafe_math_optimizations.
1555 (define_expand "vcond<mode><mode>"
1556 [(set (match_operand:VDQW 0 "s_register_operand" "")
1558 (match_operator 3 "comparison_operator"
1559 [(match_operand:VDQW 4 "s_register_operand" "")
1560 (match_operand:VDQW 5 "nonmemory_operand" "")])
1561 (match_operand:VDQW 1 "s_register_operand" "")
1562 (match_operand:VDQW 2 "s_register_operand" "")))]
1563 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1566 int use_zero_form = 0;
1567 int swap_bsl_operands = 0;
1568 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1569 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1571 rtx (*base_comparison) (rtx, rtx, rtx);
1572 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1574 switch (GET_CODE (operands[3]))
1581 if (operands[5] == CONST0_RTX (<MODE>mode))
1588 if (!REG_P (operands[5]))
1589 operands[5] = force_reg (<MODE>mode, operands[5]);
1592 switch (GET_CODE (operands[3]))
1602 base_comparison = gen_neon_vcge<mode>;
1603 complimentary_comparison = gen_neon_vcgt<mode>;
1611 base_comparison = gen_neon_vcgt<mode>;
1612 complimentary_comparison = gen_neon_vcge<mode>;
1617 base_comparison = gen_neon_vceq<mode>;
1618 complimentary_comparison = gen_neon_vceq<mode>;
1624 switch (GET_CODE (operands[3]))
1631 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1632 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1638 Note that there also exist direct comparison against 0 forms,
1639 so catch those as a special case. */
1643 switch (GET_CODE (operands[3]))
1646 base_comparison = gen_neon_vclt<mode>;
1649 base_comparison = gen_neon_vcle<mode>;
1652 /* Do nothing, other zero form cases already have the correct
1659 emit_insn (base_comparison (mask, operands[4], operands[5]));
1661 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1668 /* Vector compare returns false for lanes which are unordered, so if we use
1669 the inverse of the comparison we actually want to emit, then
1670 swap the operands to BSL, we will end up with the correct result.
1671 Note that a NE NaN and NaN NE b are true for all a, b.
1673 Our transformations are:
1678 a NE b -> !(a EQ b) */
1681 emit_insn (base_comparison (mask, operands[4], operands[5]));
1683 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1685 swap_bsl_operands = 1;
1688 /* We check (a > b || b > a). combining these comparisons give us
1689 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1690 will then give us (a == b || a UNORDERED b) as intended. */
1692 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1693 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1694 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1695 swap_bsl_operands = 1;
1698 /* Operands are ORDERED iff (a > b || b >= a).
1699 Swapping the operands to BSL will give the UNORDERED case. */
1700 swap_bsl_operands = 1;
1703 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1704 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1705 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1711 if (swap_bsl_operands)
1712 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1715 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Unsigned vector conditional select on VDQIW modes.  The expander computes
;; a mask in <V_cmp_result>mode with one of neon_vcgeu, neon_vcgtu, neon_vceq,
;; neon_vcle or neon_vclt (operands 4 and 5 are swapped for some of the
;; comparisons), then selects between operands 1 and 2 with neon_vbsl<mode>;
;; one of the two vbsl calls passes operand 2 ahead of operand 1.
1720 (define_expand "vcondu<mode><mode>"
1721 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1723 (match_operator 3 "arm_comparison_operator"
1724 [(match_operand:VDQIW 4 "s_register_operand" "")
1725 (match_operand:VDQIW 5 "s_register_operand" "")])
1726 (match_operand:VDQIW 1 "s_register_operand" "")
1727 (match_operand:VDQIW 2 "s_register_operand" "")))]
1731 int inverse = 0, immediate_zero = 0;
1733 mask = gen_reg_rtx (<V_cmp_result>mode);
1735 if (operands[5] == CONST0_RTX (<MODE>mode))
1737 else if (!REG_P (operands[5]))
1738 operands[5] = force_reg (<MODE>mode, operands[5]);
1740 switch (GET_CODE (operands[3]))
1743 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1747 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1751 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1756 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1758 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1763 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1765 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1769 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1778 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1781 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1787 ;; Patterns for builtins.
1789 ; good for plain vadd, vaddq.
;; Expands to the generic add<mode>3 pattern for integer modes, or for float
;; modes when flag_unsafe_math_optimizations is set; the other path emits
;; neon_vadd<mode>_unspec.
1791 (define_expand "neon_vadd<mode>"
1792 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1793 (match_operand:VCVTF 1 "s_register_operand" "w")
1794 (match_operand:VCVTF 2 "s_register_operand" "w")]
1797 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1798 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1800 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1805 ; Note that NEON operations don't support the full IEEE 754 standard: in
1806 ; particular, denormal values are flushed to zero. This means that GCC cannot
1807 ; use those instructions for autovectorization, etc. unless
1808 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1809 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1810 ; header) must work in either case: if -funsafe-math-optimizations is given,
1811 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1812 ; expand to unspecs (which may potentially limit the extent to which they might
1813 ; be optimized by generic code).
1815 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vadd.<V_if_elem>; the addition is wrapped in an unspec rather than
;; expressed as a plus.
1817 (define_insn "neon_vadd<mode>_unspec"
1818 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1819 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1820 (match_operand:VCVTF 2 "s_register_operand" "w")]
1823 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1825 (if_then_else (match_test "<Is_float_mode>")
1826 (const_string "neon_fp_addsub_s<q>")
1827 (const_string "neon_add<q>")))]
;; Long add: two VDI-mode inputs produce a <V_widen>-mode result (vaddl).
1830 (define_insn "neon_vaddl<sup><mode>"
1831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1832 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1833 (match_operand:VDI 2 "s_register_operand" "w")]
1836 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1837 [(set_attr "type" "neon_add_long")]
;; Wide add: a <V_widen>-mode operand 1 plus a VDI-mode operand 2 produce a
;; <V_widen>-mode result (vaddw).
1840 (define_insn "neon_vaddw<sup><mode>"
1841 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1842 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1843 (match_operand:VDI 2 "s_register_operand" "w")]
1846 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1847 [(set_attr "type" "neon_add_widen")]
;; Halving add (vhadd / vrhadd, selected by <r>) on VDQIW modes.
;; The type attribute uses the <q> mode attribute so that the "_q" suffix
;; follows the mode instead of being applied unconditionally, in the same way
;; as neon_vhsub<sup><mode> uses neon_sub_halve<q>.
1852 (define_insn "neon_v<r>hadd<sup><mode>"
1853 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1854 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1855 (match_operand:VDQIW 2 "s_register_operand" "w")]
1858 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1859 [(set_attr "type" "neon_add_halve<q>")]
;; Saturating add intrinsic (vqadd) on VDQIX modes, modelled as an unspec.
1862 (define_insn "neon_vqadd<sup><mode>"
1863 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1864 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1865 (match_operand:VDQIX 2 "s_register_operand" "w")]
1868 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1869 [(set_attr "type" "neon_qadd<q>")]
;; Add-and-narrow (vaddhn / vraddhn, selected by <r>): two VN-mode inputs
;; produce a <V_narrow>-mode result.
1872 (define_insn "neon_v<r>addhn<mode>"
1873 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1874 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1875 (match_operand:VN 2 "s_register_operand" "w")]
1878 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1879 [(set_attr "type" "neon_add_halve_narrow_q")]
1882 ;; Polynomial and Float multiplication.
;; Modelled as an unspec over VPF modes; the type attribute is chosen by
;; <Is_float_mode>.
1883 (define_insn "neon_vmul<pf><mode>"
1884 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1885 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1886 (match_operand:VPF 2 "s_register_operand" "w")]
1889 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1891 (if_then_else (match_test "<Is_float_mode>")
1892 (const_string "neon_fp_mul_s<q>")
1893 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate intrinsic expander.  Uses the canonical
;; mul<mode>3add<mode>_neon pattern for integer modes, or for float modes
;; when flag_unsafe_math_optimizations is set; the other path emits
;; neon_vmla<mode>_unspec.
1896 (define_expand "neon_vmla<mode>"
1897 [(match_operand:VDQW 0 "s_register_operand" "=w")
1898 (match_operand:VDQW 1 "s_register_operand" "0")
1899 (match_operand:VDQW 2 "s_register_operand" "w")
1900 (match_operand:VDQW 3 "s_register_operand" "w")]
1903 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1904 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1905 operands[2], operands[3]));
1907 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1908 operands[2], operands[3]));
;; Fused multiply-add intrinsic expander (requires TARGET_FMA).  Forwards to
;; fma<mode>4_intrinsic, passing operands 2 and 3 immediately after the
;; destination.
1912 (define_expand "neon_vfma<VCVTF:mode>"
1913 [(match_operand:VCVTF 0 "s_register_operand")
1914 (match_operand:VCVTF 1 "s_register_operand")
1915 (match_operand:VCVTF 2 "s_register_operand")
1916 (match_operand:VCVTF 3 "s_register_operand")]
1917 "TARGET_NEON && TARGET_FMA"
1919 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Fused multiply-subtract intrinsic expander (requires TARGET_FMA).
;; Forwards to fmsub<mode>4_intrinsic, passing operands 2 and 3 immediately
;; after the destination.
1924 (define_expand "neon_vfms<VCVTF:mode>"
1925 [(match_operand:VCVTF 0 "s_register_operand")
1926 (match_operand:VCVTF 1 "s_register_operand")
1927 (match_operand:VCVTF 2 "s_register_operand")
1928 (match_operand:VCVTF 3 "s_register_operand")]
1929 "TARGET_NEON && TARGET_FMA"
1931 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1936 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Operand 1 is tied to the destination by the "0" constraint, so the
;; template only names operands 0, 2 and 3.
1938 (define_insn "neon_vmla<mode>_unspec"
1939 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1940 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1941 (match_operand:VDQW 2 "s_register_operand" "w")
1942 (match_operand:VDQW 3 "s_register_operand" "w")]
1945 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1947 (if_then_else (match_test "<Is_float_mode>")
1948 (const_string "neon_fp_mla_s<q>")
1949 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-accumulate (vmlal): VW-mode operands 2 and 3 with a
;; <V_widen>-mode accumulator, which is tied to the destination by the "0"
;; constraint.
1952 (define_insn "neon_vmlal<sup><mode>"
1953 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1954 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1955 (match_operand:VW 2 "s_register_operand" "w")
1956 (match_operand:VW 3 "s_register_operand" "w")]
1959 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
1960 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Multiply-subtract intrinsic expander.  Uses the canonical
;; mul<mode>3neg<mode>add<mode>_neon pattern for integer modes, or for float
;; modes when flag_unsafe_math_optimizations is set; the other path emits
;; neon_vmls<mode>_unspec.
1963 (define_expand "neon_vmls<mode>"
1964 [(match_operand:VDQW 0 "s_register_operand" "=w")
1965 (match_operand:VDQW 1 "s_register_operand" "0")
1966 (match_operand:VDQW 2 "s_register_operand" "w")
1967 (match_operand:VDQW 3 "s_register_operand" "w")]
1970 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1971 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
1972 operands[1], operands[2], operands[3]));
1974 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
1975 operands[2], operands[3]));
1979 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Operand 1 is tied to the destination by the "0" constraint, so the
;; template only names operands 0, 2 and 3.
1981 (define_insn "neon_vmls<mode>_unspec"
1982 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1983 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1984 (match_operand:VDQW 2 "s_register_operand" "w")
1985 (match_operand:VDQW 3 "s_register_operand" "w")]
1988 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1990 (if_then_else (match_test "<Is_float_mode>")
1991 (const_string "neon_fp_mla_s<q>")
1992 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-subtract (vmlsl): VW-mode operands 2 and 3 with a
;; <V_widen>-mode accumulator, which is tied to the destination by the "0"
;; constraint.
1995 (define_insn "neon_vmlsl<sup><mode>"
1996 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1997 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1998 (match_operand:VW 2 "s_register_operand" "w")
1999 (match_operand:VW 3 "s_register_operand" "w")]
2002 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2003 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2006 ;; vqdmulh, vqrdmulh
;; One pattern covers both mnemonics through the <r> substitution; operands
;; are VMDQI-mode registers.
2007 (define_insn "neon_vq<r>dmulh<mode>"
2008 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2009 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2010 (match_operand:VMDQI 2 "s_register_operand" "w")]
2013 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2014 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; vqdmlal: VMDI-mode operands 2 and 3 with a <V_widen>-mode accumulator,
;; which is tied to the destination by the "0" constraint.
2017 (define_insn "neon_vqdmlal<mode>"
2018 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2019 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2020 (match_operand:VMDI 2 "s_register_operand" "w")
2021 (match_operand:VMDI 3 "s_register_operand" "w")]
2024 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2025 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: VMDI-mode operands 2 and 3 with a <V_widen>-mode accumulator,
;; which is tied to the destination by the "0" constraint.
2028 (define_insn "neon_vqdmlsl<mode>"
2029 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2030 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2031 (match_operand:VMDI 2 "s_register_operand" "w")
2032 (match_operand:VMDI 3 "s_register_operand" "w")]
2035 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2036 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Long multiply (vmull): two VW-mode inputs produce a <V_widen>-mode result.
2039 (define_insn "neon_vmull<sup><mode>"
2040 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2041 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2042 (match_operand:VW 2 "s_register_operand" "w")]
2045 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2046 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI-mode inputs produce a <V_widen>-mode result.
2049 (define_insn "neon_vqdmull<mode>"
2050 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2051 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2052 (match_operand:VMDI 2 "s_register_operand" "w")]
2055 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2056 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Subtract intrinsic expander.  Expands to the generic sub<mode>3 pattern
;; for integer modes, or for float modes when flag_unsafe_math_optimizations
;; is set; the other path emits neon_vsub<mode>_unspec.
2059 (define_expand "neon_vsub<mode>"
2060 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2061 (match_operand:VCVTF 1 "s_register_operand" "w")
2062 (match_operand:VCVTF 2 "s_register_operand" "w")]
2065 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2066 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2068 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2073 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vsub.<V_if_elem>; the subtraction is wrapped in an unspec rather
;; than expressed as a minus.
2075 (define_insn "neon_vsub<mode>_unspec"
2076 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2077 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2078 (match_operand:VCVTF 2 "s_register_operand" "w")]
2081 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2083 (if_then_else (match_test "<Is_float_mode>")
2084 (const_string "neon_fp_addsub_s<q>")
2085 (const_string "neon_sub<q>")))]
;; Emits vsubl.<sup><V_sz_elem>: both inputs are VDI (%P1, %P2) and the
;; result has mode <V_widen> (%q0).
2088 (define_insn "neon_vsubl<sup><mode>"
2089 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2090 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2091 (match_operand:VDI 2 "s_register_operand" "w")]
2094 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2095 [(set_attr "type" "neon_sub_long")]
;; Emits vsubw.<sup><V_sz_elem>: operand 1 already has the <V_widen> mode
;; (%q1) while operand 2 is the VDI input (%P2); the result is <V_widen>.
2098 (define_insn "neon_vsubw<sup><mode>"
2099 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2100 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2101 (match_operand:VDI 2 "s_register_operand" "w")]
2104 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2105 [(set_attr "type" "neon_sub_widen")]
;; Emits vqsub.<sup><V_sz_elem> on three registers of the same VDQIX mode.
2108 (define_insn "neon_vqsub<sup><mode>"
2109 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2110 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2111 (match_operand:VDQIX 2 "s_register_operand" "w")]
2114 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2115 [(set_attr "type" "neon_qsub<q>")]
;; Emits vhsub.<sup><V_sz_elem> on three registers of the same VDQIW mode.
2118 (define_insn "neon_vhsub<sup><mode>"
2119 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2120 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2121 (match_operand:VDQIW 2 "s_register_operand" "w")]
2124 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2125 [(set_attr "type" "neon_sub_halve<q>")]
;; Emits v<r>subhn.<V_if_elem>: the two VN inputs (%q1, %q2) produce a
;; <V_narrow> result (%P0).  <r> selects between the plain and the "r"
;; spelling of the mnemonic.
2128 (define_insn "neon_v<r>subhn<mode>"
2129 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2130 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2131 (match_operand:VN 2 "s_register_operand" "w")]
2134 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2135 [(set_attr "type" "neon_sub_halve_narrow_q")]
2138 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2139 ;; without unsafe math optimizations.
;; Expander for the vc<cmp_op> comparisons over the VDQW modes.  Operand 2
;; may be a register or zero (reg_or_zero_operand).  For vector float modes
;; without flag_unsafe_math_optimizations it emits one of the explicit
;; v2sf/v4sf *_insn_unspec patterns; in the remaining case it emits
;; neon_vc<cmp_op><mode>_insn.
2140 (define_expand "neon_vc<cmp_op><mode>"
2141 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2143 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2144 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2147 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2149 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2150 && !flag_unsafe_math_optimizations)
2152 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2153 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2154 whereas this expander iterates over the integer modes as well,
2155 but we will never expand to UNSPECs for the integer comparisons. */
2159 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2164 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2173 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; RTL-comparison form of vc<cmp_op>.  Its condition excludes vector float
;; modes unless flag_unsafe_math_optimizations is set.  The assembly text is
;; built with sprintf: the type letter is "f" for vector float modes and
;; <cmp_type> otherwise, and the last operand is the register %<V_reg>2 for
;; alternative 0 or the literal #0 for alternative 1 (the "Dz" constraint).
;; The "type" attribute is neon_compare_zero<q> when operand 2 matches
;; zero_operand and neon_compare<q> otherwise.
2180 (define_insn "neon_vc<cmp_op><mode>_insn"
2181 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2183 (COMPARISONS:<V_cmp_result>
2184 (match_operand:VDQW 1 "s_register_operand" "w,w")
2185 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2186 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2187 && !flag_unsafe_math_optimizations)"
2190 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2192 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2193 ? "f" : "<cmp_type>",
2194 which_alternative == 0
2195 ? "%<V_reg>2" : "#0");
2196 output_asm_insn (pattern, operands);
2200 (if_then_else (match_operand 2 "zero_operand")
2201 (const_string "neon_compare_zero<q>")
2202 (const_string "neon_compare<q>")))]
;; Unspec form of the floating-point comparisons over the VCVTF modes.  The
;; mnemonic always uses the ".f" type letter; the last operand is the
;; register %<V_reg>2 for alternative 0 or the literal #0 for alternative 1.
2205 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2206 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2207 (unspec:<V_cmp_result>
2208 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2209 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2214 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2216 which_alternative == 0
2217 ? "%<V_reg>2" : "#0");
2218 output_asm_insn (pattern, operands);
2221 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unsigned comparisons (the GTUGEU codes) over the VDQIW modes; emits
;; vc<cmp_op>.u<V_sz_elem> with two register inputs.
2224 (define_insn "neon_vc<cmp_op>u<mode>"
2225 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2227 (GTUGEU:<V_cmp_result>
2228 (match_operand:VDQIW 1 "s_register_operand" "w")
2229 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2231 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2232 [(set_attr "type" "neon_compare<q>")]
;; Expander for comparisons of absolute values (GTGE codes applied to abs of
;; each VCVTF input).  With flag_unsafe_math_optimizations it emits
;; neon_vca<cmp_op><mode>_insn; otherwise it emits the *_insn_unspec form.
2235 (define_expand "neon_vca<cmp_op><mode>"
2236 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2238 (GTGE:<V_cmp_result>
2239 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2240 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2243 if (flag_unsafe_math_optimizations)
2244 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2247 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; RTL form of the absolute-value comparison; only available when
;; flag_unsafe_math_optimizations is set.  Emits vac<cmp_op>.<V_if_elem>.
2254 (define_insn "neon_vca<cmp_op><mode>_insn"
2255 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2257 (GTGE:<V_cmp_result>
2258 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2259 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2260 "TARGET_NEON && flag_unsafe_math_optimizations"
2261 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2262 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the absolute-value comparison over the VCVTF modes; emits
;; vac<cmp_op_unsp>.<V_if_elem> with two register inputs.
2265 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2266 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2267 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2268 (match_operand:VCVTF 2 "s_register_operand" "w")]
2271 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2272 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Emits vtst.<V_sz_elem> on three registers of the same VDQIW mode.
2275 (define_insn "neon_vtst<mode>"
2276 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2277 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2278 (match_operand:VDQIW 2 "s_register_operand" "w")]
2281 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2282 [(set_attr "type" "neon_tst<q>")]
;; Integer vabd: emits vabd.<sup><V_sz_elem> on three registers of the same
;; VDQIW mode.
2285 (define_insn "neon_vabd<sup><mode>"
2286 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2287 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2288 (match_operand:VDQIW 2 "s_register_operand" "w")]
2291 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2292 [(set_attr "type" "neon_abd<q>")]
;; Floating-point vabd over the VCVTF modes; emits vabd.<V_s_elem>.
2295 (define_insn "neon_vabdf<mode>"
2296 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2297 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2298 (match_operand:VCVTF 2 "s_register_operand" "w")]
2301 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2302 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Emits vabdl.<sup><V_sz_elem>: the two VW inputs (%P1, %P2) produce a
;; <V_widen> result (%q0).
2305 (define_insn "neon_vabdl<sup><mode>"
2306 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2307 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2308 (match_operand:VW 2 "s_register_operand" "w")]
2311 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2312 [(set_attr "type" "neon_abd_long")]
;; Emits vaba.<sup><V_sz_elem>.  The RTL is a plus of an unspec on operands
;; 2 and 3 with operand 1; operand 1 is tied to the output by its "0"
;; constraint, so only operands 0, 2 and 3 appear in the assembly.
2315 (define_insn "neon_vaba<sup><mode>"
2316 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2317 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2318 (match_operand:VDQIW 3 "s_register_operand" "w")]
2320 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2322 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2323 [(set_attr "type" "neon_arith_acc<q>")]
;; Emits vabal.<sup><V_sz_elem>.  Operands 2 and 3 are the VW inputs (%P2,
;; %P3); operand 1 is the <V_widen> accumulator, tied to the output (%q0) by
;; its "0" constraint.
2326 (define_insn "neon_vabal<sup><mode>"
2327 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2328 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2329 (match_operand:VW 3 "s_register_operand" "w")]
2331 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2333 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2334 [(set_attr "type" "neon_arith_acc<q>")]
;; Integer max/min over the VDQIW modes; <maxmin> supplies the mnemonic stem
;; and <sup> the signedness letter of the suffix.
2337 (define_insn "neon_v<maxmin><sup><mode>"
2338 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2339 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2340 (match_operand:VDQIW 2 "s_register_operand" "w")]
2343 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2344 [(set_attr "type" "neon_minmax<q>")]
;; Floating-point max/min over the VCVTF modes; emits v<maxmin>.<V_s_elem>.
2347 (define_insn "neon_v<maxmin>f<mode>"
2348 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2349 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2350 (match_operand:VCVTF 2 "s_register_operand" "w")]
2353 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2354 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander for vpadd over the VD modes; it forwards its operands to the
;; neon_vpadd_internal<mode> pattern.
2357 (define_expand "neon_vpadd<mode>"
2358 [(match_operand:VD 0 "s_register_operand" "=w")
2359 (match_operand:VD 1 "s_register_operand" "w")
2360 (match_operand:VD 2 "s_register_operand" "w")]
2363 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; Emits vpaddl.<sup><V_sz_elem>: one VDQIW input, result in the
;; <V_double_width> mode.
2368 (define_insn "neon_vpaddl<sup><mode>"
2369 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2370 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2373 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2374 [(set_attr "type" "neon_reduc_add_long")]
;; Emits vpadal.<sup><V_sz_elem>.  Operand 1 is the <V_double_width>
;; accumulator, tied to the output by its "0" constraint; operand 2 is the
;; VDQIW input.
2377 (define_insn "neon_vpadal<sup><mode>"
2378 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2379 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2380 (match_operand:VDQIW 2 "s_register_operand" "w")]
2383 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2384 [(set_attr "type" "neon_reduc_add_acc")]
;; Integer vp<maxmin> over the VDI modes; emits vp<maxmin>.<sup><V_sz_elem>.
2387 (define_insn "neon_vp<maxmin><sup><mode>"
2388 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2389 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2390 (match_operand:VDI 2 "s_register_operand" "w")]
2393 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2394 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Floating-point vp<maxmin> over the VCVTF modes; emits
;; vp<maxmin>.<V_s_elem>.
2397 (define_insn "neon_vp<maxmin>f<mode>"
2398 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2399 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2400 (match_operand:VCVTF 2 "s_register_operand" "w")]
2403 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2404 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; Emits vrecps.<V_if_elem> on three registers of the same VCVTF mode.
2407 (define_insn "neon_vrecps<mode>"
2408 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2409 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2410 (match_operand:VCVTF 2 "s_register_operand" "w")]
2413 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2414 [(set_attr "type" "neon_fp_recps_s<q>")]
;; Emits vrsqrts.<V_if_elem> on three registers of the same VCVTF mode.
2417 (define_insn "neon_vrsqrts<mode>"
2418 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2419 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2420 (match_operand:VCVTF 2 "s_register_operand" "w")]
2423 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2424 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for vabs over the VDQW modes; it simply emits the generic
;; abs<mode>2 pattern.
2427 (define_expand "neon_vabs<mode>"
2428 [(match_operand:VDQW 0 "s_register_operand" "")
2429 (match_operand:VDQW 1 "s_register_operand" "")]
2432 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; Emits vqabs.<V_s_elem> with one VDQIW input.
2436 (define_insn "neon_vqabs<mode>"
2437 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2438 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2441 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2442 [(set_attr "type" "neon_qabs<q>")]
;; Implements the bswap RTL code for the VDQHSD modes by emitting
;; vrev<V_sz_elem>.8, i.e. a reversal of the 8-bit units within each element.
2445 (define_insn "neon_bswap<mode>"
2446 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2447 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2449 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2450 [(set_attr "type" "neon_rev<q>")]
;; Expander for vneg over the VDQW modes; it simply emits the generic
;; neg<mode>2 pattern.
2453 (define_expand "neon_vneg<mode>"
2454 [(match_operand:VDQW 0 "s_register_operand" "")
2455 (match_operand:VDQW 1 "s_register_operand" "")]
2458 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; copysign expander for the VCVTF modes.  It builds a constant vector in the
;; <V_cmp_result> mode whose every element is 0x80000000, loads it into a
;; fresh register, copies operand 2 into operand 0, views the mask in
;; <MODE>mode through a subreg, and finally emits neon_vbsl<mode> with
;; operand 0 as destination, the mask as selector and operand 0 as the first
;; data input.
;; NOTE(review): GEN_INT (0x80000000) is not sign-extended for a 32-bit
;; element mode when HOST_WIDE_INT is 64 bits; gen_int_mode may be the safer
;; spelling -- confirm against the CONST_INT canonicalisation rules.
2462 (define_expand "neon_copysignf<mode>"
2463 [(match_operand:VCVTF 0 "register_operand")
2464 (match_operand:VCVTF 1 "register_operand")
2465 (match_operand:VCVTF 2 "register_operand")]
2469 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2470 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2471 rtvec v = rtvec_alloc (n_elt);
2473 /* Create bitmask for vector select. */
2474 for (i = 0; i < n_elt; ++i)
2475 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
2477 emit_move_insn (v_bitmask,
2478 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2479 emit_move_insn (operands[0], operands[2]);
2480 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2481 <VCVTF:V_cmp_result>mode, 0);
2482 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; Emits vqneg.<V_s_elem> with one VDQIW input.
2489 (define_insn "neon_vqneg<mode>"
2490 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2491 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2494 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2495 [(set_attr "type" "neon_qneg<q>")]
;; Emits vcls.<V_s_elem> with one VDQIW input.
2498 (define_insn "neon_vcls<mode>"
2499 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2500 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2503 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2504 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz pattern for the VDQIW modes; emits vclz.<V_if_elem>.
2507 (define_insn "clz<mode>2"
2508 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2509 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2511 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2512 [(set_attr "type" "neon_cnt<q>")]
;; Expander for vclz over the VDQIW modes; it simply emits the clz<mode>2
;; pattern.
2515 (define_expand "neon_vclz<mode>"
2516 [(match_operand:VDQIW 0 "s_register_operand" "")
2517 (match_operand:VDQIW 1 "s_register_operand" "")]
2520 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount pattern for the VE modes; emits
;; vcnt.<V_sz_elem>.
2524 (define_insn "popcount<mode>2"
2525 [(set (match_operand:VE 0 "s_register_operand" "=w")
2526 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2528 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2529 [(set_attr "type" "neon_cnt<q>")]
;; Expander for vcnt over the VE modes; it simply emits the popcount<mode>2
;; pattern.
2532 (define_expand "neon_vcnt<mode>"
2533 [(match_operand:VE 0 "s_register_operand" "=w")
2534 (match_operand:VE 1 "s_register_operand" "w")]
2537 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; Emits vrecpe.<V_u_elem> with one input over the V32 modes.
2541 (define_insn "neon_vrecpe<mode>"
2542 [(set (match_operand:V32 0 "s_register_operand" "=w")
2543 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2546 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2547 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; Emits vrsqrte.<V_u_elem> with one input over the V32 modes.
2550 (define_insn "neon_vrsqrte<mode>"
2551 [(set (match_operand:V32 0 "s_register_operand" "=w")
2552 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2555 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2556 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for vmvn over the VDQIW modes; it simply emits the generic
;; one_cmpl<mode>2 pattern.
2559 (define_expand "neon_vmvn<mode>"
2560 [(match_operand:VDQIW 0 "s_register_operand" "")
2561 (match_operand:VDQIW 1 "s_register_operand" "")]
2564 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Extracts one element of a VD vector into an SImode core register with
;; vmov.s<V_sz_elem>.  Operand 2 is the immediate lane index; on big-endian
;; targets it is mirrored (NUNITS - 1 - lane) before being printed.
2568 (define_insn "neon_vget_lane<mode>_sext_internal"
2569 [(set (match_operand:SI 0 "s_register_operand" "=r")
2571 (vec_select:<V_elem>
2572 (match_operand:VD 1 "s_register_operand" "w")
2573 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2576 if (BYTES_BIG_ENDIAN)
2578 int elt = INTVAL (operands[2]);
2579 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2580 operands[2] = GEN_INT (elt);
2582 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2584 [(set_attr "type" "neon_to_gp")]
;; Extracts one element of a VD vector into an SImode core register with
;; vmov.u<V_sz_elem>.  Operand 2 is the immediate lane index; on big-endian
;; targets it is mirrored (NUNITS - 1 - lane) before being printed.
2587 (define_insn "neon_vget_lane<mode>_zext_internal"
2588 [(set (match_operand:SI 0 "s_register_operand" "=r")
2590 (vec_select:<V_elem>
2591 (match_operand:VD 1 "s_register_operand" "w")
2592 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2595 if (BYTES_BIG_ENDIAN)
2597 int elt = INTVAL (operands[2]);
2598 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2599 operands[2] = GEN_INT (elt);
2601 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2603 [(set_attr "type" "neon_to_gp")]
;; VQ variant of the vmov.s<V_sz_elem> lane extraction.  The lane index is
;; split into a half selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored on big-endian).  The source is re-expressed as a
;; <V_HALF> register at regno + 2 * half, and the instruction is printed
;; from the local ops[] array rather than from operands[].
2606 (define_insn "neon_vget_lane<mode>_sext_internal"
2607 [(set (match_operand:SI 0 "s_register_operand" "=r")
2609 (vec_select:<V_elem>
2610 (match_operand:VQ 1 "s_register_operand" "w")
2611 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2615 int regno = REGNO (operands[1]);
2616 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2617 unsigned int elt = INTVAL (operands[2]);
2618 unsigned int elt_adj = elt % halfelts;
2620 if (BYTES_BIG_ENDIAN)
2621 elt_adj = halfelts - 1 - elt_adj;
2623 ops[0] = operands[0];
2624 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2625 ops[2] = GEN_INT (elt_adj);
2626 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2630 [(set_attr "type" "neon_to_gp_q")]
;; VQ variant of the vmov.u<V_sz_elem> lane extraction.  The lane index is
;; split into a half selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored on big-endian).  The source is re-expressed as a
;; <V_HALF> register at regno + 2 * half, and the instruction is printed
;; from the local ops[] array rather than from operands[].
2633 (define_insn "neon_vget_lane<mode>_zext_internal"
2634 [(set (match_operand:SI 0 "s_register_operand" "=r")
2636 (vec_select:<V_elem>
2637 (match_operand:VQ 1 "s_register_operand" "w")
2638 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2642 int regno = REGNO (operands[1]);
2643 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2644 unsigned int elt = INTVAL (operands[2]);
2645 unsigned int elt_adj = elt % halfelts;
2647 if (BYTES_BIG_ENDIAN)
2648 elt_adj = halfelts - 1 - elt_adj;
2650 ops[0] = operands[0];
2651 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2652 ops[2] = GEN_INT (elt_adj);
2653 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2657 [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane intrinsic over the VDQW modes.  It first calls
;; neon_lane_bounds on the lane operand with the arguments 0 and the element
;; count of the mode.  On big-endian targets the lane is XORed with
;; (elements per 64 bits - 1).  Modes with 32-bit elements go through
;; vec_extract<mode>; the remaining case uses the *_sext_internal pattern.
2660 (define_expand "neon_vget_lane<mode>"
2661 [(match_operand:<V_ext> 0 "s_register_operand" "")
2662 (match_operand:VDQW 1 "s_register_operand" "")
2663 (match_operand:SI 2 "immediate_operand" "")]
2666 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2668 if (BYTES_BIG_ENDIAN)
2670 /* The intrinsics are defined in terms of a model where the
2671 element ordering in memory is vldm order, whereas the generic
2672 RTL is defined in terms of a model where the element ordering
2673 in memory is array order. Convert the lane number to conform
2675 unsigned int elt = INTVAL (operands[2]);
2676 unsigned int reg_nelts
2677 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2678 elt ^= reg_nelts - 1;
2679 operands[2] = GEN_INT (elt);
2682 if (GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2683 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2685 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Unsigned counterpart of the vget_lane expander, over the VDQIW modes.  It
;; first calls neon_lane_bounds on the lane operand with the arguments 0 and
;; the element count of the mode.  On big-endian targets the lane is XORed
;; with (elements per 64 bits - 1).  Modes with 32-bit elements go through
;; vec_extract<mode>; the remaining case uses the *_zext_internal pattern.
2691 (define_expand "neon_vget_laneu<mode>"
2692 [(match_operand:<V_ext> 0 "s_register_operand" "")
2693 (match_operand:VDQIW 1 "s_register_operand" "")
2694 (match_operand:SI 2 "immediate_operand" "")]
2697 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2699 if (BYTES_BIG_ENDIAN)
2701 /* The intrinsics are defined in terms of a model where the
2702 element ordering in memory is vldm order, whereas the generic
2703 RTL is defined in terms of a model where the element ordering
2704 in memory is array order. Convert the lane number to conform
2706 unsigned int elt = INTVAL (operands[2]);
2707 unsigned int reg_nelts
2708 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2709 elt ^= reg_nelts - 1;
2710 operands[2] = GEN_INT (elt);
2713 if (GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2714 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2716 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DImode vget_lane: after calling neon_lane_bounds on the lane operand with
;; the arguments 0 and 1, the lane is otherwise unused and operand 1 is
;; copied to operand 0 with a plain move.
2722 (define_expand "neon_vget_lanedi"
2723 [(match_operand:DI 0 "s_register_operand" "=r")
2724 (match_operand:DI 1 "s_register_operand" "w")
2725 (match_operand:SI 2 "immediate_operand" "")]
2728 neon_lane_bounds (operands[2], 0, 1);
2729 emit_move_insn (operands[0], operands[1]);
;; V2DI vget_lane: switches on the lane value in operand 2 and moves either
;; the low part or the high part of operand 1 (viewed as DImode) into
;; operand 0; the remaining arm of the switch calls neon_lane_bounds.
2733 (define_expand "neon_vget_lanev2di"
2734 [(match_operand:DI 0 "s_register_operand" "")
2735 (match_operand:V2DI 1 "s_register_operand" "")
2736 (match_operand:SI 2 "immediate_operand" "")]
2739 switch (INTVAL (operands[2]))
2742 emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
2745 emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
2748 neon_lane_bounds (operands[2], 0, 1);
;; Expander for the vset_lane intrinsic over the VDQ modes.  Operand 1 is the
;; scalar to insert, operand 2 the vector being updated and operand 3 the
;; lane.  After neon_lane_bounds is called on the lane, big-endian targets
;; XOR it with (elements per 64 bits - 1).  The lane is then passed to
;; vec_set<mode>_internal as the one-hot mask 1 << elt.
2754 (define_expand "neon_vset_lane<mode>"
2755 [(match_operand:VDQ 0 "s_register_operand" "=w")
2756 (match_operand:<V_elem> 1 "s_register_operand" "r")
2757 (match_operand:VDQ 2 "s_register_operand" "0")
2758 (match_operand:SI 3 "immediate_operand" "i")]
2761 unsigned int elt = INTVAL (operands[3]);
2762 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
2764 if (BYTES_BIG_ENDIAN)
2766 unsigned int reg_nelts
2767 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2768 elt ^= reg_nelts - 1;
2771 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2772 GEN_INT (1 << elt), operands[2]));
2776 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; DImode vset_lane: calls neon_lane_bounds on operand 3 with the arguments
;; 0 and 1, then copies operand 1 to operand 0.  Operand 2 is not read by
;; the expansion code.
2778 (define_expand "neon_vset_lanedi"
2779 [(match_operand:DI 0 "s_register_operand" "=w")
2780 (match_operand:DI 1 "s_register_operand" "r")
2781 (match_operand:DI 2 "s_register_operand" "0")
2782 (match_operand:SI 3 "immediate_operand" "i")]
2785 neon_lane_bounds (operands[3], 0, 1);
2786 emit_move_insn (operands[0], operands[1]);
;; Expander for vcreate over the VDX modes: reinterprets the DImode operand 1
;; in <MODE>mode via gen_lowpart and moves it into operand 0.
2790 (define_expand "neon_vcreate<mode>"
2791 [(match_operand:VDX 0 "s_register_operand" "")
2792 (match_operand:DI 1 "general_operand" "")]
2795 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2796 emit_move_insn (operands[0], src);
;; vec_duplicate of a scalar held in a core register ("r") into every
;; element of a VX vector; emits vdup.<V_sz_elem>.
2800 (define_insn "neon_vdup_n<mode>"
2801 [(set (match_operand:VX 0 "s_register_operand" "=w")
2802 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2804 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2805 [(set_attr "type" "neon_from_gp<q>")]
;; vec_duplicate for the V32 modes with two alternatives: the scalar may come
;; from a core register ("r", printed as %1) or from a "t"-constraint
;; register (printed with the %y1 modifier).  The "type" attribute differs
;; per alternative.
2808 (define_insn "neon_vdup_n<mode>"
2809 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2810 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2813 vdup.<V_sz_elem>\t%<V_reg>0, %1
2814 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2815 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode vdup_n: the expansion is a plain move of operand 1 to operand 0.
2818 (define_expand "neon_vdup_ndi"
2819 [(match_operand:DI 0 "s_register_operand" "=w")
2820 (match_operand:DI 1 "s_register_operand" "r")]
2823 emit_move_insn (operands[0], operands[1]);
;; vec_duplicate of a DImode value into V2DI.  Each alternative prints two
;; vmov instructions, one for %e0 and one for %f0, which is why the "length"
;; attribute is 8.  Alternative 0 takes the source from core registers
;; (%Q1, %R1); alternative 1 takes it from a "w" register (%P1).
2828 (define_insn "neon_vdup_nv2di"
2829 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2830 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2833 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2834 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2835 [(set_attr "length" "8")
2836 (set_attr "type" "multiple")]
;; Duplicates one lane of operand 1 (mode <V_double_vector_mode>) across the
;; VDQW destination.  On big-endian targets the lane index is mirrored
;; (NUNITS - 1 - lane) first.  The template is returned in one of two forms
;; that differ only in how the destination is printed (%P0 or %q0).
2839 (define_insn "neon_vdup_lane<mode>_internal"
2840 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2842 (vec_select:<V_elem>
2843 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2844 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2847 if (BYTES_BIG_ENDIAN)
2849 int elt = INTVAL (operands[2]);
2850 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2851 operands[2] = GEN_INT (elt);
2854 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2856 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2858 [(set_attr "type" "neon_dup<q>")]
;; Expander for the vdup_lane intrinsic.  It calls neon_lane_bounds on the
;; lane with the arguments 0 and the element count of
;; <V_double_vector_mode>, applies the big-endian lane adjustment (XOR with
;; elements per 64 bits - 1), and emits neon_vdup_lane<mode>_internal.
2861 (define_expand "neon_vdup_lane<mode>"
2862 [(match_operand:VDQW 0 "s_register_operand" "=w")
2863 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2864 (match_operand:SI 2 "immediate_operand" "i")]
2867 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
2868 if (BYTES_BIG_ENDIAN)
2870 unsigned int elt = INTVAL (operands[2]);
2871 unsigned int reg_nelts
2872 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
2873 elt ^= reg_nelts - 1;
2874 operands[2] = GEN_INT (elt);
2876 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2881 ; Scalar index is ignored, since only zero is valid here.
;; DImode vdup_lane: calls neon_lane_bounds on the lane with the arguments 0
;; and 1, then copies operand 1 to operand 0 with a plain move.
2882 (define_expand "neon_vdup_lanedi"
2883 [(match_operand:DI 0 "s_register_operand" "=w")
2884 (match_operand:DI 1 "s_register_operand" "w")
2885 (match_operand:SI 2 "immediate_operand" "i")]
2888 neon_lane_bounds (operands[2], 0, 1);
2889 emit_move_insn (operands[0], operands[1]);
2893 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI vdup_lane: calls neon_lane_bounds on the lane with the arguments 0
;; and 1, then emits neon_vdup_nv2di on the DImode source.
2894 (define_expand "neon_vdup_lanev2di"
2895 [(match_operand:V2DI 0 "s_register_operand" "=w")
2896 (match_operand:DI 1 "s_register_operand" "w")
2897 (match_operand:SI 2 "immediate_operand" "i")]
2900 neon_lane_bounds (operands[2], 0, 1);
2901 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2905 ; Disabled before reload because we don't want combine doing something silly,
2906 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap: two parallel sets exchange operands 0 and 1, both of which
;; are read-write ("+w").  The condition requires reload_completed.
2907 (define_insn "*neon_vswp<mode>"
2908 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2909 (match_operand:VDQX 1 "s_register_operand" "+w"))
2910 (set (match_dup 1) (match_dup 0))]
2911 "TARGET_NEON && reload_completed"
2912 "vswp\t%<V_reg>0, %<V_reg>1"
2913 [(set_attr "type" "neon_permute<q>")]
2916 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2918 ;; FIXME: A different implementation of this builtin could make it much
2919 ;; more likely that we wouldn't actually need to output anything (we could make
2920 ;; it so that the reg allocator puts things in the right places magically
2921 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vec_concat of two VDX values into a <V_DOUBLE> register.  The pattern is
;; split once reload_completed holds, delegating the expansion to
;; neon_split_vcombine.
2923 (define_insn_and_split "neon_vcombine<mode>"
2924 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2925 (vec_concat:<V_DOUBLE>
2926 (match_operand:VDX 1 "s_register_operand" "w")
2927 (match_operand:VDX 2 "s_register_operand" "w")))]
2930 "&& reload_completed"
2933 neon_split_vcombine (operands);
2936 [(set_attr "type" "multiple")]
;; Expander for vget_high over the VQX modes: moves into operand 0 the
;; <V_HALF> subreg of operand 1 that starts at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).
2939 (define_expand "neon_vget_high<mode>"
2940 [(match_operand:<V_HALF> 0 "s_register_operand")
2941 (match_operand:VQX 1 "s_register_operand")]
2944 emit_move_insn (operands[0],
2945 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
2946 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for vget_low over the VQX modes: moves a <V_HALF> subreg of
;; operand 1 into operand 0.
2950 (define_expand "neon_vget_low<mode>"
2951 [(match_operand:<V_HALF> 0 "s_register_operand")
2952 (match_operand:VQX 1 "s_register_operand")]
2955 emit_move_insn (operands[0],
2956 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named signed integer to float conversion (float RTL code) for
;; the VCVTI modes; emits vcvt.f32.s32.  Disabled when flag_rounding_math is
;; set.
2961 (define_insn "float<mode><V_cvtto>2"
2962 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2963 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2964 "TARGET_NEON && !flag_rounding_math"
2965 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
2966 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned integer to float conversion (unsigned_float RTL
;; code) for the VCVTI modes; emits vcvt.f32.u32.  Disabled when
;; flag_rounding_math is set.
2969 (define_insn "floatuns<mode><V_cvtto>2"
2970 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2971 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2972 "TARGET_NEON && !flag_rounding_math"
2973 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
2974 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named float to signed integer conversion (fix RTL code) for the
;; VCVTF modes; emits vcvt.s32.f32.
2977 (define_insn "fix_trunc<mode><V_cvtto>2"
2978 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2979 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2981 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
2982 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named float to unsigned integer conversion (unsigned_fix RTL
;; code) for the VCVTF modes; emits vcvt.u32.f32.
2985 (define_insn "fixuns_trunc<mode><V_cvtto>2"
2986 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2987 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2989 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
2990 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec vcvt from the float modes (VCVTF) to <V_CVTTO>; <sup> supplies the
;; signedness letter of the 32-bit integer destination type.
2993 (define_insn "neon_vcvt<sup><mode>"
2994 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2995 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
2998 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
2999 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec vcvt from the integer modes (VCVTI) to <V_CVTTO>; <sup> supplies
;; the signedness letter of the 32-bit integer source type.
3002 (define_insn "neon_vcvt<sup><mode>"
3003 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3004 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3007 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3008 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Conversion from V4HF (%P1) to V4SF (%q0); emits vcvt.f32.f16.  Requires
;; TARGET_FP16 in addition to TARGET_NEON.
3011 (define_insn "neon_vcvtv4sfv4hf"
3012 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3013 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3015 "TARGET_NEON && TARGET_FP16"
3016 "vcvt.f32.f16\t%q0, %P1"
3017 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Conversion from V4SF (%q1) to V4HF (%P0); emits vcvt.f16.f32.  Requires
;; TARGET_FP16 in addition to TARGET_NEON.
3020 (define_insn "neon_vcvtv4hfv4sf"
3021 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3022 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3024 "TARGET_NEON && TARGET_FP16"
3025 "vcvt.f16.f32\t%P0, %q1"
3026 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; vcvt with an extra immediate (operand 2) from the float modes (VCVTF).
;; The immediate is passed to neon_const_bounds with the arguments 1 and 33
;; before the instruction is printed with %2 as its last operand.
3029 (define_insn "neon_vcvt<sup>_n<mode>"
3030 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3031 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3032 (match_operand:SI 2 "immediate_operand" "i")]
3036 neon_const_bounds (operands[2], 1, 33);
3037 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3039 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt with an extra immediate (operand 2) from the integer modes (VCVTI).
;; The immediate is passed to neon_const_bounds with the arguments 1 and 33
;; before the instruction is printed with %2 as its last operand.
3042 (define_insn "neon_vcvt<sup>_n<mode>"
3043 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3044 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3045 (match_operand:SI 2 "immediate_operand" "i")]
3049 neon_const_bounds (operands[2], 1, 33);
3050 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3052 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Emits vmovn.<V_if_elem>: a VN input (%q1) produces a <V_narrow> result
;; (%P0).
3055 (define_insn "neon_vmovn<mode>"
3056 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3057 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3060 "vmovn.<V_if_elem>\t%P0, %q1"
3061 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Emits vqmovn.<sup><V_sz_elem>: a VN input (%q1) produces a <V_narrow>
;; result (%P0).
3064 (define_insn "neon_vqmovn<sup><mode>"
3065 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3066 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3069 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3070 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Emits vqmovun.<V_s_elem>: a VN input (%q1) produces a <V_narrow> result
;; (%P0).
3073 (define_insn "neon_vqmovun<mode>"
3074 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3075 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3078 "vqmovun.<V_s_elem>\t%P0, %q1"
3079 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Emits vmovl.<sup><V_sz_elem>: a VW input (%P1) produces a <V_widen>
;; result (%q0).
3082 (define_insn "neon_vmovl<sup><mode>"
3083 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3084 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3087 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3088 [(set_attr "type" "neon_shift_imm_long")]
;; vmul by one lane of a vector, for the VMD modes.  Operand 2 holds the
;; scalar source (register class from <scalar_mul_constraint>) and operand 3
;; is the lane, which is passed to neon_lane_bounds with the arguments 0 and
;; the element count of <MODE>mode.  The "type" attribute depends on
;; <Is_float_mode>.
3091 (define_insn "neon_vmul_lane<mode>"
3092 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3093 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3094 (match_operand:VMD 2 "s_register_operand"
3095 "<scalar_mul_constraint>")
3096 (match_operand:SI 3 "immediate_operand" "i")]
3100 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3101 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3104 (if_then_else (match_test "<Is_float_mode>")
3105 (const_string "neon_fp_mul_s_scalar<q>")
3106 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; vmul by one lane of a vector, for the VMQ modes.  The scalar source
;; (operand 2) has the <V_HALF> mode and is printed as %P2, so the lane in
;; operand 3 is passed to neon_lane_bounds with the element count of
;; <V_HALF>mode rather than <MODE>mode.
3109 (define_insn "neon_vmul_lane<mode>"
3110 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3111 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3112 (match_operand:<V_HALF> 2 "s_register_operand"
3113 "<scalar_mul_constraint>")
3114 (match_operand:SI 3 "immediate_operand" "i")]
3118 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3119 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3122 (if_then_else (match_test "<Is_float_mode>")
3123 (const_string "neon_fp_mul_s_scalar<q>")
3124 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; vmull by one lane: VMDI inputs (%P1 and the scalar source %P2[lane])
;; produce a <V_widen> result (%q0).  The lane in operand 3 is passed to
;; neon_lane_bounds with the arguments 0 and the element count of
;; <MODE>mode.
3127 (define_insn "neon_vmull<sup>_lane<mode>"
3128 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3129 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3130 (match_operand:VMDI 2 "s_register_operand"
3131 "<scalar_mul_constraint>")
3132 (match_operand:SI 3 "immediate_operand" "i")]
3136 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3137 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3139 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by one lane (UNSPEC_VQDMULL_LANE): VMDI inputs (%P1 and the
;; scalar source %P2[lane]) produce a <V_widen> result (%q0).  The lane in
;; operand 3 is passed to neon_lane_bounds with the arguments 0 and the
;; element count of <MODE>mode.
3142 (define_insn "neon_vqdmull_lane<mode>"
3143 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3144 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3145 (match_operand:VMDI 2 "s_register_operand"
3146 "<scalar_mul_constraint>")
3147 (match_operand:SI 3 "immediate_operand" "i")]
3148 UNSPEC_VQDMULL_LANE))]
3151 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3152 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3154 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vq<r>dmulh by scalar lane, VMQI (quad-register) modes.
;; Operand 1 is the vector, operand 2 is the <V_HALF>-mode register holding
;; the scalar, and operand 3 is the lane index printed as %P2[%c3].
;; The lane selects an element of operand 2, so the upper limit passed to
;; neon_lane_bounds is the element count of <V_HALF>mode (matching the VMQ
;; neon_vmul_lane pattern), not that of the wider <MODE>mode.
3157 (define_insn "neon_vq<r>dmulh_lane<mode>"
3158 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3159 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3160 (match_operand:<V_HALF> 2 "s_register_operand"
3161 "<scalar_mul_constraint>")
3162 (match_operand:SI 3 "immediate_operand" "i")]
3166 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3167 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3169 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vq<r>dmulh by scalar lane, VMDI (double-register) modes.
;; Operand 3 is the lane index, passed to neon_lane_bounds with limits 0 and
;; the element count of <MODE>mode; all registers are printed with %P.
3172 (define_insn "neon_vq<r>dmulh_lane<mode>"
3173 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3174 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3175 (match_operand:VMDI 2 "s_register_operand"
3176 "<scalar_mul_constraint>")
3177 (match_operand:SI 3 "immediate_operand" "i")]
3181 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3182 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3184 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Multiply-accumulate by scalar lane, VMD (double-register) modes.
;; Operand 1 is tied to the output (constraint "0"), operand 2 is the vector
;; multiplicand, operand 3 holds the scalar and operand 4 is the lane index.
;; The lane index is passed to neon_lane_bounds with limits 0 and the element
;; count of <MODE>mode.
3187 (define_insn "neon_vmla_lane<mode>"
3188 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3189 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3190 (match_operand:VMD 2 "s_register_operand" "w")
3191 (match_operand:VMD 3 "s_register_operand"
3192 "<scalar_mul_constraint>")
3193 (match_operand:SI 4 "immediate_operand" "i")]
3197 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3198 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3201 (if_then_else (match_test "<Is_float_mode>")
3202 (const_string "neon_fp_mla_s_scalar<q>")
3203 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by scalar lane, VMQ (quad-register) modes.
;; Operand 1 is tied to the output (constraint "0"), operand 2 is the vector
;; multiplicand, operand 3 is the <V_HALF>-mode register holding the scalar
;; and operand 4 is the lane index printed as %P3[%c4].
;; The lane selects an element of operand 3, so the upper limit passed to
;; neon_lane_bounds is the element count of <V_HALF>mode (matching the VMQ
;; neon_vmul_lane pattern), not that of the wider <MODE>mode.
3206 (define_insn "neon_vmla_lane<mode>"
3207 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3208 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3209 (match_operand:VMQ 2 "s_register_operand" "w")
3210 (match_operand:<V_HALF> 3 "s_register_operand"
3211 "<scalar_mul_constraint>")
3212 (match_operand:SI 4 "immediate_operand" "i")]
3216 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3217 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3220 (if_then_else (match_test "<Is_float_mode>")
3221 (const_string "neon_fp_mla_s_scalar<q>")
3222 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by scalar lane.
;; The <V_widen> accumulator (operand 1) is tied to the output; operands 2 and
;; 3 are VMDI; operand 4 is the lane index, passed to neon_lane_bounds with
;; limits 0 and the element count of <MODE>mode.
3225 (define_insn "neon_vmlal<sup>_lane<mode>"
3226 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3227 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3228 (match_operand:VMDI 2 "s_register_operand" "w")
3229 (match_operand:VMDI 3 "s_register_operand"
3230 "<scalar_mul_constraint>")
3231 (match_operand:SI 4 "immediate_operand" "i")]
3235 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3236 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3238 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by scalar lane (UNSPEC_VQDMLAL_LANE).
;; The <V_widen> accumulator (operand 1) is tied to the output; operand 4 is
;; the lane index, passed to neon_lane_bounds with limits 0 and the element
;; count of <MODE>mode.
3241 (define_insn "neon_vqdmlal_lane<mode>"
3242 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3243 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3244 (match_operand:VMDI 2 "s_register_operand" "w")
3245 (match_operand:VMDI 3 "s_register_operand"
3246 "<scalar_mul_constraint>")
3247 (match_operand:SI 4 "immediate_operand" "i")]
3248 UNSPEC_VQDMLAL_LANE))]
3251 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3252 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3254 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by scalar lane, VMD (double-register) modes.
;; Operand 1 is tied to the output, operand 3 holds the scalar and operand 4
;; is the lane index, passed to neon_lane_bounds with limits 0 and the
;; element count of <MODE>mode.
3257 (define_insn "neon_vmls_lane<mode>"
3258 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3259 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3260 (match_operand:VMD 2 "s_register_operand" "w")
3261 (match_operand:VMD 3 "s_register_operand"
3262 "<scalar_mul_constraint>")
3263 (match_operand:SI 4 "immediate_operand" "i")]
3267 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3268 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3271 (if_then_else (match_test "<Is_float_mode>")
3272 (const_string "neon_fp_mla_s_scalar<q>")
3273 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by scalar lane, VMQ (quad-register) modes.
;; Operand 1 is tied to the output (constraint "0"), operand 2 is the vector
;; multiplicand, operand 3 is the <V_HALF>-mode register holding the scalar
;; and operand 4 is the lane index printed as %P3[%c4].
;; The lane selects an element of operand 3, so the upper limit passed to
;; neon_lane_bounds is the element count of <V_HALF>mode (matching the VMQ
;; neon_vmul_lane pattern), not that of the wider <MODE>mode.
3276 (define_insn "neon_vmls_lane<mode>"
3277 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3278 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3279 (match_operand:VMQ 2 "s_register_operand" "w")
3280 (match_operand:<V_HALF> 3 "s_register_operand"
3281 "<scalar_mul_constraint>")
3282 (match_operand:SI 4 "immediate_operand" "i")]
3286 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3287 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3290 (if_then_else (match_test "<Is_float_mode>")
3291 (const_string "neon_fp_mla_s_scalar<q>")
3292 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by scalar lane.
;; The <V_widen> accumulator (operand 1) is tied to the output; operand 4 is
;; the lane index, passed to neon_lane_bounds with limits 0 and the element
;; count of <MODE>mode.
3295 (define_insn "neon_vmlsl<sup>_lane<mode>"
3296 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3297 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3298 (match_operand:VMDI 2 "s_register_operand" "w")
3299 (match_operand:VMDI 3 "s_register_operand"
3300 "<scalar_mul_constraint>")
3301 (match_operand:SI 4 "immediate_operand" "i")]
3305 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3306 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3308 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by scalar lane (UNSPEC_VQDMLSL_LANE).
;; The <V_widen> accumulator (operand 1) is tied to the output; operand 4 is
;; the lane index, passed to neon_lane_bounds with limits 0 and the element
;; count of <MODE>mode.
3311 (define_insn "neon_vqdmlsl_lane<mode>"
3312 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3313 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3314 (match_operand:VMDI 2 "s_register_operand" "w")
3315 (match_operand:VMDI 3 "s_register_operand"
3316 "<scalar_mul_constraint>")
3317 (match_operand:SI 4 "immediate_operand" "i")]
3318 UNSPEC_VQDMLSL_LANE))]
3321 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3322 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3324 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3327 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3328 ; core register into a temp register, then use a scalar taken from that. This
3329 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3330 ; or extracted from another vector. In the latter case it's currently better to
3331 ; use the "_lane" variant, and the former case can probably be implemented
3332 ; using vld1_lane, but that hasn't been done yet.
;; Expander for multiply by a scalar register, VMD modes.
;; Allocates a fresh <MODE>mode pseudo, emits neon_vset_lane to insert the
;; <V_elem> scalar (operand 2) into it with const0_rtx as the lane argument,
;; then emits neon_vmul_lane using that temporary as the scalar source.
3334 (define_expand "neon_vmul_n<mode>"
3335 [(match_operand:VMD 0 "s_register_operand" "")
3336 (match_operand:VMD 1 "s_register_operand" "")
3337 (match_operand:<V_elem> 2 "s_register_operand" "")]
3340 rtx tmp = gen_reg_rtx (<MODE>mode);
3341 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3342 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for multiply by a scalar register, VMQ modes.
;; The temporary is a <V_HALF>mode pseudo: the scalar (operand 2) is inserted
;; with neon_vset_lane<V_half> (lane argument const0_rtx) and the temporary is
;; then passed to neon_vmul_lane as the scalar source.
3347 (define_expand "neon_vmul_n<mode>"
3348 [(match_operand:VMQ 0 "s_register_operand" "")
3349 (match_operand:VMQ 1 "s_register_operand" "")
3350 (match_operand:<V_elem> 2 "s_register_operand" "")]
3353 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3354 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3355 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for widening multiply by a scalar register (the "s" variant).
;; Inserts the scalar (operand 2) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vmulls_lane on that temporary.
3360 (define_expand "neon_vmulls_n<mode>"
3361 [(match_operand:<V_widen> 0 "s_register_operand" "")
3362 (match_operand:VMDI 1 "s_register_operand" "")
3363 (match_operand:<V_elem> 2 "s_register_operand" "")]
3366 rtx tmp = gen_reg_rtx (<MODE>mode);
3367 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3368 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Expander for widening multiply by a scalar register (the "u" variant).
;; Inserts the scalar (operand 2) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vmullu_lane on that temporary.
3373 (define_expand "neon_vmullu_n<mode>"
3374 [(match_operand:<V_widen> 0 "s_register_operand" "")
3375 (match_operand:VMDI 1 "s_register_operand" "")
3376 (match_operand:<V_elem> 2 "s_register_operand" "")]
3379 rtx tmp = gen_reg_rtx (<MODE>mode);
3380 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3381 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqdmull by a scalar register.
;; Inserts the scalar (operand 2) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vqdmull_lane on that temporary.
3386 (define_expand "neon_vqdmull_n<mode>"
3387 [(match_operand:<V_widen> 0 "s_register_operand" "")
3388 (match_operand:VMDI 1 "s_register_operand" "")
3389 (match_operand:<V_elem> 2 "s_register_operand" "")]
3392 rtx tmp = gen_reg_rtx (<MODE>mode);
3393 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3394 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqdmulh by a scalar register, VMDI modes.
;; Inserts the scalar (operand 2) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vqdmulh_lane on that temporary.
3399 (define_expand "neon_vqdmulh_n<mode>"
3400 [(match_operand:VMDI 0 "s_register_operand" "")
3401 (match_operand:VMDI 1 "s_register_operand" "")
3402 (match_operand:<V_elem> 2 "s_register_operand" "")]
3405 rtx tmp = gen_reg_rtx (<MODE>mode);
3406 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3407 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqrdmulh by a scalar register, VMDI modes.
;; Inserts the scalar (operand 2) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vqrdmulh_lane on that temporary.
3412 (define_expand "neon_vqrdmulh_n<mode>"
3413 [(match_operand:VMDI 0 "s_register_operand" "")
3414 (match_operand:VMDI 1 "s_register_operand" "")
3415 (match_operand:<V_elem> 2 "s_register_operand" "")]
3418 rtx tmp = gen_reg_rtx (<MODE>mode);
3419 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3420 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqdmulh by a scalar register, VMQI modes.
;; The temporary is a <V_HALF>mode pseudo filled via neon_vset_lane<V_half>;
;; it is then passed to neon_vqdmulh_lane as the scalar source.
3425 (define_expand "neon_vqdmulh_n<mode>"
3426 [(match_operand:VMQI 0 "s_register_operand" "")
3427 (match_operand:VMQI 1 "s_register_operand" "")
3428 (match_operand:<V_elem> 2 "s_register_operand" "")]
3431 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3432 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3433 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqrdmulh by a scalar register, VMQI modes.
;; The temporary is a <V_HALF>mode pseudo filled via neon_vset_lane<V_half>;
;; it is then passed to neon_vqrdmulh_lane as the scalar source.
3438 (define_expand "neon_vqrdmulh_n<mode>"
3439 [(match_operand:VMQI 0 "s_register_operand" "")
3440 (match_operand:VMQI 1 "s_register_operand" "")
3441 (match_operand:<V_elem> 2 "s_register_operand" "")]
3444 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3445 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3446 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for multiply-accumulate by a scalar register, VMD modes.
;; Inserts the scalar (operand 3) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vmla_lane with operands 0-2 passed through.
3451 (define_expand "neon_vmla_n<mode>"
3452 [(match_operand:VMD 0 "s_register_operand" "")
3453 (match_operand:VMD 1 "s_register_operand" "")
3454 (match_operand:VMD 2 "s_register_operand" "")
3455 (match_operand:<V_elem> 3 "s_register_operand" "")]
3458 rtx tmp = gen_reg_rtx (<MODE>mode);
3459 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3460 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for multiply-accumulate by a scalar register, VMQ modes.
;; The temporary is a <V_HALF>mode pseudo filled via neon_vset_lane<V_half>
;; from operand 3; neon_vmla_lane is then emitted with operands 0-2 passed
;; through.
3465 (define_expand "neon_vmla_n<mode>"
3466 [(match_operand:VMQ 0 "s_register_operand" "")
3467 (match_operand:VMQ 1 "s_register_operand" "")
3468 (match_operand:VMQ 2 "s_register_operand" "")
3469 (match_operand:<V_elem> 3 "s_register_operand" "")]
3472 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3473 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3474 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for widening multiply-accumulate by a scalar register ("s").
;; Inserts the scalar (operand 3) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vmlals_lane.
3479 (define_expand "neon_vmlals_n<mode>"
3480 [(match_operand:<V_widen> 0 "s_register_operand" "")
3481 (match_operand:<V_widen> 1 "s_register_operand" "")
3482 (match_operand:VMDI 2 "s_register_operand" "")
3483 (match_operand:<V_elem> 3 "s_register_operand" "")]
3486 rtx tmp = gen_reg_rtx (<MODE>mode);
3487 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3488 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for widening multiply-accumulate by a scalar register ("u").
;; Inserts the scalar (operand 3) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vmlalu_lane.
3493 (define_expand "neon_vmlalu_n<mode>"
3494 [(match_operand:<V_widen> 0 "s_register_operand" "")
3495 (match_operand:<V_widen> 1 "s_register_operand" "")
3496 (match_operand:VMDI 2 "s_register_operand" "")
3497 (match_operand:<V_elem> 3 "s_register_operand" "")]
3500 rtx tmp = gen_reg_rtx (<MODE>mode);
3501 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3502 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for vqdmlal by a scalar register.
;; Inserts the scalar (operand 3) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vqdmlal_lane.
3507 (define_expand "neon_vqdmlal_n<mode>"
3508 [(match_operand:<V_widen> 0 "s_register_operand" "")
3509 (match_operand:<V_widen> 1 "s_register_operand" "")
3510 (match_operand:VMDI 2 "s_register_operand" "")
3511 (match_operand:<V_elem> 3 "s_register_operand" "")]
3514 rtx tmp = gen_reg_rtx (<MODE>mode);
3515 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3516 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for multiply-subtract by a scalar register, VMD modes.
;; Inserts the scalar (operand 3) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vmls_lane with operands 0-2 passed through.
3521 (define_expand "neon_vmls_n<mode>"
3522 [(match_operand:VMD 0 "s_register_operand" "")
3523 (match_operand:VMD 1 "s_register_operand" "")
3524 (match_operand:VMD 2 "s_register_operand" "")
3525 (match_operand:<V_elem> 3 "s_register_operand" "")]
3528 rtx tmp = gen_reg_rtx (<MODE>mode);
3529 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3530 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for multiply-subtract by a scalar register, VMQ modes.
;; The temporary is a <V_HALF>mode pseudo filled via neon_vset_lane<V_half>
;; from operand 3; neon_vmls_lane is then emitted with operands 0-2 passed
;; through.
3535 (define_expand "neon_vmls_n<mode>"
3536 [(match_operand:VMQ 0 "s_register_operand" "")
3537 (match_operand:VMQ 1 "s_register_operand" "")
3538 (match_operand:VMQ 2 "s_register_operand" "")
3539 (match_operand:<V_elem> 3 "s_register_operand" "")]
3542 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3543 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3544 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for widening multiply-subtract by a scalar register ("s").
;; Inserts the scalar (operand 3) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vmlsls_lane.
3549 (define_expand "neon_vmlsls_n<mode>"
3550 [(match_operand:<V_widen> 0 "s_register_operand" "")
3551 (match_operand:<V_widen> 1 "s_register_operand" "")
3552 (match_operand:VMDI 2 "s_register_operand" "")
3553 (match_operand:<V_elem> 3 "s_register_operand" "")]
3556 rtx tmp = gen_reg_rtx (<MODE>mode);
3557 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3558 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for widening multiply-subtract by a scalar register ("u").
;; Inserts the scalar (operand 3) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vmlslu_lane.
3563 (define_expand "neon_vmlslu_n<mode>"
3564 [(match_operand:<V_widen> 0 "s_register_operand" "")
3565 (match_operand:<V_widen> 1 "s_register_operand" "")
3566 (match_operand:VMDI 2 "s_register_operand" "")
3567 (match_operand:<V_elem> 3 "s_register_operand" "")]
3570 rtx tmp = gen_reg_rtx (<MODE>mode);
3571 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3572 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for vqdmlsl by a scalar register.
;; Inserts the scalar (operand 3) into a fresh <MODE>mode temporary with
;; neon_vset_lane, then emits neon_vqdmlsl_lane.
3577 (define_expand "neon_vqdmlsl_n<mode>"
3578 [(match_operand:<V_widen> 0 "s_register_operand" "")
3579 (match_operand:<V_widen> 1 "s_register_operand" "")
3580 (match_operand:VMDI 2 "s_register_operand" "")
3581 (match_operand:<V_elem> 3 "s_register_operand" "")]
3584 rtx tmp = gen_reg_rtx (<MODE>mode);
3585 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3586 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext over VDQX modes: two vector inputs plus an immediate (operand 3).
;; The immediate is passed to neon_const_bounds with limits 0 and the element
;; count of <MODE>mode, then printed as the final %3 operand.
3591 (define_insn "neon_vext<mode>"
3592 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3593 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3594 (match_operand:VDQX 2 "s_register_operand" "w")
3595 (match_operand:SI 3 "immediate_operand" "i")]
3599 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3600 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3602 [(set_attr "type" "neon_ext<q>")]
;; vrev64 over VDQ modes: single vector input, single fixed output template.
3605 (define_insn "neon_vrev64<mode>"
3606 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3607 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3610 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3611 [(set_attr "type" "neon_rev<q>")]
;; vrev32 over VX modes: single vector input, single fixed output template.
3614 (define_insn "neon_vrev32<mode>"
3615 [(set (match_operand:VX 0 "s_register_operand" "=w")
3616 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3619 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3620 [(set_attr "type" "neon_rev<q>")]
;; vrev16 over VE modes: single vector input, single fixed output template.
3623 (define_insn "neon_vrev16<mode>"
3624 [(set (match_operand:VE 0 "s_register_operand" "=w")
3625 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3628 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3629 [(set_attr "type" "neon_rev<q>")]
3632 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3633 ; allocation. For an intrinsic of form:
3634 ; rD = vbsl_* (rS, rN, rM)
3635 ; We can use any of:
3636 ; vbsl rS, rN, rM (if D = S)
3637 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3638 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three constraint alternatives, one per output mnemonic.
;;   alt 0: operand 1 tied to the output -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied to the output -> vbit %0, %2, %1
;;   alt 2: operand 2 tied to the output -> vbif %0, %3, %1
3640 (define_insn "neon_vbsl<mode>_internal"
3641 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3642 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3643 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3644 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3648 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3649 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3650 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3651 [(set_attr "type" "neon_bsl<q>")]
;; Expander for the bitwise-select pattern.  Operand 1 arrives in
;; <V_cmp_result> mode and is rewritten with gen_lowpart to <MODE>mode so that
;; all three unspec inputs share the same mode.
3654 (define_expand "neon_vbsl<mode>"
3655 [(set (match_operand:VDQX 0 "s_register_operand" "")
3656 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3657 (match_operand:VDQX 2 "s_register_operand" "")
3658 (match_operand:VDQX 3 "s_register_operand" "")]
3662 /* We can't alias operands together if they have different modes. */
3663 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift by a vector register over VDQIX modes: operand 1 is the value and
;; operand 2 is the register holding the shift amounts.  The mnemonic comes
;; from <shift_op> and the type suffix from <sup> and the element size.
;; This variant carries the "neon_shift_imm<q>" scheduling type.
3667 (define_insn "neon_v<shift_op><sup><mode>"
3668 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3669 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3670 (match_operand:VDQIX 2 "s_register_operand" "w")]
3673 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3674 [(set_attr "type" "neon_shift_imm<q>")]
;; Second shift-by-register pattern over VDQIX modes with the same operand
;; layout and output template as the one above it in the file.  This variant
;; carries the "neon_sat_shift_imm<q>" scheduling type.
3678 (define_insn "neon_v<shift_op><sup><mode>"
3679 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3680 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3681 (match_operand:VDQIX 2 "s_register_operand" "w")]
3684 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3685 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift by immediate over VDQIX modes.  The immediate (operand 2) is passed
;; to neon_const_bounds with limits 1 and element-bits + 1 before being
;; printed as %2.
3689 (define_insn "neon_v<shift_op><sup>_n<mode>"
3690 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3691 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3692 (match_operand:SI 2 "immediate_operand" "i")]
3696 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3697 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3699 [(set_attr "type" "neon_shift_imm<q>")]
3702 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: VN input (printed %q1), <V_narrow> result
;; (printed %P0).  The immediate is passed to neon_const_bounds with limits 1
;; and (element-bits / 2) + 1.
3703 (define_insn "neon_v<shift_op>_n<mode>"
3704 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3705 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3706 (match_operand:SI 2 "immediate_operand" "i")]
3710 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3711 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3713 [(set_attr "type" "neon_shift_imm_narrow_q")]
3716 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing shift by immediate: VN input, <V_narrow> result.
;; The immediate is passed to neon_const_bounds with limits 1 and
;; (element-bits / 2) + 1; the type suffix is built from <sup> and the
;; element size.
3717 (define_insn "neon_v<shift_op><sup>_n<mode>"
3718 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3719 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3720 (match_operand:SI 2 "immediate_operand" "i")]
3724 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3725 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3727 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3730 ;; vqshrun_n, vqrshrun_n
;; Saturating narrowing shift by immediate using the <V_s_elem> type suffix:
;; VN input, <V_narrow> result.  The immediate is passed to neon_const_bounds
;; with limits 1 and (element-bits / 2) + 1.
3731 (define_insn "neon_v<shift_op>_n<mode>"
3732 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3733 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3734 (match_operand:SI 2 "immediate_operand" "i")]
3738 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3739 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3741 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate over VDQIX modes.  The immediate (operand 2) is passed to
;; neon_const_bounds with limits 0 and element-bits.
3744 (define_insn "neon_vshl_n<mode>"
3745 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3746 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3747 (match_operand:SI 2 "immediate_operand" "i")]
3751 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3752 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3754 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate over VDQIX modes.  The immediate (operand 2) is passed
;; to neon_const_bounds with limits 0 and element-bits; the type suffix is
;; built from <sup> and the element size.
3757 (define_insn "neon_vqshl_<sup>_n<mode>"
3758 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3759 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3760 (match_operand:SI 2 "immediate_operand" "i")]
3764 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3765 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3767 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate over VDQIX modes.  The immediate (operand 2) is passed
;; to neon_const_bounds with limits 0 and element-bits.
3770 (define_insn "neon_vqshlu_n<mode>"
3771 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3772 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3773 (match_operand:SI 2 "immediate_operand" "i")]
3777 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3778 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
3780 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift-left by immediate: VW input (printed %P1), <V_widen> result
;; (printed %q0).  The immediate is passed to neon_const_bounds with limits 0
;; and element-bits + 1.
;; NOTE(review): the in-body comment states "0 < imm", yet the lower limit
;; handed to neon_const_bounds is 0 -- confirm against that helper whether
;; the lower limit is inclusive and whether an immediate of 0 is intended.
3783 (define_insn "neon_vshll<sup>_n<mode>"
3784 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3785 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3786 (match_operand:SI 2 "immediate_operand" "i")]
3790 /* The boundaries are: 0 < imm <= size. */
3791 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3792 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
3794 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate with accumulate over VDQIX modes (type
;; "neon_shift_acc<q>").  Operand 1 is tied to the output, operand 2 is the
;; value being shifted, and operand 3 is the immediate, passed to
;; neon_const_bounds with limits 1 and element-bits + 1.
3798 (define_insn "neon_v<shift_op><sup>_n<mode>"
3799 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3800 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3801 (match_operand:VDQIX 2 "s_register_operand" "w")
3802 (match_operand:SI 3 "immediate_operand" "i")]
3806 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3807 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3809 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate over VDQIX modes.  Operand 1 is tied to the output,
;; operand 2 is the source vector and operand 3 the immediate, passed to
;; neon_const_bounds with limits 1 and element-bits + 1.
3812 (define_insn "neon_vsri_n<mode>"
3813 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3814 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3815 (match_operand:VDQIX 2 "s_register_operand" "w")
3816 (match_operand:SI 3 "immediate_operand" "i")]
3820 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3821 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3823 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate over VDQIX modes.  Operand 1 is tied to the output,
;; operand 2 is the source vector and operand 3 the immediate, passed to
;; neon_const_bounds with limits 0 and element-bits.
3826 (define_insn "neon_vsli_n<mode>"
3827 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3828 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3829 (match_operand:VDQIX 2 "s_register_operand" "w")
3830 (match_operand:SI 3 "immediate_operand" "i")]
3834 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3835 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3837 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl.8 with a one-register table: operand 1 is printed inside the {...}
;; list and operand 2 follows it as the last operand.
3840 (define_insn "neon_vtbl1v8qi"
3841 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3842 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3843 (match_operand:V8QI 2 "s_register_operand" "w")]
3846 "vtbl.8\t%P0, {%P1}, %P2"
3847 [(set_attr "type" "neon_tbl1")]
;; vtbl.8 with a two-register table held in a TImode operand.
;; The output code takes REGNO of operand 1 as the table base and builds
;; V8QImode REG rtxes at base and base + 2 for the {...} list, then prints the
;; instruction through output_asm_insn with a local ops[] array.
3850 (define_insn "neon_vtbl2v8qi"
3851 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3852 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3853 (match_operand:V8QI 2 "s_register_operand" "w")]
3858 int tabbase = REGNO (operands[1]);
3860 ops[0] = operands[0];
3861 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3862 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3863 ops[3] = operands[2];
3864 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3868 [(set_attr "type" "neon_tbl2")]
;; vtbl.8 with a three-register table held in an EImode operand.
;; The output code builds V8QImode REG rtxes at base, base + 2 and base + 4
;; (base = REGNO of operand 1) and prints through output_asm_insn.
3871 (define_insn "neon_vtbl3v8qi"
3872 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3873 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3874 (match_operand:V8QI 2 "s_register_operand" "w")]
3879 int tabbase = REGNO (operands[1]);
3881 ops[0] = operands[0];
3882 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3883 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3884 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3885 ops[4] = operands[2];
3886 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3890 [(set_attr "type" "neon_tbl3")]
;; vtbl.8 with a four-register table held in an OImode operand.
;; The output code builds V8QImode REG rtxes at base, base + 2, base + 4 and
;; base + 6 (base = REGNO of operand 1) and prints through output_asm_insn.
3893 (define_insn "neon_vtbl4v8qi"
3894 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3895 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3896 (match_operand:V8QI 2 "s_register_operand" "w")]
3901 int tabbase = REGNO (operands[1]);
3903 ops[0] = operands[0];
3904 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3905 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3906 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3907 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3908 ops[5] = operands[2];
3909 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3913 [(set_attr "type" "neon_tbl4")]
3916 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup, split once reload_completed holds.  The output is
;; early-clobber ("=&w").  The splitter views operand 1 as TImode with
;; gen_lowpart and emits neon_vtbl2v8qi twice: once on the low-part V8QI
;; subregs of op0/op2 and once on the high-part subregs, both using the same
;; op1 table.
;; NOTE(review): the assignments to op0 and op2 are not visible in this
;; excerpt; presumably they come from operands[0] and operands[2] -- confirm.
3917 (define_insn_and_split "neon_vtbl1v16qi"
3918 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3919 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3920 (match_operand:V16QI 2 "s_register_operand" "w")]
3924 "&& reload_completed"
3927 rtx op0, op1, op2, part0, part2;
3931 op1 = gen_lowpart (TImode, operands[1]);
3934 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3935 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3936 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3937 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3939 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3940 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3941 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3942 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3945 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table (operand 1), split once
;; reload_completed holds.  The output is early-clobber ("=&w").  The splitter
;; emits two table-lookup insns, one on the low-part and one on the high-part
;; V8QI subregs of op0/op2, both using op1 as the table.
;; NOTE(review): the assignments to op0, op1 and op2 are not visible in this
;; excerpt -- confirm how op1 is derived from the OImode operand.
3948 (define_insn_and_split "neon_vtbl2v16qi"
3949 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3950 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
3951 (match_operand:V16QI 2 "s_register_operand" "w")]
3955 "&& reload_completed"
3958 rtx op0, op1, op2, part0, part2;
3965 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3966 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3967 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3968 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3970 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3971 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3972 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3973 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3976 [(set_attr "type" "multiple")]
3979 ;; ??? Logically we should extend the regular neon_vcombine pattern to
3980 ;; handle quad-word input modes, producing octa-word output modes. But
3981 ;; that requires us to add support for octa-word vector modes in moves.
3982 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI inputs into one OImode result.  Once reload_completed
;; holds, the split body hands the operands array to neon_split_vcombine.
3983 (define_insn_and_split "neon_vcombinev16qi"
3984 [(set (match_operand:OI 0 "s_register_operand" "=w")
3985 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
3986 (match_operand:V16QI 2 "s_register_operand" "w")]
3990 "&& reload_completed"
3993 neon_split_vcombine (operands);
3996 [(set_attr "type" "multiple")]
;; vtbx.8 with a one-register table.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the table register printed inside {...} and
;; operand 3 follows it as the last operand.
3999 (define_insn "neon_vtbx1v8qi"
4000 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4001 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4002 (match_operand:V8QI 2 "s_register_operand" "w")
4003 (match_operand:V8QI 3 "s_register_operand" "w")]
4006 "vtbx.8\t%P0, {%P2}, %P3"
4007 [(set_attr "type" "neon_tbl1")]
;; vtbx.8 with a two-register table held in a TImode operand (operand 2).
;; Operand 1 is tied to the output.  The output code builds V8QImode REG rtxes
;; at base and base + 2 (base = REGNO of operand 2) and prints through
;; output_asm_insn with a local ops[] array.
4010 (define_insn "neon_vtbx2v8qi"
4011 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4012 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4013 (match_operand:TI 2 "s_register_operand" "w")
4014 (match_operand:V8QI 3 "s_register_operand" "w")]
4019 int tabbase = REGNO (operands[2]);
4021 ops[0] = operands[0];
4022 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4023 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4024 ops[3] = operands[3];
4025 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4029 [(set_attr "type" "neon_tbl2")]
;; vtbx.8 with a three-register table held in an EImode operand (operand 2).
;; Operand 1 is tied to the output.  The output code builds V8QImode REG rtxes
;; at base, base + 2 and base + 4 (base = REGNO of operand 2).
4032 (define_insn "neon_vtbx3v8qi"
4033 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4034 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4035 (match_operand:EI 2 "s_register_operand" "w")
4036 (match_operand:V8QI 3 "s_register_operand" "w")]
4041 int tabbase = REGNO (operands[2]);
4043 ops[0] = operands[0];
4044 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4045 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4046 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4047 ops[4] = operands[3];
4048 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4052 [(set_attr "type" "neon_tbl3")]
;; vtbx.8 with a four-register table held in an OImode operand (operand 2).
;; Operand 1 is tied to the output.  The output code builds V8QImode REG rtxes
;; at base, base + 2, base + 4 and base + 6 (base = REGNO of operand 2).
4055 (define_insn "neon_vtbx4v8qi"
4056 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4057 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4058 (match_operand:OI 2 "s_register_operand" "w")
4059 (match_operand:V8QI 3 "s_register_operand" "w")]
4064 int tabbase = REGNO (operands[2]);
4066 ops[0] = operands[0];
4067 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4068 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4069 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4070 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4071 ops[5] = operands[3];
4072 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4076 [(set_attr "type" "neon_tbl4")]
;; Expander for the two-result vtrn pattern.  It sets operand 0 and operand 3
;; from the same input pair (operands 1 and 2); the second set uses
;; UNSPEC_VTRN2 with match_dup of both inputs.
4079 (define_expand "neon_vtrn<mode>_internal"
4081 [(set (match_operand:VDQW 0 "s_register_operand" "")
4082 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4083 (match_operand:VDQW 2 "s_register_operand" "")]
4085 (set (match_operand:VDQW 3 "s_register_operand" "")
4086 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4091 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for vtrn.  Operands are renumbered relative to the expander:
;; input operand 1 is tied to output 0 and input operand 3 is tied to output
;; 2, so the instruction is printed with just %0 and %2.
4092 (define_insn "*neon_vtrn<mode>_insn"
4093 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4094 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4095 (match_operand:VDQW 3 "s_register_operand" "2")]
4097 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4098 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4101 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4102 [(set_attr "type" "neon_permute<q>")]
;; Expander for the two-result vzip pattern.  It sets operand 0 and operand 3
;; from the same input pair (operands 1 and 2); the second set uses
;; UNSPEC_VZIP2 with match_dup of both inputs.
4105 (define_expand "neon_vzip<mode>_internal"
4107 [(set (match_operand:VDQW 0 "s_register_operand" "")
4108 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4109 (match_operand:VDQW 2 "s_register_operand" "")]
4111 (set (match_operand:VDQW 3 "s_register_operand" "")
4112 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4117 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for vzip.  Input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the instruction is printed with just %0
;; and %2.
4118 (define_insn "*neon_vzip<mode>_insn"
4119 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4120 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4121 (match_operand:VDQW 3 "s_register_operand" "2")]
4123 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4124 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4127 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4128 [(set_attr "type" "neon_zip<q>")]
;; Expander for the two-result vuzp pattern.  It sets operand 0 and operand 3
;; from the same input pair (operands 1 and 2); the second set uses
;; UNSPEC_VUZP2 with match_dup of both inputs.
4131 (define_expand "neon_vuzp<mode>_internal"
4133 [(set (match_operand:VDQW 0 "s_register_operand" "")
4134 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4135 (match_operand:VDQW 2 "s_register_operand" "")]
4137 (set (match_operand:VDQW 3 "s_register_operand" "")
4138 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4143 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for vuzp.  Input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the instruction is printed with just %0
;; and %2.
4144 (define_insn "*neon_vuzp<mode>_insn"
4145 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4146 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4147 (match_operand:VDQW 3 "s_register_operand" "2")]
4149 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4150 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4153 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4154 [(set_attr "type" "neon_zip<q>")]
;; Reinterpret any VDX-mode value as V8QI: the expander hands destination and
;; source to neon_reinterpret.
4157 (define_expand "neon_vreinterpretv8qi<mode>"
4158 [(match_operand:V8QI 0 "s_register_operand" "")
4159 (match_operand:VDX 1 "s_register_operand" "")]
4162 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDX-mode value as V4HI: the expander hands destination and
;; source to neon_reinterpret.
4166 (define_expand "neon_vreinterpretv4hi<mode>"
4167 [(match_operand:V4HI 0 "s_register_operand" "")
4168 (match_operand:VDX 1 "s_register_operand" "")]
4171 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDX-mode register (operand 1) as V2SI (operand 0).
;; All the work is delegated to neon_reinterpret.
4175 (define_expand "neon_vreinterpretv2si<mode>"
4176 [(match_operand:V2SI 0 "s_register_operand" "")
4177 (match_operand:VDX 1 "s_register_operand" "")]
4180 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDX-mode register (operand 1) as V2SF (operand 0).
;; All the work is delegated to neon_reinterpret.
4184 (define_expand "neon_vreinterpretv2sf<mode>"
4185 [(match_operand:V2SF 0 "s_register_operand" "")
4186 (match_operand:VDX 1 "s_register_operand" "")]
4189 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDX-mode register (operand 1) as DI (operand 0).
;; All the work is delegated to neon_reinterpret.
4193 (define_expand "neon_vreinterpretdi<mode>"
4194 [(match_operand:DI 0 "s_register_operand" "")
4195 (match_operand:VDX 1 "s_register_operand" "")]
4198 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as TI (operand 0).
;; All the work is delegated to neon_reinterpret.
4202 (define_expand "neon_vreinterpretti<mode>"
4203 [(match_operand:TI 0 "s_register_operand" "")
4204 (match_operand:VQXMOV 1 "s_register_operand" "")]
4207 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V16QI (operand 0).
;; All the work is delegated to neon_reinterpret.
4212 (define_expand "neon_vreinterpretv16qi<mode>"
4213 [(match_operand:V16QI 0 "s_register_operand" "")
4214 (match_operand:VQXMOV 1 "s_register_operand" "")]
4217 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V8HI (operand 0).
;; All the work is delegated to neon_reinterpret.
4221 (define_expand "neon_vreinterpretv8hi<mode>"
4222 [(match_operand:V8HI 0 "s_register_operand" "")
4223 (match_operand:VQXMOV 1 "s_register_operand" "")]
4226 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SI (operand 0).
;; All the work is delegated to neon_reinterpret.
4230 (define_expand "neon_vreinterpretv4si<mode>"
4231 [(match_operand:V4SI 0 "s_register_operand" "")
4232 (match_operand:VQXMOV 1 "s_register_operand" "")]
4235 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SF (operand 0).
;; All the work is delegated to neon_reinterpret.
4239 (define_expand "neon_vreinterpretv4sf<mode>"
4240 [(match_operand:V4SF 0 "s_register_operand" "")
4241 (match_operand:VQXMOV 1 "s_register_operand" "")]
4244 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register (operand 1) as V2DI (operand 0).
;; All the work is delegated to neon_reinterpret.
4248 (define_expand "neon_vreinterpretv2di<mode>"
4249 [(match_operand:V2DI 0 "s_register_operand" "")
4250 (match_operand:VQXMOV 1 "s_register_operand" "")]
4253 neon_reinterpret (operands[0], operands[1]);
;; Expander for a one-vector load: a VDQX register (operand 0) is set from
;; an unspec wrapping a structure-memory operand of the same mode.
4257 (define_expand "vec_load_lanes<mode><mode>"
4258 [(set (match_operand:VDQX 0 "s_register_operand")
4259 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; VLD1 of a whole vector: loads register operand 0 from memory operand 1
;; ("Um" constraint), printing the register with %h and the address with %A.
4263 (define_insn "neon_vld1<mode>"
4264 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4265 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4268 "vld1.<V_sz_elem>\t%h0, %A1"
4269 [(set_attr "type" "neon_load1_1reg<q>")]
;; VLD1 to a single lane of a D-register vector.  Operand 1 is the element in
;; memory, operand 2 is the incoming vector (tied to the output) and operand 3
;; is the lane number.  A lane outside [0, number of units) is reported with
;; error ().  Two templates exist: a whole-register vld1 and the indexed
;; single-lane form.
;; NOTE(review): the condition choosing between them is not visible in this
;; excerpt -- presumably the one-element modes; confirm.
4272 (define_insn "neon_vld1_lane<mode>"
4273 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4274 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4275 (match_operand:VDX 2 "s_register_operand" "0")
4276 (match_operand:SI 3 "immediate_operand" "i")]
4280 HOST_WIDE_INT lane = INTVAL (operands[3]);
4281 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4282 if (lane < 0 || lane >= max)
4283 error ("lane out of range");
4285 return "vld1.<V_sz_elem>\t%P0, %A1";
4287 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4289 [(set_attr "type" "neon_load1_one_lane<q>")]
;; VLD1 to a single lane of a Q-register vector.  Operands are as in the
;; D-register variant.  After the range check, a lane in the upper half
;; (lane >= max / 2) is handled specially; operand 3 is then rewritten with
;; the final lane and operand 0 is replaced by a <V_HALF>mode register built
;; from regno, so the instruction addresses one D register.
;; NOTE(review): the statements adjusting lane and regno are not visible in
;; this excerpt -- confirm against the full source.
4292 (define_insn "neon_vld1_lane<mode>"
4293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4294 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4295 (match_operand:VQX 2 "s_register_operand" "0")
4296 (match_operand:SI 3 "immediate_operand" "i")]
4300 HOST_WIDE_INT lane = INTVAL (operands[3]);
4301 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4302 int regno = REGNO (operands[0]);
4303 if (lane < 0 || lane >= max)
4304 error ("lane out of range");
4305 else if (lane >= max / 2)
4309 operands[3] = GEN_INT (lane);
4311 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4313 return "vld1.<V_sz_elem>\t%P0, %A1";
4315 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4317 [(set_attr "type" "neon_load1_one_lane<q>")]
;; VLD1 to all lanes of a D register: the result is a vec_duplicate of the
;; single element read from memory operand 1.
4320 (define_insn "neon_vld1_dup<mode>"
4321 [(set (match_operand:VD 0 "s_register_operand" "=w")
4322 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4324 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4325 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; DImode has only one element, so a "dup" load is expressed as an unspec
;; over the DI memory operand rather than as a vec_duplicate.
4328 ;; Special case for DImode. Treat it exactly like a simple load.
4329 (define_expand "neon_vld1_dupdi"
4330 [(set (match_operand:DI 0 "s_register_operand" "")
4331 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; VLD1 to all lanes of a Q register: one instruction names both halves of
;; operand 0 (%e0 and %f0) with the all-lanes "[]" syntax.
4337 (define_insn "neon_vld1_dup<mode>"
4338 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4339 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4342 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4344 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate-load, split after reload into two steps: load the DI value
;; into the low half of operand 0 with neon_vld1_dupdi, then copy that half
;; into the high half with a move.  The "length" attribute is 8.
4347 (define_insn_and_split "neon_vld1_dupv2di"
4348 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4349 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4352 "&& reload_completed"
4355 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4356 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4357 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4360 [(set_attr "length" "8")
4361 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander for a one-vector store: a structure-memory operand (operand 0)
;; is set from an unspec wrapping a VDQX register of the same mode.
4364 (define_expand "vec_store_lanes<mode><mode>"
4365 [(set (match_operand:VDQX 0 "neon_struct_operand")
4366 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; VST1 of a whole vector: stores register operand 1 to memory operand 0.
4370 (define_insn "neon_vst1<mode>"
4371 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4372 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4375 "vst1.<V_sz_elem>\t%h1, %A0"
4376 [(set_attr "type" "neon_store1_1reg<q>")])
;; VST1 from a single lane of a D-register vector.  Operand 0 is the element
;; in memory, operand 1 the source vector and operand 2 the lane number.  A
;; lane outside [0, number of units) is reported with error ().  Two
;; templates exist: a whole-register store and the indexed single-lane form.
;; NOTE(review): the condition choosing between them is not visible in this
;; excerpt -- presumably the one-element modes; confirm.
4378 (define_insn "neon_vst1_lane<mode>"
4379 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4381 [(match_operand:VDX 1 "s_register_operand" "w")
4382 (match_operand:SI 2 "immediate_operand" "i")]
4386 HOST_WIDE_INT lane = INTVAL (operands[2]);
4387 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4388 if (lane < 0 || lane >= max)
4389 error ("lane out of range");
4391 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4393 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4395 [(set_attr "type" "neon_store1_one_lane<q>")]
;; VST1 from a single lane of a Q-register vector.  After the range check, a
;; lane in the upper half (lane >= max / 2) is handled specially; operand 2
;; is then rewritten with the final lane and operand 1 is replaced by a
;; <V_HALF>mode register built from regno, so one D register is addressed.
;; NOTE(review): the statements adjusting lane and regno are not visible in
;; this excerpt -- confirm against the full source.
4398 (define_insn "neon_vst1_lane<mode>"
4399 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4401 [(match_operand:VQX 1 "s_register_operand" "w")
4402 (match_operand:SI 2 "immediate_operand" "i")]
4406 HOST_WIDE_INT lane = INTVAL (operands[2]);
4407 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4408 int regno = REGNO (operands[1]);
4409 if (lane < 0 || lane >= max)
4410 error ("lane out of range");
4411 else if (lane >= max / 2)
4415 operands[2] = GEN_INT (lane);
4417 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4419 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4421 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4423 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander for a two-vector structure load into a TI register.  The nested
;; unspec over (const_int 0) with UNSPEC_VSTRUCTDUMMY carries no data; it is
;; where the VDX mode iterator appears in the pattern.
4426 (define_expand "vec_load_lanesti<mode>"
4427 [(set (match_operand:TI 0 "s_register_operand")
4428 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4429 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VLD2 into a pair of D registers held in a TI operand.  When the element
;; size is 64 bits the output is "vld1.64" instead of vld2, and the "type"
;; attribute switches to the load1 two-register class to match.
4433 (define_insn "neon_vld2<mode>"
4434 [(set (match_operand:TI 0 "s_register_operand" "=w")
4435 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4436 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4440 if (<V_sz_elem> == 64)
4441 return "vld1.64\t%h0, %A1";
4443 return "vld2.<V_sz_elem>\t%h0, %A1";
4446 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4447 (const_string "neon_load1_2reg<q>")
4448 (const_string "neon_load2_2reg<q>")))]
;; Expander for a two-vector structure load of Q-register vectors into an
;; OI register; the dummy unspec carries the VQ mode iterator.
4451 (define_expand "vec_load_lanesoi<mode>"
4452 [(set (match_operand:OI 0 "s_register_operand")
4453 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4454 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VLD2 for Q-register vectors: a single vld2 fills the OI register operand 0
;; from memory operand 1.
4458 (define_insn "neon_vld2<mode>"
4459 [(set (match_operand:OI 0 "s_register_operand" "=w")
4460 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4461 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4464 "vld2.<V_sz_elem>\t%h0, %A1"
4465 [(set_attr "type" "neon_load2_2reg_q")])
;; VLD2 to one lane of two D registers.  Operand 2 is the incoming register
;; pair (tied to the output) and operand 3 the lane.  The two D registers are
;; named explicitly as DImode registers at regno and regno + 2.  Note that
;; after error () reports a bad lane the code still goes on to print the
;; instruction.
4467 (define_insn "neon_vld2_lane<mode>"
4468 [(set (match_operand:TI 0 "s_register_operand" "=w")
4469 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4470 (match_operand:TI 2 "s_register_operand" "0")
4471 (match_operand:SI 3 "immediate_operand" "i")
4472 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4476 HOST_WIDE_INT lane = INTVAL (operands[3]);
4477 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4478 int regno = REGNO (operands[0]);
4480 if (lane < 0 || lane >= max)
4481 error ("lane out of range");
4482 ops[0] = gen_rtx_REG (DImode, regno);
4483 ops[1] = gen_rtx_REG (DImode, regno + 2);
4484 ops[2] = operands[1];
4485 ops[3] = operands[3];
4486 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4489 [(set_attr "type" "neon_load2_one_lane<q>")]
;; VLD2 to one lane of two Q registers.  The instruction names one D register
;; from each Q register (regno and regno + 4) and uses the locally computed
;; lane value rather than operand 3 directly.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this excerpt; presumably it rebases lane and regno onto the upper D
;; halves -- confirm.
4492 (define_insn "neon_vld2_lane<mode>"
4493 [(set (match_operand:OI 0 "s_register_operand" "=w")
4494 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4495 (match_operand:OI 2 "s_register_operand" "0")
4496 (match_operand:SI 3 "immediate_operand" "i")
4497 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4501 HOST_WIDE_INT lane = INTVAL (operands[3]);
4502 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4503 int regno = REGNO (operands[0]);
4505 if (lane < 0 || lane >= max)
4506 error ("lane out of range");
4507 else if (lane >= max / 2)
4512 ops[0] = gen_rtx_REG (DImode, regno);
4513 ops[1] = gen_rtx_REG (DImode, regno + 4);
4514 ops[2] = operands[1];
4515 ops[3] = GEN_INT (lane);
4516 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4519 [(set_attr "type" "neon_load2_one_lane<q>")]
;; VLD2 to all lanes of two D registers.  Modes with more than one unit use
;; the all-lanes vld2 form; otherwise a plain vld1 of the register list is
;; emitted, and the "type" attribute follows the same split.
4522 (define_insn "neon_vld2_dup<mode>"
4523 [(set (match_operand:TI 0 "s_register_operand" "=w")
4524 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4525 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4529 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4530 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4532 return "vld1.<V_sz_elem>\t%h0, %A1";
4535 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4536 (const_string "neon_load2_all_lanes<q>")
4537 (const_string "neon_load1_1reg<q>")))]
;; Expander for a two-vector structure store from a TI register; the dummy
;; unspec carries the VDX mode iterator.
4540 (define_expand "vec_store_lanesti<mode>"
4541 [(set (match_operand:TI 0 "neon_struct_operand")
4542 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4543 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VST2 from a pair of D registers held in a TI operand.  When the element
;; size is 64 bits the output is "vst1.64" instead of vst2, and the "type"
;; attribute switches to the store1 two-register class to match.
;; The non-64-bit case stores the whole two-register structure, so it is
;; classified as neon_store2_2reg<q> (mirroring neon_load2_2reg<q> on the
;; corresponding load) rather than as a single-lane store.
4547 (define_insn "neon_vst2<mode>"
4548 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4549 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4550 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4554 if (<V_sz_elem> == 64)
4555 return "vst1.64\t%h1, %A0";
4557 return "vst2.<V_sz_elem>\t%h1, %A0";
4560 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4561 (const_string "neon_store1_2reg<q>")
4562 (const_string "neon_store2_2reg<q>")))]
;; Expander for a two-vector structure store of Q-register vectors from an
;; OI register; the dummy unspec carries the VQ mode iterator.
4565 (define_expand "vec_store_lanesoi<mode>"
4566 [(set (match_operand:OI 0 "neon_struct_operand")
4567 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4568 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VST2 for Q-register vectors: a single vst2 stores the OI register
;; operand 1 to memory operand 0.
4572 (define_insn "neon_vst2<mode>"
4573 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4574 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4575 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4578 "vst2.<V_sz_elem>\t%h1, %A0"
4579 [(set_attr "type" "neon_store2_4reg<q>")]
;; VST2 from one lane of two D registers.  Operand 1 is the register pair and
;; operand 2 the lane.  The two D registers are named explicitly as DImode
;; registers at regno and regno + 2.  After error () reports a bad lane the
;; code still goes on to print the instruction.
4582 (define_insn "neon_vst2_lane<mode>"
4583 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4584 (unspec:<V_two_elem>
4585 [(match_operand:TI 1 "s_register_operand" "w")
4586 (match_operand:SI 2 "immediate_operand" "i")
4587 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4591 HOST_WIDE_INT lane = INTVAL (operands[2]);
4592 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4593 int regno = REGNO (operands[1]);
4595 if (lane < 0 || lane >= max)
4596 error ("lane out of range");
4597 ops[0] = operands[0];
4598 ops[1] = gen_rtx_REG (DImode, regno);
4599 ops[2] = gen_rtx_REG (DImode, regno + 2);
4600 ops[3] = operands[2];
4601 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4604 [(set_attr "type" "neon_store2_one_lane<q>")]
;; VST2 from one lane of two Q registers.  The instruction names one D
;; register from each Q register (regno and regno + 4) and uses the locally
;; computed lane value rather than operand 2 directly.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this excerpt; presumably it rebases lane and regno onto the upper D
;; halves -- confirm.
4607 (define_insn "neon_vst2_lane<mode>"
4608 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4609 (unspec:<V_two_elem>
4610 [(match_operand:OI 1 "s_register_operand" "w")
4611 (match_operand:SI 2 "immediate_operand" "i")
4612 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4616 HOST_WIDE_INT lane = INTVAL (operands[2]);
4617 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4618 int regno = REGNO (operands[1]);
4620 if (lane < 0 || lane >= max)
4621 error ("lane out of range");
4622 else if (lane >= max / 2)
4627 ops[0] = operands[0];
4628 ops[1] = gen_rtx_REG (DImode, regno);
4629 ops[2] = gen_rtx_REG (DImode, regno + 4);
4630 ops[3] = GEN_INT (lane);
4631 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4634 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander for a three-vector structure load into an EI register; the dummy
;; unspec carries the VDX mode iterator.
4637 (define_expand "vec_load_lanesei<mode>"
4638 [(set (match_operand:EI 0 "s_register_operand")
4639 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4640 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VLD3 into three D registers held in an EI operand.  When the element size
;; is 64 bits the output is "vld1.64" instead of vld3, and the "type"
;; attribute switches to the load1 three-register class to match.
4644 (define_insn "neon_vld3<mode>"
4645 [(set (match_operand:EI 0 "s_register_operand" "=w")
4646 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4647 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4651 if (<V_sz_elem> == 64)
4652 return "vld1.64\t%h0, %A1";
4654 return "vld3.<V_sz_elem>\t%h0, %A1";
4657 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4658 (const_string "neon_load1_3reg<q>")
4659 (const_string "neon_load3_3reg<q>")))]
;; Three-vector structure load of Q-register vectors into a CI register.
;; Simply forwards both operands to the neon_vld3<mode> expander.
4662 (define_expand "vec_load_lanesci<mode>"
4663 [(match_operand:CI 0 "s_register_operand")
4664 (match_operand:CI 1 "neon_struct_operand")
4665 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4668 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; VLD3 for Q-register vectors, done as two instructions.  The CI memory
;; operand is re-addressed as two consecutive EI-sized pieces: the first (at
;; offset 0) is loaded by neon_vld3qa and the second (GET_MODE_SIZE (EImode)
;; bytes further on) by neon_vld3qb, which also takes operands[0] as an input
;; so the registers written by the first half are preserved.
4672 (define_expand "neon_vld3<mode>"
4673 [(match_operand:CI 0 "s_register_operand")
4674 (match_operand:CI 1 "neon_struct_operand")
4675 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4680 mem = adjust_address (operands[1], EImode, 0);
4681 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4682 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4683 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of a Q-register VLD3: loads the three DImode registers at
;; regno, regno + 4 and regno + 8 of the CI destination from an EI-sized
;; memory operand.
4687 (define_insn "neon_vld3qa<mode>"
4688 [(set (match_operand:CI 0 "s_register_operand" "=w")
4689 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4690 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4694 int regno = REGNO (operands[0]);
4696 ops[0] = gen_rtx_REG (DImode, regno);
4697 ops[1] = gen_rtx_REG (DImode, regno + 4);
4698 ops[2] = gen_rtx_REG (DImode, regno + 8);
4699 ops[3] = operands[1];
4700 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4703 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of a Q-register VLD3: loads the three DImode registers at
;; regno + 2, regno + 6 and regno + 10.  Operand 2 is the partially filled
;; destination, tied to the output with constraint "0".
4706 (define_insn "neon_vld3qb<mode>"
4707 [(set (match_operand:CI 0 "s_register_operand" "=w")
4708 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4709 (match_operand:CI 2 "s_register_operand" "0")
4710 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4714 int regno = REGNO (operands[0]);
4716 ops[0] = gen_rtx_REG (DImode, regno + 2);
4717 ops[1] = gen_rtx_REG (DImode, regno + 6);
4718 ops[2] = gen_rtx_REG (DImode, regno + 10);
4719 ops[3] = operands[1];
4720 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4723 [(set_attr "type" "neon_load3_3reg<q>")]
;; VLD3 to one lane of three D registers (regno, regno + 2, regno + 4).
;; Operand 2 is the incoming register triple, tied to the output; operand 3
;; is the lane.  The memory operand is printed with plain %3 here, not with
;; the %A modifier used by the vld2/vld4 lane patterns.
;; NOTE(review): presumably because this instruction form accepts no
;; alignment qualifier -- confirm against the architecture manual.
4726 (define_insn "neon_vld3_lane<mode>"
4727 [(set (match_operand:EI 0 "s_register_operand" "=w")
4728 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4729 (match_operand:EI 2 "s_register_operand" "0")
4730 (match_operand:SI 3 "immediate_operand" "i")
4731 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4735 HOST_WIDE_INT lane = INTVAL (operands[3]);
4736 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4737 int regno = REGNO (operands[0]);
4739 if (lane < 0 || lane >= max)
4740 error ("lane out of range");
4741 ops[0] = gen_rtx_REG (DImode, regno);
4742 ops[1] = gen_rtx_REG (DImode, regno + 2);
4743 ops[2] = gen_rtx_REG (DImode, regno + 4);
4744 ops[3] = operands[1];
4745 ops[4] = operands[3];
4746 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4750 [(set_attr "type" "neon_load3_one_lane<q>")]
;; VLD3 to one lane of three Q registers.  One D register is named from each
;; Q register (regno, regno + 4, regno + 8) and the locally computed lane is
;; used.  The memory operand is printed with plain %3.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this excerpt; presumably it rebases lane and regno onto the upper D
;; halves -- confirm.
4753 (define_insn "neon_vld3_lane<mode>"
4754 [(set (match_operand:CI 0 "s_register_operand" "=w")
4755 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4756 (match_operand:CI 2 "s_register_operand" "0")
4757 (match_operand:SI 3 "immediate_operand" "i")
4758 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4762 HOST_WIDE_INT lane = INTVAL (operands[3]);
4763 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4764 int regno = REGNO (operands[0]);
4766 if (lane < 0 || lane >= max)
4767 error ("lane out of range");
4768 else if (lane >= max / 2)
4773 ops[0] = gen_rtx_REG (DImode, regno);
4774 ops[1] = gen_rtx_REG (DImode, regno + 4);
4775 ops[2] = gen_rtx_REG (DImode, regno + 8);
4776 ops[3] = operands[1];
4777 ops[4] = GEN_INT (lane);
4778 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4782 [(set_attr "type" "neon_load3_one_lane<q>")]
;; VLD3 to all lanes of three D registers.  Modes with more than one unit
;; print an all-lanes vld3 on the DImode registers regno, regno + 2 and
;; regno + 4 (memory operand printed with plain %3); otherwise a vld1 of the
;; register list is emitted.  The "type" attribute follows the same split.
4785 (define_insn "neon_vld3_dup<mode>"
4786 [(set (match_operand:EI 0 "s_register_operand" "=w")
4787 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4788 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4792 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4794 int regno = REGNO (operands[0]);
4796 ops[0] = gen_rtx_REG (DImode, regno);
4797 ops[1] = gen_rtx_REG (DImode, regno + 2);
4798 ops[2] = gen_rtx_REG (DImode, regno + 4);
4799 ops[3] = operands[1];
4800 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4804 return "vld1.<V_sz_elem>\t%h0, %A1";
4807 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4808 (const_string "neon_load3_all_lanes<q>")
4809 (const_string "neon_load1_1reg<q>")))])
;; Expander for a three-vector structure store from an EI register; the
;; dummy unspec carries the VDX mode iterator.
4811 (define_expand "vec_store_lanesei<mode>"
4812 [(set (match_operand:EI 0 "neon_struct_operand")
4813 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4814 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VST3 from three D registers held in an EI operand.  When the element size
;; is 64 bits the output is "vst1.64" instead of vst3, and the "type"
;; attribute switches to the store1 three-register class to match.
;; The non-64-bit case stores the whole three-register structure, so it is
;; classified as neon_store3_3reg<q> (the class used by neon_vst3qa/qb and
;; mirrored by neon_load3_3reg<q> on the load) rather than as a single-lane
;; store.
4818 (define_insn "neon_vst3<mode>"
4819 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4820 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4821 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4825 if (<V_sz_elem> == 64)
4826 return "vst1.64\t%h1, %A0";
4828 return "vst3.<V_sz_elem>\t%h1, %A0";
4831 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4832 (const_string "neon_store1_3reg<q>")
4833 (const_string "neon_store3_3reg<q>")))])
;; Three-vector structure store of Q-register vectors from a CI register.
;; Simply forwards both operands to the neon_vst3<mode> expander.
4835 (define_expand "vec_store_lanesci<mode>"
4836 [(match_operand:CI 0 "neon_struct_operand")
4837 (match_operand:CI 1 "s_register_operand")
4838 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4841 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; VST3 for Q-register vectors, done as two instructions.  The CI memory
;; operand is re-addressed as two consecutive EI-sized pieces: the first (at
;; offset 0) is written by neon_vst3qa and the second (GET_MODE_SIZE (EImode)
;; bytes further on) by neon_vst3qb.
4845 (define_expand "neon_vst3<mode>"
4846 [(match_operand:CI 0 "neon_struct_operand")
4847 (match_operand:CI 1 "s_register_operand")
4848 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4853 mem = adjust_address (operands[0], EImode, 0);
4854 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4855 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4856 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of a Q-register VST3: stores the three DImode registers at
;; regno, regno + 4 and regno + 8 of the CI source to an EI-sized memory
;; operand.
4860 (define_insn "neon_vst3qa<mode>"
4861 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4862 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4863 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4867 int regno = REGNO (operands[1]);
4869 ops[0] = operands[0];
4870 ops[1] = gen_rtx_REG (DImode, regno);
4871 ops[2] = gen_rtx_REG (DImode, regno + 4);
4872 ops[3] = gen_rtx_REG (DImode, regno + 8);
4873 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4876 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of a Q-register VST3: stores the three DImode registers at
;; regno + 2, regno + 6 and regno + 10 of the CI source.
4879 (define_insn "neon_vst3qb<mode>"
4880 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4881 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4882 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4886 int regno = REGNO (operands[1]);
4888 ops[0] = operands[0];
4889 ops[1] = gen_rtx_REG (DImode, regno + 2);
4890 ops[2] = gen_rtx_REG (DImode, regno + 6);
4891 ops[3] = gen_rtx_REG (DImode, regno + 10);
4892 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4895 [(set_attr "type" "neon_store3_3reg<q>")]
;; VST3 from one lane of three D registers (regno, regno + 2, regno + 4).
;; Operand 1 is the register triple and operand 2 the lane.  The memory
;; operand is printed with plain %0, not with the %A modifier used by the
;; vst2/vst4 lane patterns.
;; NOTE(review): presumably because this instruction form accepts no
;; alignment qualifier -- confirm against the architecture manual.
4898 (define_insn "neon_vst3_lane<mode>"
4899 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4900 (unspec:<V_three_elem>
4901 [(match_operand:EI 1 "s_register_operand" "w")
4902 (match_operand:SI 2 "immediate_operand" "i")
4903 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4907 HOST_WIDE_INT lane = INTVAL (operands[2]);
4908 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4909 int regno = REGNO (operands[1]);
4911 if (lane < 0 || lane >= max)
4912 error ("lane out of range");
4913 ops[0] = operands[0];
4914 ops[1] = gen_rtx_REG (DImode, regno);
4915 ops[2] = gen_rtx_REG (DImode, regno + 2);
4916 ops[3] = gen_rtx_REG (DImode, regno + 4);
4917 ops[4] = operands[2];
4918 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4922 [(set_attr "type" "neon_store3_one_lane<q>")]
;; VST3 from one lane of three Q registers.  One D register is named from
;; each Q register (regno, regno + 4, regno + 8) and the locally computed
;; lane is used.  The memory operand is printed with plain %0.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this excerpt; presumably it rebases lane and regno onto the upper D
;; halves -- confirm.
4925 (define_insn "neon_vst3_lane<mode>"
4926 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4927 (unspec:<V_three_elem>
4928 [(match_operand:CI 1 "s_register_operand" "w")
4929 (match_operand:SI 2 "immediate_operand" "i")
4930 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4934 HOST_WIDE_INT lane = INTVAL (operands[2]);
4935 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4936 int regno = REGNO (operands[1]);
4938 if (lane < 0 || lane >= max)
4939 error ("lane out of range");
4940 else if (lane >= max / 2)
4945 ops[0] = operands[0];
4946 ops[1] = gen_rtx_REG (DImode, regno);
4947 ops[2] = gen_rtx_REG (DImode, regno + 4);
4948 ops[3] = gen_rtx_REG (DImode, regno + 8);
4949 ops[4] = GEN_INT (lane);
4950 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4954 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander for a four-vector structure load of D-register vectors into an
;; OI register; the dummy unspec carries the VDX mode iterator.
4957 (define_expand "vec_load_lanesoi<mode>"
4958 [(set (match_operand:OI 0 "s_register_operand")
4959 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4960 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VLD4 into four D registers held in an OI operand.  When the element size
;; is 64 bits the output is "vld1.64" instead of vld4, and the "type"
;; attribute switches to the load1 four-register class to match.
4964 (define_insn "neon_vld4<mode>"
4965 [(set (match_operand:OI 0 "s_register_operand" "=w")
4966 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4967 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4971 if (<V_sz_elem> == 64)
4972 return "vld1.64\t%h0, %A1";
4974 return "vld4.<V_sz_elem>\t%h0, %A1";
4977 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4978 (const_string "neon_load1_4reg<q>")
4979 (const_string "neon_load4_4reg<q>")))]
;; Four-vector structure load of Q-register vectors into an XI register.
;; Simply forwards both operands to the neon_vld4<mode> expander.
4982 (define_expand "vec_load_lanesxi<mode>"
4983 [(match_operand:XI 0 "s_register_operand")
4984 (match_operand:XI 1 "neon_struct_operand")
4985 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4988 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; VLD4 for Q-register vectors, done as two instructions.  The XI memory
;; operand is re-addressed as two consecutive OI-sized pieces: the first (at
;; offset 0) is loaded by neon_vld4qa and the second (GET_MODE_SIZE (OImode)
;; bytes further on) by neon_vld4qb, which also takes operands[0] as an input
;; so the registers written by the first half are preserved.
4992 (define_expand "neon_vld4<mode>"
4993 [(match_operand:XI 0 "s_register_operand")
4994 (match_operand:XI 1 "neon_struct_operand")
4995 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5000 mem = adjust_address (operands[1], OImode, 0);
5001 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5002 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5003 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of a Q-register VLD4: loads the four DImode registers at
;; regno, regno + 4, regno + 8 and regno + 12 of the XI destination from an
;; OI-sized memory operand.
5007 (define_insn "neon_vld4qa<mode>"
5008 [(set (match_operand:XI 0 "s_register_operand" "=w")
5009 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5010 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5014 int regno = REGNO (operands[0]);
5016 ops[0] = gen_rtx_REG (DImode, regno);
5017 ops[1] = gen_rtx_REG (DImode, regno + 4);
5018 ops[2] = gen_rtx_REG (DImode, regno + 8);
5019 ops[3] = gen_rtx_REG (DImode, regno + 12);
5020 ops[4] = operands[1];
5021 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5024 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of a Q-register VLD4: loads the four DImode registers at
;; regno + 2, regno + 6, regno + 10 and regno + 14.  Operand 2 is the
;; partially filled destination, tied to the output with constraint "0".
5027 (define_insn "neon_vld4qb<mode>"
5028 [(set (match_operand:XI 0 "s_register_operand" "=w")
5029 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5030 (match_operand:XI 2 "s_register_operand" "0")
5031 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5035 int regno = REGNO (operands[0]);
5037 ops[0] = gen_rtx_REG (DImode, regno + 2);
5038 ops[1] = gen_rtx_REG (DImode, regno + 6);
5039 ops[2] = gen_rtx_REG (DImode, regno + 10);
5040 ops[3] = gen_rtx_REG (DImode, regno + 14);
5041 ops[4] = operands[1];
5042 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5045 [(set_attr "type" "neon_load4_4reg<q>")]
;; VLD4 to one lane of four D registers (regno, regno + 2, regno + 4,
;; regno + 6).  Operand 2 is the incoming register group, tied to the output;
;; operand 3 is the lane.  After error () reports a bad lane the code still
;; goes on to print the instruction.
5048 (define_insn "neon_vld4_lane<mode>"
5049 [(set (match_operand:OI 0 "s_register_operand" "=w")
5050 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5051 (match_operand:OI 2 "s_register_operand" "0")
5052 (match_operand:SI 3 "immediate_operand" "i")
5053 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5057 HOST_WIDE_INT lane = INTVAL (operands[3]);
5058 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5059 int regno = REGNO (operands[0]);
5061 if (lane < 0 || lane >= max)
5062 error ("lane out of range");
5063 ops[0] = gen_rtx_REG (DImode, regno);
5064 ops[1] = gen_rtx_REG (DImode, regno + 2);
5065 ops[2] = gen_rtx_REG (DImode, regno + 4);
5066 ops[3] = gen_rtx_REG (DImode, regno + 6);
5067 ops[4] = operands[1];
5068 ops[5] = operands[3];
5069 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5073 [(set_attr "type" "neon_load4_one_lane<q>")]
;; VLD4 to one lane of four Q registers.  One D register is named from each
;; Q register (regno, regno + 4, regno + 8, regno + 12) and the locally
;; computed lane is used.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this excerpt; presumably it rebases lane and regno onto the upper D
;; halves -- confirm.
5076 (define_insn "neon_vld4_lane<mode>"
5077 [(set (match_operand:XI 0 "s_register_operand" "=w")
5078 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5079 (match_operand:XI 2 "s_register_operand" "0")
5080 (match_operand:SI 3 "immediate_operand" "i")
5081 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5085 HOST_WIDE_INT lane = INTVAL (operands[3]);
5086 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5087 int regno = REGNO (operands[0]);
5089 if (lane < 0 || lane >= max)
5090 error ("lane out of range");
5091 else if (lane >= max / 2)
5096 ops[0] = gen_rtx_REG (DImode, regno);
5097 ops[1] = gen_rtx_REG (DImode, regno + 4);
5098 ops[2] = gen_rtx_REG (DImode, regno + 8);
5099 ops[3] = gen_rtx_REG (DImode, regno + 12);
5100 ops[4] = operands[1];
5101 ops[5] = GEN_INT (lane);
5102 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5106 [(set_attr "type" "neon_load4_one_lane<q>")]
;; VLD4 to all lanes of four D registers.  Modes with more than one unit
;; print an all-lanes vld4 on the DImode registers regno, regno + 2,
;; regno + 4 and regno + 6; otherwise a vld1 of the register list is emitted.
;; The "type" attribute follows the same split.
5109 (define_insn "neon_vld4_dup<mode>"
5110 [(set (match_operand:OI 0 "s_register_operand" "=w")
5111 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5112 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5116 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5118 int regno = REGNO (operands[0]);
5120 ops[0] = gen_rtx_REG (DImode, regno);
5121 ops[1] = gen_rtx_REG (DImode, regno + 2);
5122 ops[2] = gen_rtx_REG (DImode, regno + 4);
5123 ops[3] = gen_rtx_REG (DImode, regno + 6);
5124 ops[4] = operands[1];
5125 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5130 return "vld1.<V_sz_elem>\t%h0, %A1";
5133 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5134 (const_string "neon_load4_all_lanes<q>")
5135 (const_string "neon_load1_1reg<q>")))]
;; Expander for a four-vector structure store of D-register vectors from an
;; OI register; the dummy unspec carries the VDX mode iterator.
5138 (define_expand "vec_store_lanesoi<mode>"
5139 [(set (match_operand:OI 0 "neon_struct_operand")
5140 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5141 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VST4 from four D registers held in an OI operand.  When the element size
;; is 64 bits the output is "vst1.64" instead of vst4, and the "type"
;; attribute switches to the store1 four-register class to match.
5145 (define_insn "neon_vst4<mode>"
5146 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5147 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5148 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5152 if (<V_sz_elem> == 64)
5153 return "vst1.64\t%h1, %A0";
5155 return "vst4.<V_sz_elem>\t%h1, %A0";
5158 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5159 (const_string "neon_store1_4reg<q>")
5160 (const_string "neon_store4_4reg<q>")))]
;; Four-vector structure store of Q-register vectors from an XI register.
;; Simply forwards both operands to the neon_vst4<mode> expander.
5163 (define_expand "vec_store_lanesxi<mode>"
5164 [(match_operand:XI 0 "neon_struct_operand")
5165 (match_operand:XI 1 "s_register_operand")
5166 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5169 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; VST4 for Q-register vectors, done as two instructions.  The XI memory
;; operand is re-addressed as two consecutive OI-sized pieces: the first (at
;; offset 0) is written by neon_vst4qa and the second (GET_MODE_SIZE (OImode)
;; bytes further on) by neon_vst4qb.
5173 (define_expand "neon_vst4<mode>"
5174 [(match_operand:XI 0 "neon_struct_operand")
5175 (match_operand:XI 1 "s_register_operand")
5176 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5181 mem = adjust_address (operands[0], OImode, 0);
5182 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5183 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5184 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of a Q-register VST4: stores the four DImode registers at
;; regno, regno + 4, regno + 8 and regno + 12 of the XI source to an OI-sized
;; memory operand.
5188 (define_insn "neon_vst4qa<mode>"
5189 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5190 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5191 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5195 int regno = REGNO (operands[1]);
5197 ops[0] = operands[0];
5198 ops[1] = gen_rtx_REG (DImode, regno);
5199 ops[2] = gen_rtx_REG (DImode, regno + 4);
5200 ops[3] = gen_rtx_REG (DImode, regno + 8);
5201 ops[4] = gen_rtx_REG (DImode, regno + 12);
5202 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5205 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of a Q-register VST4: stores the four DImode registers at
;; regno + 2, regno + 6, regno + 10 and regno + 14 of the XI source.
5208 (define_insn "neon_vst4qb<mode>"
5209 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5210 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5211 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5215 int regno = REGNO (operands[1]);
5217 ops[0] = operands[0];
5218 ops[1] = gen_rtx_REG (DImode, regno + 2);
5219 ops[2] = gen_rtx_REG (DImode, regno + 6);
5220 ops[3] = gen_rtx_REG (DImode, regno + 10);
5221 ops[4] = gen_rtx_REG (DImode, regno + 14);
5222 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5225 [(set_attr "type" "neon_store4_4reg<q>")]
;; VST4 from one lane of four D registers (regno, regno + 2, regno + 4,
;; regno + 6).  Operand 1 is the register group and operand 2 the lane.
;; After error () reports a bad lane the code still goes on to print the
;; instruction.
5228 (define_insn "neon_vst4_lane<mode>"
5229 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5230 (unspec:<V_four_elem>
5231 [(match_operand:OI 1 "s_register_operand" "w")
5232 (match_operand:SI 2 "immediate_operand" "i")
5233 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5237 HOST_WIDE_INT lane = INTVAL (operands[2]);
5238 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5239 int regno = REGNO (operands[1]);
5241 if (lane < 0 || lane >= max)
5242 error ("lane out of range");
5243 ops[0] = operands[0];
5244 ops[1] = gen_rtx_REG (DImode, regno);
5245 ops[2] = gen_rtx_REG (DImode, regno + 2);
5246 ops[3] = gen_rtx_REG (DImode, regno + 4);
5247 ops[4] = gen_rtx_REG (DImode, regno + 6);
5248 ops[5] = operands[2];
5249 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5253 [(set_attr "type" "neon_store4_one_lane<q>")]
;; VST4 from one lane of four Q registers.  One D register is named from each
;; Q register (regno, regno + 4, regno + 8, regno + 12) and the locally
;; computed lane is used.
;; This is a single-lane store, so its "type" is neon_store4_one_lane<q>,
;; matching the D-register variant of this pattern and the vld4 lane loads;
;; the four-register class belongs to the whole-structure vst4 patterns.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this excerpt; presumably it rebases lane and regno onto the upper D
;; halves -- confirm.
5256 (define_insn "neon_vst4_lane<mode>"
5257 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5258 (unspec:<V_four_elem>
5259 [(match_operand:XI 1 "s_register_operand" "w")
5260 (match_operand:SI 2 "immediate_operand" "i")
5261 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5265 HOST_WIDE_INT lane = INTVAL (operands[2]);
5266 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5267 int regno = REGNO (operands[1]);
5269 if (lane < 0 || lane >= max)
5270 error ("lane out of range");
5271 else if (lane >= max / 2)
5276 ops[0] = operands[0];
5277 ops[1] = gen_rtx_REG (DImode, regno);
5278 ops[2] = gen_rtx_REG (DImode, regno + 4);
5279 ops[3] = gen_rtx_REG (DImode, regno + 8);
5280 ops[4] = gen_rtx_REG (DImode, regno + 12);
5281 ops[5] = GEN_INT (lane);
5282 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5286 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Widen the low half of a VU vector: the half selected by the
;; vect_par_constant_low parallel (operand 2) is extended by SE into the
;; <V_unpack> result using vmovl on %e1.  Only enabled for little-endian
;; NEON targets.
5289 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5290 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5291 (SE:<V_unpack> (vec_select:<V_HALF>
5292 (match_operand:VU 1 "register_operand" "w")
5293 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5294 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5295 "vmovl.<US><V_sz_elem> %q0, %e1"
5296 [(set_attr "type" "neon_shift_imm_long")]
;; Widen the high half of a VU vector: the half selected by the
;; vect_par_constant_high parallel (operand 2) is extended by SE into the
;; <V_unpack> result using vmovl on %f1.  Only enabled for little-endian
;; NEON targets.
5299 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5300 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5301 (SE:<V_unpack> (vec_select:<V_HALF>
5302 (match_operand:VU 1 "register_operand" "w")
5303 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5304 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5305 "vmovl.<US><V_sz_elem> %q0, %f1"
5306 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for vec_unpack<US>_hi_<mode> on VU modes (little-endian NEON
;; only).  It allocates an rtvec of <V_mode_nunits>/2 entries, fills it with
;; the lane indices <V_mode_nunits>/2 .. <V_mode_nunits>-1, wraps it in a
;; PARALLEL and passes that selector to gen_neon_vec_unpack<US>_hi_<mode>.
;; The remaining arguments of that call are on lines not shown here.
5309 (define_expand "vec_unpack<US>_hi_<mode>"
5310 [(match_operand:<V_unpack> 0 "register_operand" "")
5311 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5312 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5314 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5317 for (i = 0; i < (<V_mode_nunits>/2); i++)
5318 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5320 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5321 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for vec_unpack<US>_lo_<mode> on VU modes (little-endian NEON
;; only).  It builds a PARALLEL holding the lane indices
;; 0 .. <V_mode_nunits>/2-1 and passes it as the selector to
;; gen_neon_vec_unpack<US>_lo_<mode>.  The remaining arguments of that call
;; are on lines not shown here.
5328 (define_expand "vec_unpack<US>_lo_<mode>"
5329 [(match_operand:<V_unpack> 0 "register_operand" "")
5330 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5333 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5336 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5337 RTVEC_ELT (v, i) = GEN_INT (i);
5338 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5339 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of vector halves.  A <V_HALF> selection of operand 1
;; (selector operand 2, predicate vect_par_constant_low) and a <V_HALF>
;; selection of operand 3 are each extended with SE to <V_unpack> and
;; multiplied; the result is produced by one vmull reading %e1 and %e3.
;; NOTE(review): the selector used for operand 3 is on a line missing from
;; this excerpt -- confirm it matches operand 2.
;; Enabled only for NEON when !BYTES_BIG_ENDIAN.
5346 (define_insn "neon_vec_<US>mult_lo_<mode>"
5347 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5348 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5349 (match_operand:VU 1 "register_operand" "w")
5350 (match_operand:VU 2 "vect_par_constant_low" "")))
5351 (SE:<V_unpack> (vec_select:<V_HALF>
5352 (match_operand:VU 3 "register_operand" "w")
5354 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5355 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5356 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for vec_widen_<US>mult_lo_<mode> on VU modes (little-endian
;; NEON only).  It builds a PARALLEL holding the lane indices
;; 0 .. <V_mode_nunits>/2-1 and emits gen_neon_vec_<US>mult_lo_<mode> with
;; operands[0] as destination.  The remaining arguments of that call are on
;; lines not shown here.
5359 (define_expand "vec_widen_<US>mult_lo_<mode>"
5360 [(match_operand:<V_unpack> 0 "register_operand" "")
5361 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5362 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5363 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5365 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5368 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5369 RTVEC_ELT (v, i) = GEN_INT (i);
5370 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5372 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of vector halves, vect_par_constant_high variant.
;; A <V_HALF> selection of operand 1 (selector operand 2) and a <V_HALF>
;; selection of operand 3 are each extended with SE to <V_unpack> and
;; multiplied; the result is produced by one vmull reading %f1 and %f3.
;; NOTE(review): the selector used for operand 3 is on a line missing from
;; this excerpt -- confirm it matches operand 2.
;; Enabled only for NEON when !BYTES_BIG_ENDIAN.
5380 (define_insn "neon_vec_<US>mult_hi_<mode>"
5381 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5382 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5383 (match_operand:VU 1 "register_operand" "w")
5384 (match_operand:VU 2 "vect_par_constant_high" "")))
5385 (SE:<V_unpack> (vec_select:<V_HALF>
5386 (match_operand:VU 3 "register_operand" "w")
5388 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5389 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5390 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for vec_widen_<US>mult_hi_<mode> on VU modes (little-endian
;; NEON only).  It builds a PARALLEL holding the lane indices
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; gen_neon_vec_<US>mult_hi_<mode> with operands[0] as destination.  The
;; remaining arguments of that call are on lines not shown here.
5393 (define_expand "vec_widen_<US>mult_hi_<mode>"
5394 [(match_operand:<V_unpack> 0 "register_operand" "")
5395 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5396 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5397 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5399 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5402 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5403 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5404 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5406 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left.  VW-mode operand 1 is shifted left by operand 2,
;; which must satisfy const_neon_scalar_shift_amount_operand, and the
;; result is extended with SE to <V_widen>.  Output is a single vshll
;; taking %P1 and the shift amount %2.
;; NOTE(review): the insn condition is on a line missing from this excerpt.
5415 (define_insn "neon_vec_<US>shiftl_<mode>"
5416 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5417 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5418 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5421 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5423 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for vec_widen_<US>shiftl_lo_<mode> on VU modes (little-endian
;; NEON only).  It takes the <V_HALF>mode subreg of operand 1 at byte
;; offset 0 and emits gen_neon_vec_<US>shiftl_<V_half> into operands[0].
;; The trailing arguments of that call are on lines not shown here
;; (presumably the shift amount, operand 2 -- confirm).
5426 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5427 [(match_operand:<V_unpack> 0 "register_operand" "")
5428 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5429 (match_operand:SI 2 "immediate_operand" "i")]
5430 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5432 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5433 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for vec_widen_<US>shiftl_hi_<mode> on VU modes (little-endian
;; NEON only).  It takes the <V_HALF>mode subreg of operand 1 at byte
;; offset GET_MODE_SIZE (<V_HALF>mode), i.e. the second half of the
;; register's bytes, and emits gen_neon_vec_<US>shiftl_<V_half> into
;; operands[0].  The trailing arguments of that call are on lines not shown
;; here (presumably the shift amount, operand 2 -- confirm).
5439 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5440 [(match_operand:<V_unpack> 0 "register_operand" "")
5441 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5442 (match_operand:SI 2 "immediate_operand" "i")]
5443 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5445 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5446 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5447 GET_MODE_SIZE (<V_HALF>mode)),
5453 ;; Vectorize for non-neon-quad case
;; Extend a whole VDI-mode register (operand 1) with SE to <V_widen>, using
;; a single vmovl from %P1 into %q0.
;; NOTE(review): the insn condition is on a line missing from this excerpt.
;; NOTE(review): the type attribute is neon_move, whereas the other vmovl
;; patterns in this section use neon_shift_imm_long; confirm the difference
;; is intended.
5454 (define_insn "neon_unpack<US>_<mode>"
5455 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5456 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5458 "vmovl.<US><V_sz_elem> %q0, %P1"
5459 [(set_attr "type" "neon_move")]
;; Expander for vec_unpack<US>_lo_<mode> on VDI modes.  The whole of
;; operand 1 is widened into a fresh <V_widen> pseudo with
;; gen_neon_unpack<US>_<mode>; gen_neon_vget_low<V_widen_l> then copies the
;; low part of that temporary into operand 0.
;; NOTE(review): the expander condition is on a line missing from this
;; excerpt.
5462 (define_expand "vec_unpack<US>_lo_<mode>"
5463 [(match_operand:<V_double_width> 0 "register_operand" "")
5464 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5467 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5468 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5469 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for vec_unpack<US>_hi_<mode> on VDI modes.  The whole of
;; operand 1 is widened into a fresh <V_widen> pseudo with
;; gen_neon_unpack<US>_<mode>; gen_neon_vget_high<V_widen_l> then copies
;; the high part of that temporary into operand 0.
;; NOTE(review): the expander condition is on a line missing from this
;; excerpt.
5475 (define_expand "vec_unpack<US>_hi_<mode>"
5476 [(match_operand:<V_double_width> 0 "register_operand" "")
5477 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5480 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5481 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5482 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI-mode registers: operands 1 and 2 are
;; extended to <V_widen> and multiplied, producing the result with a single
;; vmull from %P1 and %P2 into %q0.
;; NOTE(review): the extension wrapper around operand 2 and the insn
;; condition are on lines missing from this excerpt.
5488 (define_insn "neon_vec_<US>mult_<mode>"
5489 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5490 (mult:<V_widen> (SE:<V_widen>
5491 (match_operand:VDI 1 "register_operand" "w"))
5493 (match_operand:VDI 2 "register_operand" "w"))))]
5495 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5496 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for vec_widen_<US>mult_hi_<mode> on VDI modes.  The full
;; widening product of operands 1 and 2 is computed into a fresh <V_widen>
;; pseudo with gen_neon_vec_<US>mult_<mode>; gen_neon_vget_high<V_widen_l>
;; then copies the high part of that product into operand 0.
;; NOTE(review): the expander condition is on a line missing from this
;; excerpt.
5499 (define_expand "vec_widen_<US>mult_hi_<mode>"
5500 [(match_operand:<V_double_width> 0 "register_operand" "")
5501 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5502 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5505 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5506 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5507 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for vec_widen_<US>mult_lo_<mode> on VDI modes.  The full
;; widening product of operands 1 and 2 is computed into a fresh <V_widen>
;; pseudo with gen_neon_vec_<US>mult_<mode>; gen_neon_vget_low<V_widen_l>
;; then copies the low part of that product into operand 0.
;; NOTE(review): the expander condition is on a line missing from this
;; excerpt.
5514 (define_expand "vec_widen_<US>mult_lo_<mode>"
5515 [(match_operand:<V_double_width> 0 "register_operand" "")
5516 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5517 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5520 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5521 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5522 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for vec_widen_<US>shiftl_hi_<mode> on VDI modes.  Operand 1 is
;; widened and shifted left by the immediate operand 2 into a fresh
;; <V_widen> pseudo with gen_neon_vec_<US>shiftl_<mode>;
;; gen_neon_vget_high<V_widen_l> then copies the high part of that
;; temporary into operand 0.
;; NOTE(review): the expander condition is on a line missing from this
;; excerpt.
5529 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5530 [(match_operand:<V_double_width> 0 "register_operand" "")
5531 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5532 (match_operand:SI 2 "immediate_operand" "i")]
5535 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5536 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5537 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for vec_widen_<US>shiftl_lo_<mode> on VDI modes.  Operand 1 is
;; widened and shifted left by the immediate operand 2 into a fresh
;; <V_widen> pseudo with gen_neon_vec_<US>shiftl_<mode>;
;; gen_neon_vget_low<V_widen_l> then copies the low part of that temporary
;; into operand 0.
;; NOTE(review): the expander condition is on a line missing from this
;; excerpt.
5543 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5544 [(match_operand:<V_double_width> 0 "register_operand" "")
5545 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5546 (match_operand:SI 2 "immediate_operand" "i")]
5549 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5550 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5551 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5557 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5558 ; because the ordering of vector elements in Q registers is different from what
5559 ; the semantics of the instructions require.
;; Pack two VN-mode vectors into one <V_narrow_pack> register: each of
;; operands 1 and 2 is truncated to <V_narrow> and the two results are
;; concatenated.  The output is two vmovn instructions, the first writing
;; %e0 from %q1 and the second writing %f0 from %q2, hence type "multiple"
;; and length 8.  The destination constraint is early-clobber ("=&w").
;; NOTE(review): presumably the early clobber is needed because %e0 is
;; written before %q2 is read -- confirm.
;; Enabled only for NEON when !BYTES_BIG_ENDIAN.
5561 (define_insn "vec_pack_trunc_<mode>"
5562 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5563 (vec_concat:<V_narrow_pack>
5564 (truncate:<V_narrow>
5565 (match_operand:VN 1 "register_operand" "w"))
5566 (truncate:<V_narrow>
5567 (match_operand:VN 2 "register_operand" "w"))))]
5568 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5569 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5570 [(set_attr "type" "multiple")
5571 (set_attr "length" "8")]
5574 ;; For the non-quad case.
;; Truncate a single VN-mode register (operand 1) to <V_narrow> with one
;; vmovn from %q1 into %P0.  Enabled only for NEON when !BYTES_BIG_ENDIAN.
5575 (define_insn "neon_vec_pack_trunc_<mode>"
5576 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5577 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5578 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5579 "vmovn.i<V_sz_elem>\t%P0, %q1"
5580 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for vec_pack_trunc_<mode> on VSHFT modes (little-endian NEON
;; only).  Operands 1 and 2 are first combined into a fresh <V_DOUBLE>
;; pseudo, operand 1 through gen_move_lo_quad_<V_double> and operand 2
;; through gen_move_hi_quad_<V_double>; that temporary is then narrowed
;; into operand 0 with gen_neon_vec_pack_trunc_<V_double>.
5583 (define_expand "vec_pack_trunc_<mode>"
5584 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5585 (match_operand:VSHFT 1 "register_operand" "")
5586 (match_operand:VSHFT 2 "register_operand")]
5587 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5589 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5591 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5592 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5593 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference written as abs (minus (op1, op2)) on VDQ modes,
;; emitted as one vabd.<V_s_elem>.  Enabled for NEON when the mode is not a
;; float mode, or when flag_unsafe_math_optimizations is set.  The type
;; attribute is neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
5597 (define_insn "neon_vabd<mode>_2"
5598 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5599 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5600 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5601 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5602 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5604 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5605 (const_string "neon_fp_abd_s<q>")
5606 (const_string "neon_abd<q>")))]
;; Absolute difference written as abs of an unspec over operands 1 and 2 on
;; VDQ modes, emitted as one vabd.<V_if_elem>.  Enabled for NEON when the
;; mode is not a float mode, or when flag_unsafe_math_optimizations is set.
;; The type attribute is neon_fp_abd_s<q> for float modes and neon_abd<q>
;; otherwise.
;; NOTE(review): the unspec code is on a line missing from this excerpt.
5609 (define_insn "neon_vabd<mode>_3"
5610 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5611 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5612 (match_operand:VDQ 2 "s_register_operand" "w")]
5614 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5615 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5617 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5618 (const_string "neon_fp_abd_s<q>")
5619 (const_string "neon_abd<q>")))]
5622 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; Split for sign_extend SI -> DI.  Applies after reload when the DI
;; destination is a VFP register: operand 1 is duplicated across the
;; destination viewed as V2SImode (operand 2, same register number), then
;; the DI destination is arithmetically shifted right by 32.
5625 [(set (match_operand:DI 0 "s_register_operand" "")
5626 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5627 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5628 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5629 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5631 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Split for sign_extend HI -> DI.  Applies after reload when the DI
;; destination is a VFP register: operand 1 is duplicated across the
;; destination viewed as V4HImode (operand 2, same register number), then
;; the DI destination is arithmetically shifted right by 48.
5635 [(set (match_operand:DI 0 "s_register_operand" "")
5636 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5637 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5638 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5639 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5641 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; Split for sign_extend QI -> DI.  Applies after reload when the DI
;; destination is a VFP register: operand 1 is duplicated across the
;; destination viewed as V8QImode (operand 2, same register number), then
;; the DI destination is arithmetically shifted right by 56.
5645 [(set (match_operand:DI 0 "s_register_operand" "")
5646 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5647 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5648 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5649 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5651 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; Split for zero_extend SI -> DI.  Applies after reload when the DI
;; destination is a VFP register: operand 1 is duplicated across the
;; destination viewed as V2SImode (operand 2, same register number), then
;; the DI destination is logically shifted right by 32.
5655 [(set (match_operand:DI 0 "s_register_operand" "")
5656 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5657 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5658 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5659 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5661 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Split for zero_extend HI -> DI.  Applies after reload when the DI
;; destination is a VFP register: operand 1 is duplicated across the
;; destination viewed as V4HImode (operand 2, same register number), then
;; the DI destination is logically shifted right by 48.
5665 [(set (match_operand:DI 0 "s_register_operand" "")
5666 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5667 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5668 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5669 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5671 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; Split for zero_extend QI -> DI.  Applies after reload when the DI
;; destination is a VFP register: operand 1 is duplicated across the
;; destination viewed as V8QImode (operand 2, same register number), then
;; the DI destination is logically shifted right by 56.
5675 [(set (match_operand:DI 0 "s_register_operand" "")
5676 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5677 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5678 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5679 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5681 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));