1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute may take the values vadd, vmin or vmax; vadd is the default.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX modes with nine alternatives; at least one operand
;; must be a register.  Alternative 2 (Dn source) is validated with
;; neon_immediate_valid_for_move and printed as a vmov immediate.  Alternatives
;; 1 and 3 are printed by output_move_neon, alternatives 4 and 5 use a
;; three-operand vmov, and alternatives 6 to 8 go to output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern for the VQXMOV modes with nine alternatives; at least one
;; operand must be a register.  Alternative 2 (Dn source) is validated with
;; neon_immediate_valid_for_move and printed as a vmov immediate.  Alternatives
;; 1 and 3 are printed by output_move_neon, alternatives 4 and 5 each print two
;; vmov instructions, and alternatives 6 to 8 go to output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; TImode move expander.  While pseudos can still be created, a source whose
;; destination is not a REG is first forced into a TImode register.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Move expander for the VSTRUCT modes.  While pseudos can still be created, a
;; source whose destination is not a REG is first forced into a register of
;; the same mode.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move pattern for the VSTRUCT modes; at least one operand must be a
;; register.  Alternatives 1 and 2 (the ones using the Ut constraint) are
;; printed by output_move_neon.  The length attribute is not a constant: it is
;; computed per insn by arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
;; Misaligned-move expander for the VDQX modes, available only when
;; TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access.  If neither operand is
;; a register, operand 1 is forced into one.  The operand that is not the
;; register is then taken as the memory reference, and its address is forced
;; into a Pmode register when neon_vector_mem_operand rejects it.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX register (operand 1) to operand 0, printed as a
;; single vst1 whose element size comes from <V_sz_elem>.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of operand 1 into a VDX register (operand 0), printed as a
;; single vld1 whose element size comes from <V_sz_elem>.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX register (operand 1) to operand 0, printed as a
;; single vst1 on the %q register with the element size from <V_sz_elem>.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of operand 1 into a VQX register (operand 0), printed as a
;; single vld1 on the %q register with the element size from <V_sz_elem>.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
;; Set one lane of a VD register (operand 0, tied to operand 3) from scalar
;; operand 1.  The lane number is the index of the lowest set bit of operand 2
;; (ffs - 1) and is mirrored when BYTES_BIG_ENDIAN.  Alternative 0 prints vld1
;; to a single lane; alternative 1 prints vmov from the register operand.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Set one lane of a VQ register (operand 0, tied to operand 3) from scalar
;; operand 1.  The lane number is the index of the lowest set bit of operand 2.
;; It is split into a half selector (register number offset 0 or 2) and a lane
;; within that half, which is mirrored when BYTES_BIG_ENDIAN.  Operand 0 is
;; then rewritten as the <V_HALF>mode register holding the lane.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Set one DImode element of a V2DI register (operand 0, tied to operand 3).
;; The element index is the lowest set bit of operand 2; operand 0 is rewritten
;; as the DImode register at REGNO + 2 * index.  Alternative 0 prints vld1.64;
;; alternative 1 prints a vmov from the %Q1/%R1 register pair.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; vec_set expander for the VDQ modes.  Operand 2 is a lane index; it is
;; converted to the single-bit mask 1 << index and passed to the matching
;; vec_set<mode>_internal pattern, with operand 0 also supplied as the vector
;; being updated.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
;; Extract lane operand 2 of VD register operand 1 into scalar operand 0.  The
;; lane number is mirrored when BYTES_BIG_ENDIAN.  Alternative 0 prints a
;; single-lane vst1; alternative 1 prints vmov.<V_uf_sclr> to the register
;; destination.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract lane operand 2 of VQ register operand 1 into scalar operand 0.  The
;; lane index is split into a half selector (register number offset 0 or 2)
;; and a lane within that half, which is mirrored when BYTES_BIG_ENDIAN.
;; Operand 1 is then rewritten as the <V_HALF>mode register holding the lane.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract DImode element operand 2 of V2DI register operand 1.  Operand 1 is
;; rewritten as the DImode register at REGNO + 2 * index.  Alternative 0 prints
;; vst1.64; alternative 1 prints a vmov to the %Q0/%R0 register pair.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; vec_init expander for the VDQ modes; all the work is delegated to
;; neon_expand_vector_init (destination, initializer).
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
;; Vector add for the VDQ modes, printed as vadd.  Float modes are accepted
;; only with flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
;; DImode add with seven alternatives; the pattern clobbers CC_REGNUM.
;; Alternatives 0 and 3 print vadd.i64; the output for the other alternatives
;; is not visible in this view.  The arch attribute enables alternative 0 for
;; neon_for_64bits and alternative 3 for avoid_neon_for_64bits.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtract for the VDQ modes, printed as vsub.  Float modes are
;; accepted only with flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
;; DImode subtract with five alternatives; the pattern clobbers CC_REGNUM.
;; Alternatives 0 and 4 print vsub.i64; alternatives 1 to 3 print a subs/sbc
;; pair on the low (%Q) and high (%R) words and are marked as clobbering the
;; condition codes with length 8.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiply for the VDQW modes, printed as vmul.  Float modes are
;; accepted only with flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_mul_s<q> for float modes and neon_mul_<V_elem_ch><q> otherwise.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, where operand 1 is tied to operand 0.  Printed as vmla.  Float
;; modes are accepted only with flag_unsafe_math_optimizations.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, where operand 1 is tied to operand 0.  Printed as vmls.  Float
;; modes are accepted only with flag_unsafe_math_optimizations.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes, printed as vfma with operand 3
;; (the addend) tied to operand 0.  Requires TARGET_NEON, TARGET_FMA and
;; flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and vfma output as fma<VCVTF:mode>4, but the condition is only
;; TARGET_NEON && TARGET_FMA, without flag_unsafe_math_optimizations.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: an fma whose first multiplicand
;; is negated, printed as vfms with operand 3 tied to operand 0.  Requires
;; TARGET_NEON, TARGET_FMA and flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and vfms output as *fmsub<VCVTF:mode>4, but the condition is only
;; TARGET_NEON && TARGET_FMA, without flag_unsafe_math_optimizations.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vrint<variant>.f32 on a VCVTF register, one pattern per NEON_VRINT
;; variant.  Requires TARGET_NEON && TARGET_FPU_ARMV8.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; vcvt<variant>.<su>32.f32 from a VCVTF register to a <V_cmp_result>
;; register, for each FIXUORS code and NEON_VCVT variant.  Requires
;; TARGET_NEON && TARGET_FPU_ARMV8; the insn is marked as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 prints a register-register
;; vorr; alternative 1 (Dl constraint, operand 1 tied to operand 0) is printed
;; by neon_output_logic_immediate with the "vorr" mnemonic.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
649 switch (which_alternative)
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
657 [(set_attr "type" "neon_logic<q>")]
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 prints a register-register
;; vand; alternative 1 (DL constraint, operand 1 tied to operand 0) is printed
;; by neon_output_logic_immediate with the "vand" mnemonic.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
671 switch (which_alternative)
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
679 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2, printed as
;; vorn.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT (operand 0 = operand 1 | ~operand 2) with four alternatives.
;; The split applies when the destination is not a VFP register and produces
;; two SImode OR-NOT sets.  The visible preparation code breaks each DImode
;; operand into its SImode low and high parts (high parts become operands 3,
;; 4 and 5).  Another visible path emits gen_one_cmpldi2 followed by
;; gen_iordi3; the test that chooses between the two paths is not visible in
;; this view.
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
;; AND-NOT for the VDQ modes: operand 0 = operand 1 & ~operand 2, printed as
;; vbic.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
740 ;; Compare to *anddi_notdi_di.
;; DImode AND-NOT (operand 0 = operand 1 & ~operand 2) with three
;; alternatives.  Alternative 0 has type neon_logic; the other two have type
;; "multiple" and length 8.  The output template is not visible in this view.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
;; Bitwise exclusive OR for the VDQ modes, printed as veor.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes, printed as vmvn.
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
;; Absolute value for the VDQW modes, printed as vabs.<V_s_elem>.  The type
;; attribute is neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
;; Negation for the VDQW modes, printed as vneg.<V_s_elem>.  The type
;; attribute is neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
;; DImode negation with four alternatives (two on w registers, two on r
;; registers).  The pattern clobbers a DImode scratch (a w register only in
;; alternative 1) and CC_REGNUM, and is given length 8 and type "multiple".
;; The output template is not visible in this view.
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
804 ; Split negdi2_neon for vfp registers
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
820 ; Split negdi2_neon for core registers
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
;; Unsigned minimum for the VDQIW modes, printed as vmin.<V_u_elem>.
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned maximum for the VDQIW modes, printed as vmax.<V_u_elem>.
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
;; Signed minimum for the VDQW modes, printed as vmin.<V_s_elem>.  The type
;; attribute is neon_fp_minmax_s<q> for float modes and neon_minmax<q>
;; otherwise.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
;; Signed maximum for the VDQW modes, printed as vmax.<V_s_elem>.  The type
;; attribute is neon_fp_minmax_s<q> for float modes and neon_minmax<q>
;; otherwise.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a register
;; and prints vshl.<V_s_elem>; alternative 1 (Dn constant) is printed by
;; neon_output_shift_immediate with the "vshl" mnemonic and sign character 'i'.
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
885 switch (which_alternative)
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
890 VALID_NEON_QREG_MODE (<MODE>mode),
892 default: gcc_unreachable ();
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift by an immediate for the VDQIW modes, printed by
;; neon_output_shift_immediate with the "vshr" mnemonic and sign character 's'.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
908 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift by an immediate for the VDQIW modes, printed by
;; neon_output_shift_immediate with the "vshr" mnemonic and sign character 'u'.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
921 [(set_attr "type" "neon_shift_imm<q>")]
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
925 ; amount, with signed operands. This is essentially the same as vashl<mode>3
926 ; above, but using an unspec in case GCC tries anything tricky with negative shift amounts.
;; Register-by-register vshl with a signed element type (<V_s_elem>) for the
;; VDQI modes, expressed as UNSPEC_ASHIFT_SIGNED rather than an ashift rtx.
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
;; Register-by-register vshl with an unsigned element type (<V_u_elem>) for
;; the VDQI modes, expressed as UNSPEC_ASHIFT_UNSIGNED rather than an ashift
;; rtx.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
;; Arithmetic right shift expander for the VDQIW modes.  A register shift
;; count is negated into a fresh register and used with ashl<mode>3_signed;
;; the vashr<mode>3_imm pattern is emitted for the other case (the branch
;; structure itself is only partly visible in this view).
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
958 if (s_register_operand (operands[2], <MODE>mode))
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Logical right shift expander for the VDQIW modes.  A register shift count
;; is negated into a fresh register and used with ashl<mode>3_unsigned; the
;; vlshr<mode>3_imm pattern is emitted for the other case (the branch
;; structure itself is only partly visible in this view).
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
975 if (s_register_operand (operands[2], <MODE>mode))
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
;; Puts SImode operand 1 into lane 0 of DImode register operand 0: vld1.32
;; for alternative 0 (Um) and vmov.32 for alternative 1 (r).
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift on NEON registers without any clobbers.  Matches only
;; after reload, and a constant count must lie in [0, 63].  Alternative 0
;; takes an immediate count, alternative 1 a register count; both emit
;; vshl.u64.
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift with six alternatives covering NEON ("w") and core
;; ("r") registers, plus SImode/DImode scratches and a CC clobber.  It is
;; split after reload.  The split code tests IS_VFP_REGNUM on operand 0:
;; constant counts below 1 become a DImode move, constant counts above 63
;; are replaced by 63, and a non-constant count is loaded into scratch
;; operand 5 with neon_load_count before ashldi3_neon_noclobber is emitted.
;; The core-register code uses arm_ashldi3_1bit for a count of 1 when the
;; operands do not partially overlap, and arm_emit_coreregs_64bit_shift
;; with scratches 3 and 4.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1027 "TARGET_NEON && reload_completed"
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1033 if (CONST_INT_P (operands[2]))
1035 if (INTVAL (operands[2]) < 1)
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1071 ; The shift amount needs to be negated for right-shifts
;; DImode vshl.s64 by a register count held in a NEON register, represented
;; as UNSPEC_ASHIFT_SIGNED.  Matches only after reload.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1082 ; The shift amount needs to be negated for right-shifts
;; DImode vshl.u64 by a register count held in a NEON register, represented
;; as UNSPEC_ASHIFT_UNSIGNED.  Matches only after reload.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic right shift on NEON registers by an immediate count in
;; [1, 64], emitted as vshr.s64.  Matches only after reload; no clobbers.
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
;; DImode logical right shift on NEON registers by an immediate count in
;; [1, 64], emitted as vshr.u64.  Matches only after reload; no clobbers.
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (iterated over rshifts) with six alternatives
;; covering NEON ("w") and core ("r") registers.  It is split after reload.
;; The split code tests IS_VFP_REGNUM on operand 0: constant counts below 1
;; become a DImode move, constant counts above 64 are replaced by 64, and
;; the result is emitted through <shift>di3_neon_imm_noclobber.  A register
;; count is negated into scratch operand 3, loaded into scratch operand 5
;; with neon_load_count, and applied via <shifttype>_shift_di3_neon.  The
;; core-register code uses arm_<shift>di3_1bit for a count of 1 when the
;; operands do not partially overlap, and arm_emit_coreregs_64bit_shift
;; with scratches 3 and 4.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1125 "TARGET_NEON && reload_completed"
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1131 if (CONST_INT_P (operands[2]))
1133 if (INTVAL (operands[2]) < 1)
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1175 ;; Widening operations
;; Widening signed accumulate: each element of operand 1 is sign-extended
;; and added to the wide accumulator in operand 2 (vaddw).  Operand 1
;; carries no '%' commutativity marker: it has the narrow VW mode and sits
;; inside the sign_extend, so it must never be interchanged with the
;; <V_widen> operand 2.
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
;; Widening unsigned accumulate: each element of operand 1 is zero-extended
;; and added to the wide accumulator in operand 2 (vaddw).  Operand 1
;; carries no '%' commutativity marker: it has the narrow VW mode and sits
;; inside the zero_extend, so it must never be interchanged with the
;; <V_widen> operand 2.
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1197 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1198 ;; shift-count granularity. That's good enough for the middle-end's current
1201 ;; Note that it's not safe to perform such an operation in big-endian mode,
1202 ;; due to element-ordering issues.
;; Whole-vector shift expander.  Operand 2 is a constant bit count that is
;; a multiple of 8.  When the count equals the vector width, operand 1 is
;; moved to operand 0.  Otherwise both vectors are viewed as V16QI or V8QI
;; (chosen by width) through gen_lowpart and combined with a zero vector
;; using neon_vext at byte index num_bits / BITS_PER_UNIT, with operand 1
;; as the first VEXT source.  Enabled only when !BYTES_BIG_ENDIAN.
1204 (define_expand "vec_shr_<mode>"
1205 [(match_operand:VDQ 0 "s_register_operand" "")
1206 (match_operand:VDQ 1 "s_register_operand" "")
1207 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1208 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1211 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1212 const int width = GET_MODE_BITSIZE (<MODE>mode);
1213 const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1214 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1215 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1217 if (num_bits == width)
1219 emit_move_insn (operands[0], operands[1]);
1223 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1224 operands[0] = gen_lowpart (bvecmode, operands[0]);
1225 operands[1] = gen_lowpart (bvecmode, operands[1]);
1227 emit_insn (gen_ext (operands[0], operands[1], zero_reg,
1228 GEN_INT (num_bits / BITS_PER_UNIT)));
;; Whole-vector shift expander, the counterpart of vec_shr_<mode>.
;; Operand 2 is a constant bit count that is a multiple of 8.  One path
;; stores an all-zero vector into operand 0.  The VEXT path replaces the
;; count with width - num_bits, views both vectors as V16QI or V8QI through
;; gen_lowpart, and emits neon_vext with the zero vector as the first source
;; and operand 1 as the second.  Enabled only when !BYTES_BIG_ENDIAN.
1232 (define_expand "vec_shl_<mode>"
1233 [(match_operand:VDQ 0 "s_register_operand" "")
1234 (match_operand:VDQ 1 "s_register_operand" "")
1235 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1236 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1239 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1240 const int width = GET_MODE_BITSIZE (<MODE>mode);
1241 const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1242 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1243 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1247 emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
1251 num_bits = width - num_bits;
1253 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1254 operands[0] = gen_lowpart (bvecmode, operands[0]);
1255 operands[1] = gen_lowpart (bvecmode, operands[1]);
1257 emit_insn (gen_ext (operands[0], zero_reg, operands[1],
1258 GEN_INT (num_bits / BITS_PER_UNIT)));
1262 ;; Helpers for quad-word reduction operations
1264 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1265 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1266 ; N/2-element vector.
;; V4SI variant: vec_select picks lanes 0-1 and lanes 2-3 of operand 1 and
;; the two V2SI halves are combined into operand 0.  The mnemonic and
;; signedness come from <VQH_mnem> and <VQH_sign>.
1268 (define_insn "quad_halves_<code>v4si"
1269 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1271 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1272 (parallel [(const_int 0) (const_int 1)]))
1273 (vec_select:V2SI (match_dup 1)
1274 (parallel [(const_int 2) (const_int 3)]))))]
1276 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1277 [(set_attr "vqh_mnem" "<VQH_mnem>")
1278 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF variant: vec_select picks lanes 0-1 and lanes 2-3 of operand 1 and
;; the two V2SF halves are combined into operand 0 with <VQH_mnem>.f32.
;; Only enabled under flag_unsafe_math_optimizations.
1281 (define_insn "quad_halves_<code>v4sf"
1282 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1284 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1285 (parallel [(const_int 0) (const_int 1)]))
1286 (vec_select:V2SF (match_dup 1)
1287 (parallel [(const_int 2) (const_int 3)]))))]
1288 "TARGET_NEON && flag_unsafe_math_optimizations"
1289 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1290 [(set_attr "vqh_mnem" "<VQH_mnem>")
1291 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant: vec_select picks lanes 0-3 and lanes 4-7 of operand 1 and
;; the two V4HI halves are combined into operand 0.  Operand 0 is only
;; written, so it uses the output-only "=w" constraint, matching the V4SI
;; and V4SF variants.
1294 (define_insn "quad_halves_<code>v8hi"
1295 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1297 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1298 (parallel [(const_int 0) (const_int 1)
1299 (const_int 2) (const_int 3)]))
1300 (vec_select:V4HI (match_dup 1)
1301 (parallel [(const_int 4) (const_int 5)
1302 (const_int 6) (const_int 7)]))))]
1304 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1305 [(set_attr "vqh_mnem" "<VQH_mnem>")
1306 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant: vec_select picks lanes 0-7 and lanes 8-15 of operand 1
;; and the two V8QI halves are combined into operand 0.  Operand 0 is only
;; written, so it uses the output-only "=w" constraint, matching the V4SI
;; and V4SF variants.
1309 (define_insn "quad_halves_<code>v16qi"
1310 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1312 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1313 (parallel [(const_int 0) (const_int 1)
1314 (const_int 2) (const_int 3)
1315 (const_int 4) (const_int 5)
1316 (const_int 6) (const_int 7)]))
1317 (vec_select:V8QI (match_dup 1)
1318 (parallel [(const_int 8) (const_int 9)
1319 (const_int 10) (const_int 11)
1320 (const_int 12) (const_int 13)
1321 (const_int 14) (const_int 15)]))))]
1323 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1324 [(set_attr "vqh_mnem" "<VQH_mnem>")
1325 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Emits a move into the <V_HALF>mode subreg of operand 0 that starts at
;; byte offset GET_MODE_SIZE (<V_HALF>mode).  The source of the move is
;; presumably operand 1 -- TODO confirm.
1328 (define_expand "move_hi_quad_<mode>"
1329 [(match_operand:ANY128 0 "s_register_operand" "")
1330 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1333 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1334 GET_MODE_SIZE (<V_HALF>mode)),
;; Emits a move into a <V_HALF>mode subreg of operand 0.  Presumably the
;; low-half counterpart of move_hi_quad_<mode> with operand 1 as the
;; source -- TODO confirm.
1339 (define_expand "move_lo_quad_<mode>"
1340 [(match_operand:ANY128 0 "s_register_operand" "")
1341 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1344 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1350 ;; Reduction operations
;; Sum reduction of a VD vector to a scalar: neon_pairwise_reduce is run
;; with neon_vpadd_internal<mode> into a fresh vector, then element 0 is
;; extracted into operand 0.  Float modes require
;; flag_unsafe_math_optimizations.
1352 (define_expand "reduc_plus_scal_<mode>"
1353 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1354 (match_operand:VD 1 "s_register_operand" "")]
1355 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1357 rtx vec = gen_reg_rtx (<MODE>mode);
1358 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1359 &gen_neon_vpadd_internal<mode>);
1360 /* The same result is actually computed into every element. */
1361 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Sum reduction of a VQ vector to a scalar: the two halves are first added
;; with quad_halves_plus<mode> into a <V_HALF>mode temporary, which is then
;; reduced by the <V_half> reduc_plus_scal expander.  Little-endian only;
;; float modes require flag_unsafe_math_optimizations.
1365 (define_expand "reduc_plus_scal_<mode>"
1366 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1367 (match_operand:VQ 1 "s_register_operand" "")]
1368 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1369 && !BYTES_BIG_ENDIAN"
1371 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1373 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1374 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector to a DImode scalar:
;; arm_reduc_plus_internal_v2di writes a fresh V2DI temporary, then element
;; 0 is extracted into operand 0.  Little-endian only.
1379 (define_expand "reduc_plus_scal_v2di"
1380 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1381 (match_operand:V2DI 1 "s_register_operand" "")]
1382 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1384 rtx vec = gen_reg_rtx (V2DImode);
1386 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1387 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
;; Helper insn for reduc_plus_scal_v2di, modelled as an unspec on operand 1.
;; Emits a single vadd.i64 whose sources are %e1 and %f1 and whose
;; destination is %e0 (presumably the low/high D halves of the Q registers
;; -- TODO confirm against the operand-printing code).  Little-endian only.
1392 (define_insn "arm_reduc_plus_internal_v2di"
1393 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1394 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1396 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1397 "vadd.i64\t%e0, %e1, %f1"
1398 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a VD vector to a scalar:
;; neon_pairwise_reduce is run with neon_vpsmin<mode> into a fresh vector,
;; then element 0 is extracted into operand 0.  Float modes require
;; flag_unsafe_math_optimizations.
1401 (define_expand "reduc_smin_scal_<mode>"
1402 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1403 (match_operand:VD 1 "s_register_operand" "")]
1404 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1406 rtx vec = gen_reg_rtx (<MODE>mode);
1408 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1409 &gen_neon_vpsmin<mode>);
1410 /* The result is computed into every element of the vector. */
1411 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction of a VQ vector to a scalar: the two halves are
;; combined with quad_halves_smin<mode> into a <V_HALF>mode temporary, which
;; is then reduced by the <V_half> reduc_smin_scal expander.  Little-endian
;; only; float modes require flag_unsafe_math_optimizations.
1415 (define_expand "reduc_smin_scal_<mode>"
1416 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1417 (match_operand:VQ 1 "s_register_operand" "")]
1418 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1419 && !BYTES_BIG_ENDIAN"
1421 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1423 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1424 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of a VD vector to a scalar:
;; neon_pairwise_reduce is run with neon_vpsmax<mode> into a fresh vector,
;; then element 0 is extracted into operand 0.  Float modes require
;; flag_unsafe_math_optimizations.
1429 (define_expand "reduc_smax_scal_<mode>"
1430 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1431 (match_operand:VD 1 "s_register_operand" "")]
1432 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1434 rtx vec = gen_reg_rtx (<MODE>mode);
1435 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1436 &gen_neon_vpsmax<mode>);
1437 /* The result is computed into every element of the vector. */
1438 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction of a VQ vector to a scalar: the two halves are
;; combined with quad_halves_smax<mode> into a <V_HALF>mode temporary, which
;; is then reduced by the <V_half> reduc_smax_scal expander.  Little-endian
;; only; float modes require flag_unsafe_math_optimizations.
1442 (define_expand "reduc_smax_scal_<mode>"
1443 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1444 (match_operand:VQ 1 "s_register_operand" "")]
1445 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1446 && !BYTES_BIG_ENDIAN"
1448 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1450 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1451 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of a VDI vector to a scalar:
;; neon_pairwise_reduce is run with neon_vpumin<mode> into a fresh vector,
;; then element 0 is extracted into operand 0.
1456 (define_expand "reduc_umin_scal_<mode>"
1457 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1458 (match_operand:VDI 1 "s_register_operand" "")]
1461 rtx vec = gen_reg_rtx (<MODE>mode);
1462 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1463 &gen_neon_vpumin<mode>);
1464 /* The result is computed into every element of the vector. */
1465 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction of a VQI vector to a scalar: the two halves
;; are combined with quad_halves_umin<mode> into a <V_HALF>mode temporary,
;; which is then reduced by the <V_half> reduc_umin_scal expander.
;; Little-endian only.
1469 (define_expand "reduc_umin_scal_<mode>"
1470 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1471 (match_operand:VQI 1 "s_register_operand" "")]
1472 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1474 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1476 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1477 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of a VDI vector to a scalar:
;; neon_pairwise_reduce is run with neon_vpumax<mode> into a fresh vector,
;; then element 0 is extracted into operand 0.
1482 (define_expand "reduc_umax_scal_<mode>"
1483 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1484 (match_operand:VDI 1 "s_register_operand" "")]
1487 rtx vec = gen_reg_rtx (<MODE>mode);
1488 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1489 &gen_neon_vpumax<mode>);
1490 /* The result is computed into every element of the vector. */
1491 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction of a VQI vector to a scalar: the two halves
;; are combined with quad_halves_umax<mode> into a <V_HALF>mode temporary,
;; which is then reduced by the <V_half> reduc_umax_scal expander.
;; Little-endian only.
1495 (define_expand "reduc_umax_scal_<mode>"
1496 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1497 (match_operand:VQI 1 "s_register_operand" "")]
1498 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1500 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1502 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1503 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add of two VD vectors, modelled as an unspec and emitted as
;; vpadd.<V_if_elem> on D registers.  The scheduling type is chosen by
;; <Is_float_mode>.
1508 (define_insn "neon_vpadd_internal<mode>"
1509 [(set (match_operand:VD 0 "s_register_operand" "=w")
1510 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1511 (match_operand:VD 2 "s_register_operand" "w")]
1514 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1515 ;; Assume this schedules like vadd.
1517 (if_then_else (match_test "<Is_float_mode>")
1518 (const_string "neon_fp_reduc_add_s<q>")
1519 (const_string "neon_reduc_add<q>")))]
;; Pairwise signed minimum of two VD vectors, modelled as an unspec and
;; emitted as vpmin.<V_s_elem> on D registers.  The scheduling type is
;; chosen by <Is_float_mode>.
1522 (define_insn "neon_vpsmin<mode>"
1523 [(set (match_operand:VD 0 "s_register_operand" "=w")
1524 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1525 (match_operand:VD 2 "s_register_operand" "w")]
1528 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1530 (if_then_else (match_test "<Is_float_mode>")
1531 (const_string "neon_fp_reduc_minmax_s<q>")
1532 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum of two VD vectors, modelled as an unspec and
;; emitted as vpmax.<V_s_elem> on D registers.  The scheduling type is
;; chosen by <Is_float_mode>.
1535 (define_insn "neon_vpsmax<mode>"
1536 [(set (match_operand:VD 0 "s_register_operand" "=w")
1537 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1538 (match_operand:VD 2 "s_register_operand" "w")]
1541 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1543 (if_then_else (match_test "<Is_float_mode>")
1544 (const_string "neon_fp_reduc_minmax_s<q>")
1545 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum of two VDI vectors, modelled as an unspec and
;; emitted as vpmin.<V_u_elem> on D registers.
1548 (define_insn "neon_vpumin<mode>"
1549 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1550 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1551 (match_operand:VDI 2 "s_register_operand" "w")]
1554 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1555 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum of two VDI vectors, modelled as an unspec and
;; emitted as vpmax.<V_u_elem> on D registers.
1558 (define_insn "neon_vpumax<mode>"
1559 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1560 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1561 (match_operand:VDI 2 "s_register_operand" "w")]
1564 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1565 [(set_attr "type" "neon_reduc_minmax<q>")]
1568 ;; Saturating arithmetic
1570 ; NOTE: Neon supports many more saturating variants of instructions than the
1571 ; following, but these are all GCC currently understands.
1572 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1573 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) of two VD vectors, emitted as
;; vqadd.<V_s_elem> on D registers.
1576 (define_insn "*ss_add<mode>_neon"
1577 [(set (match_operand:VD 0 "s_register_operand" "=w")
1578 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1579 (match_operand:VD 2 "s_register_operand" "w")))]
1581 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1582 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) of two VD vectors, emitted as
;; vqadd.<V_u_elem> on D registers.
1585 (define_insn "*us_add<mode>_neon"
1586 [(set (match_operand:VD 0 "s_register_operand" "=w")
1587 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1588 (match_operand:VD 2 "s_register_operand" "w")))]
1590 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1591 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) of two VD vectors, emitted as
;; vqsub.<V_s_elem> on D registers.
1594 (define_insn "*ss_sub<mode>_neon"
1595 [(set (match_operand:VD 0 "s_register_operand" "=w")
1596 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1597 (match_operand:VD 2 "s_register_operand" "w")))]
1599 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1600 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) of two VD vectors, emitted as
;; vqsub.<V_u_elem> on D registers.
1603 (define_insn "*us_sub<mode>_neon"
1604 [(set (match_operand:VD 0 "s_register_operand" "=w")
1605 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1606 (match_operand:VD 2 "s_register_operand" "w")))]
1608 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1609 [(set_attr "type" "neon_qsub<q>")]
1612 ;; Conditional instructions. These are comparisons with conditional moves for
1613 ;; vectors. They perform the assignment:
1615 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1617 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional-select expander.  Operand 3 is the comparison of
;; operands 4 and 5; operands 1 and 2 are the values to select between.
;; The expansion builds a <V_cmp_result>mode mask from vcge/vcgt/vceq (or
;; the compare-against-zero forms vclt/vcle when operand 5 is the zero
;; vector) and finishes with neon_vbsl.  swap_bsl_operands is set when the
;; emitted comparison is the inverse of the requested one, in which case
;; vbsl is given operand 2 ahead of operand 1.  Float modes require
;; flag_unsafe_math_optimizations.
1620 (define_expand "vcond<mode><mode>"
1621 [(set (match_operand:VDQW 0 "s_register_operand" "")
1623 (match_operator 3 "comparison_operator"
1624 [(match_operand:VDQW 4 "s_register_operand" "")
1625 (match_operand:VDQW 5 "nonmemory_operand" "")])
1626 (match_operand:VDQW 1 "s_register_operand" "")
1627 (match_operand:VDQW 2 "s_register_operand" "")))]
1628 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1631 int use_zero_form = 0;
1632 int swap_bsl_operands = 0;
1633 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1634 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1636 rtx (*base_comparison) (rtx, rtx, rtx);
1637 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1639 switch (GET_CODE (operands[3]))
1646 if (operands[5] == CONST0_RTX (<MODE>mode))
1653 if (!REG_P (operands[5]))
1654 operands[5] = force_reg (<MODE>mode, operands[5]);
1657 switch (GET_CODE (operands[3]))
1667 base_comparison = gen_neon_vcge<mode>;
1668 complimentary_comparison = gen_neon_vcgt<mode>;
1676 base_comparison = gen_neon_vcgt<mode>;
1677 complimentary_comparison = gen_neon_vcge<mode>;
1682 base_comparison = gen_neon_vceq<mode>;
1683 complimentary_comparison = gen_neon_vceq<mode>;
1689 switch (GET_CODE (operands[3]))
1696 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1697 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1703 Note that there also exist direct comparison against 0 forms,
1704 so catch those as a special case. */
1708 switch (GET_CODE (operands[3]))
1711 base_comparison = gen_neon_vclt<mode>;
1714 base_comparison = gen_neon_vcle<mode>;
1717 /* Do nothing, other zero form cases already have the correct
1724 emit_insn (base_comparison (mask, operands[4], operands[5]));
1726 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1733 /* Vector compare returns false for lanes which are unordered, so if we use
1734 the inverse of the comparison we actually want to emit, then
1735 swap the operands to BSL, we will end up with the correct result.
1736 Note that a NE NaN and NaN NE b are true for all a, b.
1738 Our transformations are:
1743 a NE b -> !(a EQ b) */
1746 emit_insn (base_comparison (mask, operands[4], operands[5]));
1748 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1750 swap_bsl_operands = 1;
1753 /* We check (a > b || b > a). Combining these comparisons gives us
1754 true iff (a != b && a ORDERED b); swapping the operands to BSL
1755 will then give us (a == b || a UNORDERED b) as intended. */
1757 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1758 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1759 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1760 swap_bsl_operands = 1;
1763 /* Operands are ORDERED iff (a > b || b >= a).
1764 Swapping the operands to BSL will give the UNORDERED case. */
1765 swap_bsl_operands = 1;
1768 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1769 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1770 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1776 if (swap_bsl_operands)
1777 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1780 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Unsigned-comparison counterpart of vcond for the integer VDQIW modes.
;; Operand 3 is the comparison of operands 4 and 5; operands 1 and 2 are the
;; values to select between.  The expansion builds a <V_cmp_result>mode mask
;; with vcgeu, vcgtu or vceq (or vcle/vclt on the visible zero-operand
;; paths), swapping operands 4 and 5 for some codes, and finishes with
;; neon_vbsl.  The two vbsl emits differ in whether operand 2 or operand 1
;; is passed first (presumably selected by the `inverse' flag -- TODO
;; confirm).
1785 (define_expand "vcondu<mode><mode>"
1786 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1788 (match_operator 3 "arm_comparison_operator"
1789 [(match_operand:VDQIW 4 "s_register_operand" "")
1790 (match_operand:VDQIW 5 "s_register_operand" "")])
1791 (match_operand:VDQIW 1 "s_register_operand" "")
1792 (match_operand:VDQIW 2 "s_register_operand" "")))]
1796 int inverse = 0, immediate_zero = 0;
1798 mask = gen_reg_rtx (<V_cmp_result>mode);
1800 if (operands[5] == CONST0_RTX (<MODE>mode))
1802 else if (!REG_P (operands[5]))
1803 operands[5] = force_reg (<MODE>mode, operands[5]);
1805 switch (GET_CODE (operands[3]))
1808 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1812 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1816 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1821 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1823 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1828 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1830 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1834 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1843 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1846 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1852 ;; Patterns for builtins.
1854 ; good for plain vadd, vaddq.
;; Builtin expander for vector add on the VCVTF modes.  When the mode is
;; not a float mode, or flag_unsafe_math_optimizations is set, it emits the
;; generic add<mode>3 pattern; the other emit uses neon_vadd<mode>_unspec.
1856 (define_expand "neon_vadd<mode>"
1857 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1858 (match_operand:VCVTF 1 "s_register_operand" "w")
1859 (match_operand:VCVTF 2 "s_register_operand" "w")]
1862 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1863 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1865 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1870 ; Note that NEON operations don't support the full IEEE 754 standard: in
1871 ; particular, denormal values are flushed to zero. This means that GCC cannot
1872 ; use those instructions for autovectorization, etc. unless
1873 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1874 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1875 ; header) must work in either case: if -funsafe-math-optimizations is given,
1876 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1877 ; expand to unspecs (which may potentially limit the extent to which they might
1878 ; be optimized by generic code).
1880 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Vector add on the VCVTF modes modelled as an unspec, emitted as
;; vadd.<V_if_elem> on <V_reg> registers.  The scheduling type is chosen by
;; <Is_float_mode>.
1882 (define_insn "neon_vadd<mode>_unspec"
1883 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1884 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1885 (match_operand:VCVTF 2 "s_register_operand" "w")]
1888 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1890 (if_then_else (match_test "<Is_float_mode>")
1891 (const_string "neon_fp_addsub_s<q>")
1892 (const_string "neon_add<q>")))]
;; Long add: two VDI (D-register) inputs produce a <V_widen> (Q-register)
;; result via vaddl.  <sup> supplies the signedness letter in the mnemonic.
1895 (define_insn "neon_vaddl<sup><mode>"
1896 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1897 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1898 (match_operand:VDI 2 "s_register_operand" "w")]
1901 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1902 [(set_attr "type" "neon_add_long")]
;; Wide add: a <V_widen> (Q-register) operand 1 plus a VDI (D-register)
;; operand 2 produce a <V_widen> result via vaddw.  <sup> supplies the
;; signedness letter in the mnemonic.
1905 (define_insn "neon_vaddw<sup><mode>"
1906 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1907 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1908 (match_operand:VDI 2 "s_register_operand" "w")]
1911 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1912 [(set_attr "type" "neon_add_widen")]
;; Halving add (optionally rounding, via <r>) on the VDQIW modes, emitted as
;; v<r>hadd on <V_reg> registers.  The pattern covers both D- and
;; Q-register modes, so the scheduling type takes the <q> suffix like the
;; other VDQ patterns instead of always claiming the Q-register type.
1917 (define_insn "neon_v<r>hadd<sup><mode>"
1918 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1919 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1920 (match_operand:VDQIW 2 "s_register_operand" "w")]
1923 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1924 [(set_attr "type" "neon_add_halve<q>")]
;; Saturating add builtin on the VDQIX modes, modelled as an unspec and
;; emitted as vqadd on <V_reg> registers.  <sup> supplies the signedness
;; letter in the mnemonic.
1927 (define_insn "neon_vqadd<sup><mode>"
1928 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1929 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1930 (match_operand:VDQIX 2 "s_register_operand" "w")]
1933 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1934 [(set_attr "type" "neon_qadd<q>")]
;; Add-and-narrow (optionally rounding, via <r>): two VN (Q-register)
;; inputs produce a <V_narrow> (D-register) result via v<r>addhn.
1937 (define_insn "neon_v<r>addhn<mode>"
1938 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1939 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1940 (match_operand:VN 2 "s_register_operand" "w")]
1943 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1944 [(set_attr "type" "neon_add_halve_narrow_q")]
1947 ;; Polynomial and Float multiplication.
;; Multiply on the VPF modes, modelled as an unspec and emitted as
;; vmul.<pf><size> on <V_reg> registers.  The scheduling type is chosen by
;; <Is_float_mode>.
1948 (define_insn "neon_vmul<pf><mode>"
1949 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1950 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1951 (match_operand:VPF 2 "s_register_operand" "w")]
1954 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1956 (if_then_else (match_test "<Is_float_mode>")
1957 (const_string "neon_fp_mul_s<q>")
1958 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Builtin expander for multiply-accumulate on the VDQW modes.  When the
;; mode is not a float mode, or flag_unsafe_math_optimizations is set, it
;; emits mul<mode>3add<mode>_neon; the other emit uses
;; neon_vmla<mode>_unspec.  Both receive operands 0-3 unchanged.
1961 (define_expand "neon_vmla<mode>"
1962 [(match_operand:VDQW 0 "s_register_operand" "=w")
1963 (match_operand:VDQW 1 "s_register_operand" "0")
1964 (match_operand:VDQW 2 "s_register_operand" "w")
1965 (match_operand:VDQW 3 "s_register_operand" "w")]
1968 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1969 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1970 operands[2], operands[3]));
1972 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1973 operands[2], operands[3]));
;; Builtin expander for fused multiply-add on the VCVTF modes; requires
;; TARGET_FMA.  It forwards to fma<mode>4_intrinsic with operands 2 and 3
;; passed ahead of the remaining argument (presumably operand 1, the
;; accumulator -- TODO confirm).
1977 (define_expand "neon_vfma<VCVTF:mode>"
1978 [(match_operand:VCVTF 0 "s_register_operand")
1979 (match_operand:VCVTF 1 "s_register_operand")
1980 (match_operand:VCVTF 2 "s_register_operand")
1981 (match_operand:VCVTF 3 "s_register_operand")]
1982 "TARGET_NEON && TARGET_FMA"
1984 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-subtract on the VCVTF modes; requires
;; TARGET_FMA.  It forwards to fmsub<mode>4_intrinsic with operands 2 and 3
;; passed ahead of the remaining argument (presumably operand 1, the
;; accumulator -- TODO confirm).
1989 (define_expand "neon_vfms<VCVTF:mode>"
1990 [(match_operand:VCVTF 0 "s_register_operand")
1991 (match_operand:VCVTF 1 "s_register_operand")
1992 (match_operand:VCVTF 2 "s_register_operand")
1993 (match_operand:VCVTF 3 "s_register_operand")]
1994 "TARGET_NEON && TARGET_FMA"
1996 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2001 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Multiply-accumulate on the VDQW modes modelled as an unspec.  Operand 1
;; is tied to the output register ("0"); operands 2 and 3 are the
;; multiplicands named in the vmla template.  The scheduling type is chosen
;; by <Is_float_mode>.
2003 (define_insn "neon_vmla<mode>_unspec"
2004 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2005 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2006 (match_operand:VDQW 2 "s_register_operand" "w")
2007 (match_operand:VDQW 3 "s_register_operand" "w")]
2010 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2012 (if_then_else (match_test "<Is_float_mode>")
2013 (const_string "neon_fp_mla_s<q>")
2014 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-accumulate: the <V_widen> accumulator in operand 1 is tied
;; to the output ("0"); VW operands 2 and 3 are the D-register
;; multiplicands named in the vmlal template.
2017 (define_insn "neon_vmlal<sup><mode>"
2018 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2019 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2020 (match_operand:VW 2 "s_register_operand" "w")
2021 (match_operand:VW 3 "s_register_operand" "w")]
2024 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2025 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin expander for multiply-subtract on the VDQW modes.  When the mode
;; is not a float mode, or flag_unsafe_math_optimizations is set, it emits
;; mul<mode>3neg<mode>add<mode>_neon; the other emit uses
;; neon_vmls<mode>_unspec.  Both receive operands 0-3 unchanged.
2028 (define_expand "neon_vmls<mode>"
2029 [(match_operand:VDQW 0 "s_register_operand" "=w")
2030 (match_operand:VDQW 1 "s_register_operand" "0")
2031 (match_operand:VDQW 2 "s_register_operand" "w")
2032 (match_operand:VDQW 3 "s_register_operand" "w")]
2035 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2036 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2037 operands[1], operands[2], operands[3]));
2039 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2040 operands[2], operands[3]));
2044 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Multiply-subtract on the VDQW modes modelled as an unspec.  Operand 1 is
;; tied to the output register ("0"); operands 2 and 3 are the
;; multiplicands named in the vmls template.  The scheduling type is chosen
;; by <Is_float_mode>.
2046 (define_insn "neon_vmls<mode>_unspec"
2047 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2048 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2049 (match_operand:VDQW 2 "s_register_operand" "w")
2050 (match_operand:VDQW 3 "s_register_operand" "w")]
2053 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2055 (if_then_else (match_test "<Is_float_mode>")
2056 (const_string "neon_fp_mla_s<q>")
2057 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-subtract: the <V_widen> accumulator in operand 1 is tied
;; to the output ("0"); VW operands 2 and 3 are the D-register
;; multiplicands named in the vmlsl template.
2060 (define_insn "neon_vmlsl<sup><mode>"
2061 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2062 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2063 (match_operand:VW 2 "s_register_operand" "w")
2064 (match_operand:VW 3 "s_register_operand" "w")]
2067 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2068 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2071 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics: <r> selects between vqdmulh and
;; vqrdmulh.  Operands are VMDQI vectors in <V_reg> registers.
2072 (define_insn "neon_vq<r>dmulh<mode>"
2073 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2074 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2075 (match_operand:VMDQI 2 "s_register_operand" "w")]
2078 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2079 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; vqdmlal: VMDI-mode operands 2 and 3 feed a <V_widen>-mode result whose
;; input value (operand 1) is tied to the destination register.
2082 (define_insn "neon_vqdmlal<mode>"
2083 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2084 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2085 (match_operand:VMDI 2 "s_register_operand" "w")
2086 (match_operand:VMDI 3 "s_register_operand" "w")]
2089 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2090 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: same operand layout as neon_vqdmlal<mode> (operand 1 tied to the
;; <V_widen> destination, VMDI sources in operands 2 and 3), but emitting the
;; vqdmlsl mnemonic.
2093 (define_insn "neon_vqdmlsl<mode>"
2094 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2095 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2096 (match_operand:VMDI 2 "s_register_operand" "w")
2097 (match_operand:VMDI 3 "s_register_operand" "w")]
2100 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2101 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW-mode sources produce a <V_widen>-mode result.  <sup>
;; supplies the signedness letter in front of the element size.
2104 (define_insn "neon_vmull<sup><mode>"
2105 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2106 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2107 (match_operand:VW 2 "s_register_operand" "w")]
2110 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2111 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI-mode sources produce a <V_widen>-mode result.
2114 (define_insn "neon_vqdmull<mode>"
2115 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2116 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2117 (match_operand:VMDI 2 "s_register_operand" "w")]
2120 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2121 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for the vsub builtin over the VCVTF modes.  When the mode is not
;; a float mode, or flag_unsafe_math_optimizations is set, the generic
;; sub<mode>3 pattern is emitted; neon_vsub<mode>_unspec is the form emitted
;; for the remaining case.
2124 (define_expand "neon_vsub<mode>"
2125 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2126 (match_operand:VCVTF 1 "s_register_operand" "w")
2127 (match_operand:VCVTF 2 "s_register_operand" "w")]
2130 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2131 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2133 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2138 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub over the VCVTF modes, with two register sources.
;; The "type" attribute is neon_fp_addsub_s<q> for float modes and
;; neon_sub<q> otherwise.
2140 (define_insn "neon_vsub<mode>_unspec"
2141 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2142 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2143 (match_operand:VCVTF 2 "s_register_operand" "w")]
2146 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2148 (if_then_else (match_test "<Is_float_mode>")
2149 (const_string "neon_fp_addsub_s<q>")
2150 (const_string "neon_sub<q>")))]
;; vsubl: two VDI-mode sources, <V_widen>-mode result.
2153 (define_insn "neon_vsubl<sup><mode>"
2154 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2155 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2156 (match_operand:VDI 2 "s_register_operand" "w")]
2159 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2160 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 is already in the <V_widen> mode (printed with %q like
;; the destination); only operand 2 is in the narrower VDI mode.
2163 (define_insn "neon_vsubw<sup><mode>"
2164 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2165 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2166 (match_operand:VDI 2 "s_register_operand" "w")]
2169 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2170 [(set_attr "type" "neon_sub_widen")]
;; vqsub over the VDQIX modes; sources and result share one mode, and <sup>
;; supplies the signedness letter.
2173 (define_insn "neon_vqsub<sup><mode>"
2174 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2175 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2176 (match_operand:VDQIX 2 "s_register_operand" "w")]
2179 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2180 [(set_attr "type" "neon_qsub<q>")]
;; vhsub over the VDQIW modes; sources and result share one mode.
2183 (define_insn "neon_vhsub<sup><mode>"
2184 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2185 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2186 (match_operand:VDQIW 2 "s_register_operand" "w")]
2189 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2190 [(set_attr "type" "neon_sub_halve<q>")]
;; vsubhn / vrsubhn (the <r> attribute supplies the optional "r"): two
;; VN-mode sources printed with %q, <V_narrow>-mode result printed with %P.
2193 (define_insn "neon_v<r>subhn<mode>"
2194 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2195 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2196 (match_operand:VN 2 "s_register_operand" "w")]
2199 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2200 [(set_attr "type" "neon_sub_halve_narrow_q")]
;; vceq over the VDQW modes, result in <V_cmp_result>.  Two alternatives:
;; operand 2 is either a register ("w") or a zero constant ("Dz"), in which
;; case the template prints a literal #0.  The "type" attribute is
;; neon_fp_compare_s<q> for float modes; for other modes it is
;; neon_compare_zero<q> when operand 2 is zero and neon_compare<q> otherwise.
2203 (define_insn "neon_vceq<mode>"
2204 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2205 (unspec:<V_cmp_result>
2206 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2207 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
2211 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2212 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2214 (if_then_else (match_test "<Is_float_mode>")
2215 (const_string "neon_fp_compare_s<q>")
2216 (if_then_else (match_operand 2 "zero_operand")
2217 (const_string "neon_compare_zero<q>")
2218 (const_string "neon_compare<q>"))))]
;; vcge over the VDQW modes using the <V_s_elem> element suffix.  Same
;; two-alternative layout as neon_vceq<mode>: register operand 2, or a zero
;; constant printed as #0, with the "type" attribute chosen the same way.
2221 (define_insn "neon_vcge<mode>"
2222 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2223 (unspec:<V_cmp_result>
2224 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2225 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
2229 vcge.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2230 vcge.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
2232 (if_then_else (match_test "<Is_float_mode>")
2233 (const_string "neon_fp_compare_s<q>")
2234 (if_then_else (match_operand 2 "zero_operand")
2235 (const_string "neon_compare_zero<q>")
2236 (const_string "neon_compare<q>"))))]
;; Unsigned vcge (".u" element suffix) over the VDQIW modes.  Only a
;; register-register form is provided; there is no compare-with-zero
;; alternative.
2239 (define_insn "neon_vcgeu<mode>"
2240 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2241 (unspec:<V_cmp_result>
2242 [(match_operand:VDQIW 1 "s_register_operand" "w")
2243 (match_operand:VDQIW 2 "s_register_operand" "w")]
2246 "vcge.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2247 [(set_attr "type" "neon_compare<q>")]
;; vcgt over the VDQW modes using the <V_s_elem> element suffix.  Operand 2
;; is either a register or a zero constant (printed as #0); the "type"
;; attribute distinguishes float, compare-with-zero and plain compares.
2250 (define_insn "neon_vcgt<mode>"
2251 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2252 (unspec:<V_cmp_result>
2253 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2254 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
2258 vcgt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2259 vcgt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
2261 (if_then_else (match_test "<Is_float_mode>")
2262 (const_string "neon_fp_compare_s<q>")
2263 (if_then_else (match_operand 2 "zero_operand")
2264 (const_string "neon_compare_zero<q>")
2265 (const_string "neon_compare<q>"))))]
;; Unsigned vcgt (".u" element suffix) over the VDQIW modes; register-register
;; form only.
2268 (define_insn "neon_vcgtu<mode>"
2269 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2270 (unspec:<V_cmp_result>
2271 [(match_operand:VDQIW 1 "s_register_operand" "w")
2272 (match_operand:VDQIW 2 "s_register_operand" "w")]
2275 "vcgt.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2276 [(set_attr "type" "neon_compare<q>")]
2279 ;; VCLE and VCLT only support comparisons with immediate zero (register
2280 ;; variants are VCGE and VCGT with operands reversed).
;; vcle against zero: operand 2 must satisfy zero_operand ("Dz") and the
;; template always prints #0.
;; NOTE(review): because operand 2's predicate is already zero_operand, the
;; inner (match_operand 2 "zero_operand") test in the "type" attribute looks
;; always true, making the neon_compare<q> arm unreachable -- confirm.
2282 (define_insn "neon_vcle<mode>"
2283 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2284 (unspec:<V_cmp_result>
2285 [(match_operand:VDQW 1 "s_register_operand" "w")
2286 (match_operand:VDQW 2 "zero_operand" "Dz")]
2289 "vcle.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
2291 (if_then_else (match_test "<Is_float_mode>")
2292 (const_string "neon_fp_compare_s<q>")
2293 (if_then_else (match_operand 2 "zero_operand")
2294 (const_string "neon_compare_zero<q>")
2295 (const_string "neon_compare<q>"))))]
;; vclt against zero: operand 2 must satisfy zero_operand ("Dz") and the
;; template always prints #0.
;; NOTE(review): as operand 2's predicate is already zero_operand, the inner
;; zero_operand test in the "type" attribute looks always true -- confirm.
2298 (define_insn "neon_vclt<mode>"
2299 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2300 (unspec:<V_cmp_result>
2301 [(match_operand:VDQW 1 "s_register_operand" "w")
2302 (match_operand:VDQW 2 "zero_operand" "Dz")]
2305 "vclt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
2307 (if_then_else (match_test "<Is_float_mode>")
2308 (const_string "neon_fp_compare_s<q>")
2309 (if_then_else (match_operand 2 "zero_operand")
2310 (const_string "neon_compare_zero<q>")
2311 (const_string "neon_compare<q>"))))]
;; The pattern is named vcage but emits the vacge mnemonic.  Two VCVTF-mode
;; register sources, <V_cmp_result>-mode result.
2314 (define_insn "neon_vcage<mode>"
2315 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2316 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2317 (match_operand:VCVTF 2 "s_register_operand" "w")]
2320 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2321 [(set_attr "type" "neon_fp_compare_s<q>")]
;; The pattern is named vcagt but emits the vacgt mnemonic.  Two VCVTF-mode
;; register sources, <V_cmp_result>-mode result.
2324 (define_insn "neon_vcagt<mode>"
2325 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2326 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2327 (match_operand:VCVTF 2 "s_register_operand" "w")]
2330 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2331 [(set_attr "type" "neon_fp_compare_s<q>")]
;; vtst over the VDQIW modes.  The element suffix is just the element size
;; (<V_sz_elem>), with no signedness or type letter.
2334 (define_insn "neon_vtst<mode>"
2335 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2336 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2337 (match_operand:VDQIW 2 "s_register_operand" "w")]
2340 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2341 [(set_attr "type" "neon_tst<q>")]
;; Integer vabd over the VDQIW modes; <sup> supplies the signedness letter.
;; The float counterpart is the separate neon_vabdf<mode> pattern.
2344 (define_insn "neon_vabd<sup><mode>"
2345 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2346 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2347 (match_operand:VDQIW 2 "s_register_operand" "w")]
2350 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2351 [(set_attr "type" "neon_abd<q>")]
;; vabd over the VCVTF modes, typed neon_fp_abd_s<q>.
2354 (define_insn "neon_vabdf<mode>"
2355 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2356 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2357 (match_operand:VCVTF 2 "s_register_operand" "w")]
2360 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2361 [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: two VW-mode sources, <V_widen>-mode result.
2364 (define_insn "neon_vabdl<sup><mode>"
2365 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2366 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2367 (match_operand:VW 2 "s_register_operand" "w")]
2370 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2371 [(set_attr "type" "neon_abd_long")]
;; vaba: expressed in RTL as a PLUS of an unspec over operands 2 and 3 and
;; operand 1, where operand 1 is tied to the destination.  Only
;; operands 0, 2 and 3 appear in the assembler template.
2374 (define_insn "neon_vaba<sup><mode>"
2375 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2376 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2377 (match_operand:VDQIW 3 "s_register_operand" "w")]
2379 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2381 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2382 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal: like neon_vaba<sup><mode>, a PLUS of an unspec over operands 2 and
;; 3 and the tied operand 1, but the unspec takes VW-mode sources and
;; the sum is in the <V_widen> mode.
2385 (define_insn "neon_vabal<sup><mode>"
2386 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2387 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2388 (match_operand:VW 3 "s_register_operand" "w")]
2390 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2392 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2393 [(set_attr "type" "neon_arith_acc<q>")]
;; Integer vmax/vmin over the VDQIW modes: <maxmin> supplies the mnemonic
;; stem and <sup> the signedness letter.
2396 (define_insn "neon_v<maxmin><sup><mode>"
2397 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2398 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2399 (match_operand:VDQIW 2 "s_register_operand" "w")]
2402 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2403 [(set_attr "type" "neon_minmax<q>")]
;; vmax/vmin over the VCVTF modes, typed neon_fp_minmax_s<q>; <maxmin>
;; supplies the mnemonic stem.
2406 (define_insn "neon_v<maxmin>f<mode>"
2407 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2408 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2409 (match_operand:VCVTF 2 "s_register_operand" "w")]
2412 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2413 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander for the vpadd builtin over the VD modes; it forwards to the
;; neon_vpadd_internal<mode> pattern.
2416 (define_expand "neon_vpadd<mode>"
2417 [(match_operand:VD 0 "s_register_operand" "=w")
2418 (match_operand:VD 1 "s_register_operand" "w")
2419 (match_operand:VD 2 "s_register_operand" "w")]
2422 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: a single VDQIW-mode source, result in the <V_double_width> mode.
2427 (define_insn "neon_vpaddl<sup><mode>"
2428 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2429 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2432 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2433 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: operand 1 (in <V_double_width>) is tied to the destination;
;; operand 2 is the VDQIW-mode source printed in the template.
2436 (define_insn "neon_vpadal<sup><mode>"
2437 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2438 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2439 (match_operand:VDQIW 2 "s_register_operand" "w")]
2442 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2443 [(set_attr "type" "neon_reduc_add_acc")]
;; Integer vpmax/vpmin over the VDI modes; <maxmin> supplies the mnemonic
;; stem and <sup> the signedness letter.
2446 (define_insn "neon_vp<maxmin><sup><mode>"
2447 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2448 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2449 (match_operand:VDI 2 "s_register_operand" "w")]
2452 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2453 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vpmax/vpmin over the VCVTF modes, typed neon_fp_reduc_minmax_s<q>.
2456 (define_insn "neon_vp<maxmin>f<mode>"
2457 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2458 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2459 (match_operand:VCVTF 2 "s_register_operand" "w")]
2462 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2463 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps over the VCVTF modes, with two register sources.
2466 (define_insn "neon_vrecps<mode>"
2467 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2468 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2469 (match_operand:VCVTF 2 "s_register_operand" "w")]
2472 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2473 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts over the VCVTF modes, with two register sources.
2476 (define_insn "neon_vrsqrts<mode>"
2477 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2478 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2479 (match_operand:VCVTF 2 "s_register_operand" "w")]
2482 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2483 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for the vabs builtin: a thin wrapper that emits the standard
;; abs<mode>2 pattern on the same two operands.
2486 (define_expand "neon_vabs<mode>"
2487 [(match_operand:VDQW 0 "s_register_operand" "")
2488 (match_operand:VDQW 1 "s_register_operand" "")]
2491 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs over the VDQIW modes, with a single register source.
2495 (define_insn "neon_vqabs<mode>"
2496 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2497 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2500 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2501 [(set_attr "type" "neon_qabs<q>")]
;; Implements the RTL bswap code for the VDQHSD modes with
;; vrev<element size>.8.  Unlike the neighbouring patterns this one uses the
;; register_operand predicate rather than s_register_operand.
2504 (define_insn "neon_bswap<mode>"
2505 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2506 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2508 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2509 [(set_attr "type" "neon_rev<q>")]
;; Expander for the vneg builtin: a thin wrapper that emits the standard
;; neg<mode>2 pattern on the same two operands.
2512 (define_expand "neon_vneg<mode>"
2513 [(match_operand:VDQW 0 "s_register_operand" "")
2514 (match_operand:VDQW 1 "s_register_operand" "")]
2517 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; copysign for the VCVTF modes, built on a bit-select.  The expander builds a
;; constant vector in the <V_cmp_result> mode whose every element is
;; 0x80000000, loads it into a fresh pseudo, copies operand 2 into operand 0,
;; views the mask in <MODE> via a subreg at offset 0, and then emits
;; neon_vbsl<mode> with operand 0 as destination and the mask as selector.
;; NOTE(review): presumably the <V_cmp_result> elements are 32-bit integers,
;; in which case GEN_INT (0x80000000) is not sign-extended on hosts with a
;; 64-bit HOST_WIDE_INT; gen_int_mode may be the safer spelling -- confirm.
2521 (define_expand "neon_copysignf<mode>"
2522 [(match_operand:VCVTF 0 "register_operand")
2523 (match_operand:VCVTF 1 "register_operand")
2524 (match_operand:VCVTF 2 "register_operand")]
2528 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2529 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2530 rtvec v = rtvec_alloc (n_elt);
2532 /* Create bitmask for vector select. */
2533 for (i = 0; i < n_elt; ++i)
2534 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
2536 emit_move_insn (v_bitmask,
2537 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2538 emit_move_insn (operands[0], operands[2]);
2539 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2540 <VCVTF:V_cmp_result>mode, 0);
2541 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; vqneg over the VDQIW modes, with a single register source.
2548 (define_insn "neon_vqneg<mode>"
2549 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2550 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2553 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2554 [(set_attr "type" "neon_qneg<q>")]
;; vcls over the VDQIW modes, with a single register source.
2557 (define_insn "neon_vcls<mode>"
2558 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2559 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2562 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2563 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz<mode>2 pattern for the VDQIW modes: matches the RTL clz
;; code directly (no unspec) and emits vclz.  neon_vclz<mode> expands to it.
2566 (define_insn "clz<mode>2"
2567 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2568 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2570 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2571 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vclz builtin: a thin wrapper around clz<mode>2.
2574 (define_expand "neon_vclz<mode>"
2575 [(match_operand:VDQIW 0 "s_register_operand" "")
2576 (match_operand:VDQIW 1 "s_register_operand" "")]
2579 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount<mode>2 pattern for the VE modes: matches the RTL
;; popcount code directly and emits vcnt.  neon_vcnt<mode> expands to it.
2583 (define_insn "popcount<mode>2"
2584 [(set (match_operand:VE 0 "s_register_operand" "=w")
2585 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2587 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2588 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vcnt builtin: a thin wrapper around popcount<mode>2.
2591 (define_expand "neon_vcnt<mode>"
2592 [(match_operand:VE 0 "s_register_operand" "=w")
2593 (match_operand:VE 1 "s_register_operand" "w")]
2596 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; vrecpe over the V32 modes, with a single register source; the element
;; suffix comes from <V_u_elem>.
2600 (define_insn "neon_vrecpe<mode>"
2601 [(set (match_operand:V32 0 "s_register_operand" "=w")
2602 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2605 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2606 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrsqrte over the V32 modes, with a single register source; the element
;; suffix comes from <V_u_elem>.
2609 (define_insn "neon_vrsqrte<mode>"
2610 [(set (match_operand:V32 0 "s_register_operand" "=w")
2611 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2614 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2615 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for the vmvn builtin: a thin wrapper that emits the standard
;; one_cmpl<mode>2 pattern on the same two operands.
2618 (define_expand "neon_vmvn<mode>"
2619 [(match_operand:VDQIW 0 "s_register_operand" "")
2620 (match_operand:VDQIW 1 "s_register_operand" "")]
2623 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Lane extract from a VD-mode vector into an SImode core register ("=r")
;; using vmov.s<element size>.  Operand 2 is the immediate lane index of the
;; vec_select.  On big-endian targets the index is mirrored
;; (NUNITS - 1 - elt) before being printed.
2627 (define_insn "neon_vget_lane<mode>_sext_internal"
2628 [(set (match_operand:SI 0 "s_register_operand" "=r")
2630 (vec_select:<V_elem>
2631 (match_operand:VD 1 "s_register_operand" "w")
2632 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2635 if (BYTES_BIG_ENDIAN)
2637 int elt = INTVAL (operands[2]);
2638 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2639 operands[2] = GEN_INT (elt);
2641 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2643 [(set_attr "type" "neon_to_gp")]
;; Lane extract from a VD-mode vector into an SImode core register ("=r")
;; using vmov.u<element size>.  Identical in structure to the _sext_internal
;; variant, including the big-endian lane mirroring (NUNITS - 1 - elt).
2646 (define_insn "neon_vget_lane<mode>_zext_internal"
2647 [(set (match_operand:SI 0 "s_register_operand" "=r")
2649 (vec_select:<V_elem>
2650 (match_operand:VD 1 "s_register_operand" "w")
2651 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2654 if (BYTES_BIG_ENDIAN)
2656 int elt = INTVAL (operands[2]);
2657 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2658 operands[2] = GEN_INT (elt);
2660 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2662 [(set_attr "type" "neon_to_gp")]
;; VQ-mode counterpart of the _sext_internal lane extract.  The vmov is
;; issued on one <V_HALF>-mode half of the source: the half is chosen by
;; elt / halfelts (register number regno + 2 * that quotient) and the lane
;; within the half is elt % halfelts, mirrored on big-endian targets.  The
;; instruction is printed via output_asm_insn on a local operand array.
2665 (define_insn "neon_vget_lane<mode>_sext_internal"
2666 [(set (match_operand:SI 0 "s_register_operand" "=r")
2668 (vec_select:<V_elem>
2669 (match_operand:VQ 1 "s_register_operand" "w")
2670 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2674 int regno = REGNO (operands[1]);
2675 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2676 unsigned int elt = INTVAL (operands[2]);
2677 unsigned int elt_adj = elt % halfelts;
2679 if (BYTES_BIG_ENDIAN)
2680 elt_adj = halfelts - 1 - elt_adj;
2682 ops[0] = operands[0];
2683 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2684 ops[2] = GEN_INT (elt_adj);
2685 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2689 [(set_attr "type" "neon_to_gp_q")]
;; VQ-mode counterpart of the _zext_internal lane extract.  Same half
;; selection as the VQ _sext_internal pattern (half = elt / halfelts, lane
;; within the half = elt % halfelts, mirrored on big-endian), but the
;; instruction printed is vmov.u<element size>.
2692 (define_insn "neon_vget_lane<mode>_zext_internal"
2693 [(set (match_operand:SI 0 "s_register_operand" "=r")
2695 (vec_select:<V_elem>
2696 (match_operand:VQ 1 "s_register_operand" "w")
2697 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2701 int regno = REGNO (operands[1]);
2702 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2703 unsigned int elt = INTVAL (operands[2]);
2704 unsigned int elt_adj = elt % halfelts;
2706 if (BYTES_BIG_ENDIAN)
2707 elt_adj = halfelts - 1 - elt_adj;
2709 ops[0] = operands[0];
2710 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2711 ops[2] = GEN_INT (elt_adj);
2712 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2716 [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane builtin over the VDQW modes.  Steps:
;;   1. check the lane index with neon_lane_bounds against the mode's
;;      element count;
;;   2. on big-endian targets, remap the lane with elt ^= reg_nelts - 1,
;;      where reg_nelts = 64 / element bit size;
;;   3. for 32-bit elements emit vec_extract<mode>; the
;;      neon_vget_lane<mode>_sext_internal pattern is the other form emitted
;;      here (presumably for narrower elements).
2719 (define_expand "neon_vget_lane<mode>"
2720 [(match_operand:<V_ext> 0 "s_register_operand" "")
2721 (match_operand:VDQW 1 "s_register_operand" "")
2722 (match_operand:SI 2 "immediate_operand" "")]
2725 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2727 if (BYTES_BIG_ENDIAN)
2729 /* The intrinsics are defined in terms of a model where the
2730 element ordering in memory is vldm order, whereas the generic
2731 RTL is defined in terms of a model where the element ordering
2732 in memory is array order. Convert the lane number to conform
2734 unsigned int elt = INTVAL (operands[2]);
2735 unsigned int reg_nelts
2736 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2737 elt ^= reg_nelts - 1;
2738 operands[2] = GEN_INT (elt);
2741 if (GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2742 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2744 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Expander for the unsigned vget_lane builtin over the VDQIW modes.  It
;; mirrors neon_vget_lane<mode>: bounds check, big-endian lane remapping
;; (elt ^= reg_nelts - 1 with reg_nelts = 64 / element bit size), then
;; vec_extract<mode> for 32-bit elements; the other form emitted here is
;; neon_vget_lane<mode>_zext_internal.
2750 (define_expand "neon_vget_laneu<mode>"
2751 [(match_operand:<V_ext> 0 "s_register_operand" "")
2752 (match_operand:VDQIW 1 "s_register_operand" "")
2753 (match_operand:SI 2 "immediate_operand" "")]
2756 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2758 if (BYTES_BIG_ENDIAN)
2760 /* The intrinsics are defined in terms of a model where the
2761 element ordering in memory is vldm order, whereas the generic
2762 RTL is defined in terms of a model where the element ordering
2763 in memory is array order. Convert the lane number to conform
2765 unsigned int elt = INTVAL (operands[2]);
2766 unsigned int reg_nelts
2767 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2768 elt ^= reg_nelts - 1;
2769 operands[2] = GEN_INT (elt);
2772 if (GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2773 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2775 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; vget_lane for a DImode "vector": the lane index (operand 2) is only
;; passed to neon_lane_bounds with bounds (0, 1), after which the operation
;; is a plain move of operand 1 into operand 0.
2781 (define_expand "neon_vget_lanedi"
2782 [(match_operand:DI 0 "s_register_operand" "=r")
2783 (match_operand:DI 1 "s_register_operand" "w")
2784 (match_operand:SI 2 "immediate_operand" "")]
2787 neon_lane_bounds (operands[2], 0, 1);
2788 emit_move_insn (operands[0], operands[1]);
;; vget_lane for V2DI: switches on the lane index and moves either the low
;; DImode part or the high DImode part of operand 1 into operand 0.
;; NOTE(review): the neon_lane_bounds call uses bounds (0, 1) although two
;; lane moves are handled above; presumably it is the error path for any
;; other index -- confirm the intended range.
2792 (define_expand "neon_vget_lanev2di"
2793 [(match_operand:DI 0 "s_register_operand" "")
2794 (match_operand:V2DI 1 "s_register_operand" "")
2795 (match_operand:SI 2 "immediate_operand" "")]
2798 switch (INTVAL (operands[2]))
2801 emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
2804 emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
2807 neon_lane_bounds (operands[2], 0, 1);
;; Expander for the vset_lane builtin over the VDQ modes.  Operand 1 is the
;; scalar to insert, operand 2 the existing vector (tied to the output) and
;; operand 3 the lane index.  After the bounds check, big-endian targets
;; remap the lane with elt ^= reg_nelts - 1 (reg_nelts = 64 / element bit
;; size).  vec_set<mode>_internal receives the lane as a one-hot mask
;; (1 << elt), not as an index.
2813 (define_expand "neon_vset_lane<mode>"
2814 [(match_operand:VDQ 0 "s_register_operand" "=w")
2815 (match_operand:<V_elem> 1 "s_register_operand" "r")
2816 (match_operand:VDQ 2 "s_register_operand" "0")
2817 (match_operand:SI 3 "immediate_operand" "i")]
2820 unsigned int elt = INTVAL (operands[3]);
2821 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
2823 if (BYTES_BIG_ENDIAN)
2825 unsigned int reg_nelts
2826 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2827 elt ^= reg_nelts - 1;
2830 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2831 GEN_INT (1 << elt), operands[2]));
2835 ; A DImode vector has a single lane, so operand 2 (the old vector) is ignored and operand 3 (the lane index) is only bounds-checked.
;; vset_lane for a DImode "vector": operand 3 (the lane index) is passed to
;; neon_lane_bounds with bounds (0, 1), and the new value (operand 1) is
;; simply moved into operand 0.  Operand 2 is not referenced by the body.
2837 (define_expand "neon_vset_lanedi"
2838 [(match_operand:DI 0 "s_register_operand" "=w")
2839 (match_operand:DI 1 "s_register_operand" "r")
2840 (match_operand:DI 2 "s_register_operand" "0")
2841 (match_operand:SI 3 "immediate_operand" "i")]
2844 neon_lane_bounds (operands[3], 0, 1);
2845 emit_move_insn (operands[0], operands[1]);
;; Expander for the vcreate builtin: reinterprets the DImode operand 1 in the
;; destination's VDX mode with gen_lowpart and moves it into operand 0.
2849 (define_expand "neon_vcreate<mode>"
2850 [(match_operand:VDX 0 "s_register_operand" "")
2851 (match_operand:DI 1 "general_operand" "")]
2854 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2855 emit_move_insn (operands[0], src);
;; vdup from a scalar for the VX modes: the RTL is a vec_duplicate of a
;; <V_elem>-mode operand held in a core register (constraint "r").
2859 (define_insn "neon_vdup_n<mode>"
2860 [(set (match_operand:VX 0 "s_register_operand" "=w")
2861 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2863 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2864 [(set_attr "type" "neon_from_gp<q>")]
;; vdup from a scalar for the V32 modes.  Two alternatives: the scalar is in
;; a core register ("r", typed neon_from_gp<q>) or in a register matching
;; "t", printed with the %y modifier and typed neon_dup<q>.
2867 (define_insn "neon_vdup_n<mode>"
2868 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2869 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2872 vdup.<V_sz_elem>\t%<V_reg>0, %1
2873 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2874 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; vdup_n for DImode: duplicating into a single-element vector is just a
;; move of operand 1 into operand 0.
2877 (define_expand "neon_vdup_ndi"
2878 [(match_operand:DI 0 "s_register_operand" "=w")
2879 (match_operand:DI 1 "s_register_operand" "r")]
2882 emit_move_insn (operands[0], operands[1]);
;; vdup_n for V2DI, implemented as two vmov instructions (length 8, type
;; "multiple") that write the %e0 and %f0 parts of the destination.  The
;; source is either a core-register pair (printed as %Q1, %R1) or a "w"
;; register (printed as %P1).
2887 (define_insn "neon_vdup_nv2di"
2888 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2889 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2892 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2893 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2894 [(set_attr "length" "8")
2895 (set_attr "type" "multiple")]
;; vdup from a lane: the source lane is a vec_select of operand 1, which is
;; in the <V_double_vector_mode> mode, with operand 2 as the immediate lane
;; index.  On big-endian targets the index is mirrored
;; (NUNITS - 1 - elt) before printing.  Two templates exist, printing the
;; destination with %P0 or with %q0.
2898 (define_insn "neon_vdup_lane<mode>_internal"
2899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2901 (vec_select:<V_elem>
2902 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2903 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2906 if (BYTES_BIG_ENDIAN)
2908 int elt = INTVAL (operands[2]);
2909 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2910 operands[2] = GEN_INT (elt);
2913 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2915 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2917 [(set_attr "type" "neon_dup<q>")]
;; Expander for the vdup_lane builtin over the VDQW modes.  The lane index is
;; checked against the element count of <V_double_vector_mode> (the mode of
;; the source operand), remapped on big-endian targets with
;; elt ^= reg_nelts - 1 (reg_nelts = 64 / element bit size), and the work is
;; then delegated to neon_vdup_lane<mode>_internal.
2920 (define_expand "neon_vdup_lane<mode>"
2921 [(match_operand:VDQW 0 "s_register_operand" "=w")
2922 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2923 (match_operand:SI 2 "immediate_operand" "i")]
2926 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
2927 if (BYTES_BIG_ENDIAN)
2929 unsigned int elt = INTVAL (operands[2]);
2930 unsigned int reg_nelts
2931 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
2932 elt ^= reg_nelts - 1;
2933 operands[2] = GEN_INT (elt);
2935 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2940 ; Scalar index is ignored, since only zero is valid here.
;; vdup_lane for DImode: the lane index is passed to neon_lane_bounds with
;; bounds (0, 1) and the operation is then a plain move of operand 1 into
;; operand 0.
2941 (define_expand "neon_vdup_lanedi"
2942 [(match_operand:DI 0 "s_register_operand" "=w")
2943 (match_operand:DI 1 "s_register_operand" "w")
2944 (match_operand:SI 2 "immediate_operand" "i")]
2947 neon_lane_bounds (operands[2], 0, 1);
2948 emit_move_insn (operands[0], operands[1]);
2952 ; Likewise for v2di, as the DImode second operand has only a single element.
;; vdup_lane producing V2DI from a DImode source: after passing the lane
;; index to neon_lane_bounds with bounds (0, 1), it reuses neon_vdup_nv2di to
;; duplicate operand 1 into both halves of operand 0.
2953 (define_expand "neon_vdup_lanev2di"
2954 [(match_operand:V2DI 0 "s_register_operand" "=w")
2955 (match_operand:DI 1 "s_register_operand" "w")
2956 (match_operand:SI 2 "immediate_operand" "i")]
2959 neon_lane_bounds (operands[2], 0, 1);
2960 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2964 ; Disabled before reload because we don't want combine doing something silly,
2965 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap for the VDQX modes: a two-set pattern (operand 0 receives
;; operand 1 and, via match_dup, operand 1 receives operand 0), so both
;; operands are read-write ("+w").  The insn condition requires
;; reload_completed, so it can only match after reload.
2966 (define_insn "*neon_vswp<mode>"
2967 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2968 (match_operand:VDQX 1 "s_register_operand" "+w"))
2969 (set (match_dup 1) (match_dup 0))]
2970 "TARGET_NEON && reload_completed"
2971 "vswp\t%<V_reg>0, %<V_reg>1"
2972 [(set_attr "type" "neon_permute<q>")]
2975 ;; In this insn, operand 1 should be the low part, and operand 2 the high part, of the destination vector.
2977 ;; FIXME: A different implementation of this builtin could make it much
2978 ;; more likely that we wouldn't actually need to output anything (we could make
2979 ;; it so that the reg allocator puts things in the right places magically
2980 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vcombine: a vec_concat of two VDX-mode registers into a <V_DOUBLE>-mode
;; register.  It is a define_insn_and_split whose split condition adds
;; "&& reload_completed"; the split itself is performed by
;; neon_split_vcombine.  The insn is typed "multiple".
2982 (define_insn_and_split "neon_vcombine<mode>"
2983 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2984 (vec_concat:<V_DOUBLE>
2985 (match_operand:VDX 1 "s_register_operand" "w")
2986 (match_operand:VDX 2 "s_register_operand" "w")))]
2989 "&& reload_completed"
2992 neon_split_vcombine (operands);
2995 [(set_attr "type" "multiple")]
;; Expander for the vget_high builtin: moves into operand 0 the <V_HALF>-mode
;; subreg of the VQX-mode operand 1 that starts at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. one half-vector into the source.
2998 (define_expand "neon_vget_high<mode>"
2999 [(match_operand:<V_HALF> 0 "s_register_operand")
3000 (match_operand:VQX 1 "s_register_operand")]
3003 emit_move_insn (operands[0],
3004 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3005 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for the vget_low builtin: moves a <V_HALF>-mode subreg of the
;; VQX-mode operand 1 into operand 0.
3009 (define_expand "neon_vget_low<mode>"
3010 [(match_operand:<V_HALF> 0 "s_register_operand")
3011 (match_operand:VQX 1 "s_register_operand")]
3014 emit_move_insn (operands[0],
3015 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named signed int-to-float conversion (RTL float code) from the
;; VCVTI modes to <V_CVTTO>, emitted as vcvt.f32.s32.  The pattern is
;; disabled when flag_rounding_math is set.
3020 (define_insn "float<mode><V_cvtto>2"
3021 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3022 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3023 "TARGET_NEON && !flag_rounding_math"
3024 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3025 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned int-to-float conversion (RTL unsigned_float code)
;; from the VCVTI modes to <V_CVTTO>, emitted as vcvt.f32.u32.  Disabled when
;; flag_rounding_math is set.
3028 (define_insn "floatuns<mode><V_cvtto>2"
3029 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3030 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3031 "TARGET_NEON && !flag_rounding_math"
3032 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3033 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named float-to-signed-int conversion (RTL fix code) from the
;; VCVTF modes to <V_CVTTO>, emitted as vcvt.s32.f32.
3036 (define_insn "fix_trunc<mode><V_cvtto>2"
3037 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3038 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3040 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3041 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named float-to-unsigned-int conversion (RTL unsigned_fix code)
;; from the VCVTF modes to <V_CVTTO>, emitted as vcvt.u32.f32.
3044 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3045 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3046 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3048 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3049 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec vcvt from the VCVTF modes to <V_CVTTO>: the template is
;; vcvt.<sup>32.f32, with <sup> supplying the signedness letter of the
;; integer result.
3052 (define_insn "neon_vcvt<sup><mode>"
3053 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3054 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3057 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3058 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec vcvt from the VCVTI modes to <V_CVTTO>: the template is
;; vcvt.f32.<sup>32, with <sup> supplying the signedness letter of the
;; integer source.
3061 (define_insn "neon_vcvt<sup><mode>"
3062 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3063 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3066 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3067 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; V4HF to V4SF conversion (vcvt.f32.f16), available only when both
;; TARGET_NEON and TARGET_FP16 hold.
3070 (define_insn "neon_vcvtv4sfv4hf"
3071 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3072 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3074 "TARGET_NEON && TARGET_FP16"
3075 "vcvt.f32.f16\t%q0, %P1"
3076 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; V4SF to V4HF conversion (vcvt.f16.f32), available only when both
;; TARGET_NEON and TARGET_FP16 hold.
3079 (define_insn "neon_vcvtv4hfv4sf"
3080 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3081 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3083 "TARGET_NEON && TARGET_FP16"
3084 "vcvt.f16.f32\t%P0, %q1"
3085 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; vcvt with an extra immediate operand, from the VCVTF modes to <V_CVTTO>.
;; Operand 2 is validated at output time with
;; neon_const_bounds (operands[2], 1, 33) -- presumably an exclusive upper
;; bound, i.e. 1..32; confirm -- and printed as the third assembler operand.
3088 (define_insn "neon_vcvt<sup>_n<mode>"
3089 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3090 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3091 (match_operand:SI 2 "immediate_operand" "i")]
3095 neon_const_bounds (operands[2], 1, 33);
3096 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3098 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt with an extra immediate operand, from the VCVTI modes to <V_CVTTO>.
;; Operand 2 is validated at output time with
;; neon_const_bounds (operands[2], 1, 33) -- presumably an exclusive upper
;; bound, i.e. 1..32; confirm -- and printed as the third assembler operand.
3101 (define_insn "neon_vcvt<sup>_n<mode>"
3102 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3103 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3104 (match_operand:SI 2 "immediate_operand" "i")]
3108 neon_const_bounds (operands[2], 1, 33);
3109 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3111 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; vmovn: a VN-mode source (printed with %q) to a <V_narrow>-mode result
;; (printed with %P).
3114 (define_insn "neon_vmovn<mode>"
3115 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3116 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3119 "vmovn.<V_if_elem>\t%P0, %q1"
3120 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; vqmovn: a VN-mode source to a <V_narrow>-mode result; <sup> supplies the
;; signedness letter.
3123 (define_insn "neon_vqmovn<sup><mode>"
3124 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3125 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3128 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3129 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vqmovun.<V_s_elem>: narrows the VN source (operand 1, %q) into the
;; <V_narrow> destination (operand 0, %P).
3132 (define_insn "neon_vqmovun<mode>"
3133 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3134 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3137 "vqmovun.<V_s_elem>\t%P0, %q1"
3138 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move: vmovl with a <sup><V_sz_elem> type suffix, from the VW
;; source (operand 1, %P) into the <V_widen> destination (operand 0, %q).
3141 (define_insn "neon_vmovl<sup><mode>"
3142 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3143 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3146 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3147 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by scalar lane, VMD form: vmul.<V_if_elem> of operand 1 by lane
;; operand 3 of operand 2 (constrained by <scalar_mul_constraint>).  The lane
;; index is checked with neon_lane_bounds against the element count of
;; <MODE>mode.  The type attribute picks the floating-point or integer
;; scalar-multiply class according to <Is_float_mode>.
3150 (define_insn "neon_vmul_lane<mode>"
3151 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3152 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3153 (match_operand:VMD 2 "s_register_operand"
3154 "<scalar_mul_constraint>")
3155 (match_operand:SI 3 "immediate_operand" "i")]
3159 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3160 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3163 (if_then_else (match_test "<Is_float_mode>")
3164 (const_string "neon_fp_mul_s_scalar<q>")
3165 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by scalar lane, VMQ form: vmul.<V_if_elem> of operand 1 by lane
;; operand 3 of operand 2.  The scalar vector (operand 2) has mode <V_HALF>,
;; so the lane index is checked against the element count of <V_HALF>mode.
;; The type attribute picks the floating-point or integer scalar-multiply
;; class according to <Is_float_mode>.
3168 (define_insn "neon_vmul_lane<mode>"
3169 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3170 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3171 (match_operand:<V_HALF> 2 "s_register_operand"
3172 "<scalar_mul_constraint>")
3173 (match_operand:SI 3 "immediate_operand" "i")]
3177 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3178 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3181 (if_then_else (match_test "<Is_float_mode>")
3182 (const_string "neon_fp_mul_s_scalar<q>")
3183 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Widening multiply by scalar lane: vmull of the VMDI operand 1 by lane
;; operand 3 of the VMDI operand 2, producing a <V_widen> result in operand 0.
;; The lane index is checked against the element count of <MODE>mode.
3186 (define_insn "neon_vmull<sup>_lane<mode>"
3187 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3188 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3189 (match_operand:VMDI 2 "s_register_operand"
3190 "<scalar_mul_constraint>")
3191 (match_operand:SI 3 "immediate_operand" "i")]
3195 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3196 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3198 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by scalar lane (UNSPEC_VQDMULL_LANE): multiplies the VMDI operand 1
;; by lane operand 3 of the VMDI operand 2, producing a <V_widen> result in
;; operand 0.  The lane index is checked against the element count of
;; <MODE>mode.
3201 (define_insn "neon_vqdmull_lane<mode>"
3202 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3203 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3204 (match_operand:VMDI 2 "s_register_operand"
3205 "<scalar_mul_constraint>")
3206 (match_operand:SI 3 "immediate_operand" "i")]
3207 UNSPEC_VQDMULL_LANE))]
3210 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3211 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3213 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vq<r>dmulh by scalar lane, VMQI form: vq<r>dmulh.<V_s_elem> of operand 1 by
;; lane operand 3 of operand 2.  The scalar vector (operand 2) has mode
;; <V_HALF>, so the lane index must be bounded by the element count of
;; <V_HALF>mode (as the VMQ neon_vmul_lane pattern does), not by that of the
;; full-width <MODE>mode, which would accept indices the scalar register
;; does not have.
3216 (define_insn "neon_vq<r>dmulh_lane<mode>"
3217 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3218 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3219 (match_operand:<V_HALF> 2 "s_register_operand"
3220 "<scalar_mul_constraint>")
3221 (match_operand:SI 3 "immediate_operand" "i")]
3225 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3226 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3228 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vq<r>dmulh by scalar lane, VMDI form: vq<r>dmulh.<V_s_elem> of operand 1 by
;; lane operand 3 of operand 2, all printed with %P.  The lane index is
;; checked against the element count of <MODE>mode.
;; NOTE(review): the type attribute hard-codes the "_scalar_q" suffix even
;; though this is the VMDI form; other patterns here use <q> -- confirm this
;; is intended.
3231 (define_insn "neon_vq<r>dmulh_lane<mode>"
3232 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3233 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3234 (match_operand:VMDI 2 "s_register_operand"
3235 "<scalar_mul_constraint>")
3236 (match_operand:SI 3 "immediate_operand" "i")]
3240 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3241 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3243 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Multiply-accumulate by scalar lane, VMD form: operand 1 is tied to the
;; destination ("0"); vmla.<V_if_elem> combines operand 2 with lane operand 4
;; of operand 3.  The lane index is checked against the element count of
;; <MODE>mode.  The type attribute picks the floating-point or integer
;; scalar-mla class according to <Is_float_mode>.
3246 (define_insn "neon_vmla_lane<mode>"
3247 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3248 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3249 (match_operand:VMD 2 "s_register_operand" "w")
3250 (match_operand:VMD 3 "s_register_operand"
3251 "<scalar_mul_constraint>")
3252 (match_operand:SI 4 "immediate_operand" "i")]
3256 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3257 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3260 (if_then_else (match_test "<Is_float_mode>")
3261 (const_string "neon_fp_mla_s_scalar<q>")
3262 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by scalar lane, VMQ form: operand 1 is tied to the
;; destination ("0"); vmla.<V_if_elem> combines operand 2 with lane operand 4
;; of operand 3.  The scalar vector (operand 3) has mode <V_HALF>, so the
;; lane index must be bounded by the element count of <V_HALF>mode (as the
;; VMQ neon_vmul_lane pattern does), not by that of the full-width
;; <MODE>mode.  The type attribute picks the floating-point or integer
;; scalar-mla class according to <Is_float_mode>.
3265 (define_insn "neon_vmla_lane<mode>"
3266 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3267 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3268 (match_operand:VMQ 2 "s_register_operand" "w")
3269 (match_operand:<V_HALF> 3 "s_register_operand"
3270 "<scalar_mul_constraint>")
3271 (match_operand:SI 4 "immediate_operand" "i")]
3275 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3276 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3279 (if_then_else (match_test "<Is_float_mode>")
3280 (const_string "neon_fp_mla_s_scalar<q>")
3281 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by scalar lane: the <V_widen> accumulator
;; (operand 1) is tied to the destination; vmlal combines the VMDI operand 2
;; with lane operand 4 of the VMDI operand 3.  The lane index is checked
;; against the element count of <MODE>mode.
3284 (define_insn "neon_vmlal<sup>_lane<mode>"
3285 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3286 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3287 (match_operand:VMDI 2 "s_register_operand" "w")
3288 (match_operand:VMDI 3 "s_register_operand"
3289 "<scalar_mul_constraint>")
3290 (match_operand:SI 4 "immediate_operand" "i")]
3294 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3295 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3297 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by scalar lane (UNSPEC_VQDMLAL_LANE): the <V_widen> accumulator
;; (operand 1) is tied to the destination; the VMDI operand 2 is combined
;; with lane operand 4 of the VMDI operand 3.  The lane index is checked
;; against the element count of <MODE>mode.
3300 (define_insn "neon_vqdmlal_lane<mode>"
3301 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3302 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3303 (match_operand:VMDI 2 "s_register_operand" "w")
3304 (match_operand:VMDI 3 "s_register_operand"
3305 "<scalar_mul_constraint>")
3306 (match_operand:SI 4 "immediate_operand" "i")]
3307 UNSPEC_VQDMLAL_LANE))]
3310 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3311 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3313 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by scalar lane, VMD form: operand 1 is tied to the
;; destination ("0"); vmls.<V_if_elem> combines operand 2 with lane operand 4
;; of operand 3.  The lane index is checked against the element count of
;; <MODE>mode.  The type attribute reuses the scalar-mla classes, chosen by
;; <Is_float_mode>.
3316 (define_insn "neon_vmls_lane<mode>"
3317 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3318 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3319 (match_operand:VMD 2 "s_register_operand" "w")
3320 (match_operand:VMD 3 "s_register_operand"
3321 "<scalar_mul_constraint>")
3322 (match_operand:SI 4 "immediate_operand" "i")]
3326 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3327 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3330 (if_then_else (match_test "<Is_float_mode>")
3331 (const_string "neon_fp_mla_s_scalar<q>")
3332 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by scalar lane, VMQ form: operand 1 is tied to the
;; destination ("0"); vmls.<V_if_elem> combines operand 2 with lane operand 4
;; of operand 3.  The scalar vector (operand 3) has mode <V_HALF>, so the
;; lane index must be bounded by the element count of <V_HALF>mode (as the
;; VMQ neon_vmul_lane pattern does), not by that of the full-width
;; <MODE>mode.  The type attribute reuses the scalar-mla classes, chosen by
;; <Is_float_mode>.
3335 (define_insn "neon_vmls_lane<mode>"
3336 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3337 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3338 (match_operand:VMQ 2 "s_register_operand" "w")
3339 (match_operand:<V_HALF> 3 "s_register_operand"
3340 "<scalar_mul_constraint>")
3341 (match_operand:SI 4 "immediate_operand" "i")]
3345 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3346 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3349 (if_then_else (match_test "<Is_float_mode>")
3350 (const_string "neon_fp_mla_s_scalar<q>")
3351 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by scalar lane: the <V_widen> accumulator
;; (operand 1) is tied to the destination; vmlsl combines the VMDI operand 2
;; with lane operand 4 of the VMDI operand 3.  The lane index is checked
;; against the element count of <MODE>mode.
3354 (define_insn "neon_vmlsl<sup>_lane<mode>"
3355 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3356 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3357 (match_operand:VMDI 2 "s_register_operand" "w")
3358 (match_operand:VMDI 3 "s_register_operand"
3359 "<scalar_mul_constraint>")
3360 (match_operand:SI 4 "immediate_operand" "i")]
3364 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3365 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3367 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by scalar lane (UNSPEC_VQDMLSL_LANE): the <V_widen> accumulator
;; (operand 1) is tied to the destination; the VMDI operand 2 is combined
;; with lane operand 4 of the VMDI operand 3.  The lane index is checked
;; against the element count of <MODE>mode.
3370 (define_insn "neon_vqdmlsl_lane<mode>"
3371 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3372 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3373 (match_operand:VMDI 2 "s_register_operand" "w")
3374 (match_operand:VMDI 3 "s_register_operand"
3375 "<scalar_mul_constraint>")
3376 (match_operand:SI 4 "immediate_operand" "i")]
3377 UNSPEC_VQDMLSL_LANE))]
3380 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3381 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3383 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3386 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3387 ; core register into a temp register, then use a scalar taken from that. This
3388 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3389 ; or extracted from another vector. In the latter case it's currently better to
3390 ; use the "_lane" variant, and the former case can probably be implemented
3391 ; using vld1_lane, but that hasn't been done yet.
;; Scalar ("_n") multiply, VMD form: inserts the <V_elem> scalar (operand 2)
;; into lane 0 of a fresh <MODE>mode temporary via gen_neon_vset_lane<mode>,
;; then emits gen_neon_vmul_lane<mode> with that temporary as the scalar
;; vector.
3393 (define_expand "neon_vmul_n<mode>"
3394 [(match_operand:VMD 0 "s_register_operand" "")
3395 (match_operand:VMD 1 "s_register_operand" "")
3396 (match_operand:<V_elem> 2 "s_register_operand" "")]
3399 rtx tmp = gen_reg_rtx (<MODE>mode);
3400 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3401 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") multiply, VMQ form: inserts the <V_elem> scalar (operand 2)
;; into lane 0 of a fresh <V_HALF>mode temporary via
;; gen_neon_vset_lane<V_half>, then emits gen_neon_vmul_lane<mode> with that
;; temporary as the scalar vector.
3406 (define_expand "neon_vmul_n<mode>"
3407 [(match_operand:VMQ 0 "s_register_operand" "")
3408 (match_operand:VMQ 1 "s_register_operand" "")
3409 (match_operand:<V_elem> 2 "s_register_operand" "")]
3412 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3413 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3414 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") form of vmulls: inserts the <V_elem> scalar (operand 2) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vmulls_lane<mode> with that temporary as the scalar vector.
3419 (define_expand "neon_vmulls_n<mode>"
3420 [(match_operand:<V_widen> 0 "s_register_operand" "")
3421 (match_operand:VMDI 1 "s_register_operand" "")
3422 (match_operand:<V_elem> 2 "s_register_operand" "")]
3425 rtx tmp = gen_reg_rtx (<MODE>mode);
3426 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3427 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") form of vmullu: inserts the <V_elem> scalar (operand 2) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vmullu_lane<mode> with that temporary as the scalar vector.
3432 (define_expand "neon_vmullu_n<mode>"
3433 [(match_operand:<V_widen> 0 "s_register_operand" "")
3434 (match_operand:VMDI 1 "s_register_operand" "")
3435 (match_operand:<V_elem> 2 "s_register_operand" "")]
3438 rtx tmp = gen_reg_rtx (<MODE>mode);
3439 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3440 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") form of vqdmull: inserts the <V_elem> scalar (operand 2) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vqdmull_lane<mode> with that temporary as the scalar vector.
3445 (define_expand "neon_vqdmull_n<mode>"
3446 [(match_operand:<V_widen> 0 "s_register_operand" "")
3447 (match_operand:VMDI 1 "s_register_operand" "")
3448 (match_operand:<V_elem> 2 "s_register_operand" "")]
3451 rtx tmp = gen_reg_rtx (<MODE>mode);
3452 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3453 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") form of vqdmulh, VMDI form: inserts the <V_elem> scalar
;; (operand 2) into lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vqdmulh_lane<mode> with that temporary as the scalar vector.
3458 (define_expand "neon_vqdmulh_n<mode>"
3459 [(match_operand:VMDI 0 "s_register_operand" "")
3460 (match_operand:VMDI 1 "s_register_operand" "")
3461 (match_operand:<V_elem> 2 "s_register_operand" "")]
3464 rtx tmp = gen_reg_rtx (<MODE>mode);
3465 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3466 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") form of vqrdmulh, VMDI form: inserts the <V_elem> scalar
;; (operand 2) into lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vqrdmulh_lane<mode> with that temporary as the scalar vector.
3471 (define_expand "neon_vqrdmulh_n<mode>"
3472 [(match_operand:VMDI 0 "s_register_operand" "")
3473 (match_operand:VMDI 1 "s_register_operand" "")
3474 (match_operand:<V_elem> 2 "s_register_operand" "")]
3477 rtx tmp = gen_reg_rtx (<MODE>mode);
3478 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3479 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") form of vqdmulh, VMQI form: inserts the <V_elem> scalar
;; (operand 2) into lane 0 of a fresh <V_HALF>mode temporary, then emits
;; gen_neon_vqdmulh_lane<mode> with that temporary as the scalar vector.
3484 (define_expand "neon_vqdmulh_n<mode>"
3485 [(match_operand:VMQI 0 "s_register_operand" "")
3486 (match_operand:VMQI 1 "s_register_operand" "")
3487 (match_operand:<V_elem> 2 "s_register_operand" "")]
3490 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3491 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3492 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") form of vqrdmulh, VMQI form: inserts the <V_elem> scalar
;; (operand 2) into lane 0 of a fresh <V_HALF>mode temporary, then emits
;; gen_neon_vqrdmulh_lane<mode> with that temporary as the scalar vector.
3497 (define_expand "neon_vqrdmulh_n<mode>"
3498 [(match_operand:VMQI 0 "s_register_operand" "")
3499 (match_operand:VMQI 1 "s_register_operand" "")
3500 (match_operand:<V_elem> 2 "s_register_operand" "")]
3503 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3504 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3505 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Scalar ("_n") multiply-accumulate, VMD form: inserts the <V_elem> scalar
;; (operand 3) into lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vmla_lane<mode> on operands 0, 1 and 2.
3510 (define_expand "neon_vmla_n<mode>"
3511 [(match_operand:VMD 0 "s_register_operand" "")
3512 (match_operand:VMD 1 "s_register_operand" "")
3513 (match_operand:VMD 2 "s_register_operand" "")
3514 (match_operand:<V_elem> 3 "s_register_operand" "")]
3517 rtx tmp = gen_reg_rtx (<MODE>mode);
3518 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3519 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") multiply-accumulate, VMQ form: inserts the <V_elem> scalar
;; (operand 3) into lane 0 of a fresh <V_HALF>mode temporary, then emits
;; gen_neon_vmla_lane<mode> on operands 0, 1 and 2.
3524 (define_expand "neon_vmla_n<mode>"
3525 [(match_operand:VMQ 0 "s_register_operand" "")
3526 (match_operand:VMQ 1 "s_register_operand" "")
3527 (match_operand:VMQ 2 "s_register_operand" "")
3528 (match_operand:<V_elem> 3 "s_register_operand" "")]
3531 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3532 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3533 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") form of vmlals: inserts the <V_elem> scalar (operand 3) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vmlals_lane<mode> on operands 0, 1 and 2.
3538 (define_expand "neon_vmlals_n<mode>"
3539 [(match_operand:<V_widen> 0 "s_register_operand" "")
3540 (match_operand:<V_widen> 1 "s_register_operand" "")
3541 (match_operand:VMDI 2 "s_register_operand" "")
3542 (match_operand:<V_elem> 3 "s_register_operand" "")]
3545 rtx tmp = gen_reg_rtx (<MODE>mode);
3546 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3547 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") form of vmlalu: inserts the <V_elem> scalar (operand 3) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vmlalu_lane<mode> on operands 0, 1 and 2.
3552 (define_expand "neon_vmlalu_n<mode>"
3553 [(match_operand:<V_widen> 0 "s_register_operand" "")
3554 (match_operand:<V_widen> 1 "s_register_operand" "")
3555 (match_operand:VMDI 2 "s_register_operand" "")
3556 (match_operand:<V_elem> 3 "s_register_operand" "")]
3559 rtx tmp = gen_reg_rtx (<MODE>mode);
3560 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3561 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") form of vqdmlal: inserts the <V_elem> scalar (operand 3) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vqdmlal_lane<mode> on operands 0, 1 and 2.
3566 (define_expand "neon_vqdmlal_n<mode>"
3567 [(match_operand:<V_widen> 0 "s_register_operand" "")
3568 (match_operand:<V_widen> 1 "s_register_operand" "")
3569 (match_operand:VMDI 2 "s_register_operand" "")
3570 (match_operand:<V_elem> 3 "s_register_operand" "")]
3573 rtx tmp = gen_reg_rtx (<MODE>mode);
3574 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3575 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") multiply-subtract, VMD form: inserts the <V_elem> scalar
;; (operand 3) into lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vmls_lane<mode> on operands 0, 1 and 2.
3580 (define_expand "neon_vmls_n<mode>"
3581 [(match_operand:VMD 0 "s_register_operand" "")
3582 (match_operand:VMD 1 "s_register_operand" "")
3583 (match_operand:VMD 2 "s_register_operand" "")
3584 (match_operand:<V_elem> 3 "s_register_operand" "")]
3587 rtx tmp = gen_reg_rtx (<MODE>mode);
3588 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3589 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") multiply-subtract, VMQ form: inserts the <V_elem> scalar
;; (operand 3) into lane 0 of a fresh <V_HALF>mode temporary, then emits
;; gen_neon_vmls_lane<mode> on operands 0, 1 and 2.
3594 (define_expand "neon_vmls_n<mode>"
3595 [(match_operand:VMQ 0 "s_register_operand" "")
3596 (match_operand:VMQ 1 "s_register_operand" "")
3597 (match_operand:VMQ 2 "s_register_operand" "")
3598 (match_operand:<V_elem> 3 "s_register_operand" "")]
3601 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3602 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3603 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") form of vmlsls: inserts the <V_elem> scalar (operand 3) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vmlsls_lane<mode> on operands 0, 1 and 2.
3608 (define_expand "neon_vmlsls_n<mode>"
3609 [(match_operand:<V_widen> 0 "s_register_operand" "")
3610 (match_operand:<V_widen> 1 "s_register_operand" "")
3611 (match_operand:VMDI 2 "s_register_operand" "")
3612 (match_operand:<V_elem> 3 "s_register_operand" "")]
3615 rtx tmp = gen_reg_rtx (<MODE>mode);
3616 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3617 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") form of vmlslu: inserts the <V_elem> scalar (operand 3) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vmlslu_lane<mode> on operands 0, 1 and 2.
3622 (define_expand "neon_vmlslu_n<mode>"
3623 [(match_operand:<V_widen> 0 "s_register_operand" "")
3624 (match_operand:<V_widen> 1 "s_register_operand" "")
3625 (match_operand:VMDI 2 "s_register_operand" "")
3626 (match_operand:<V_elem> 3 "s_register_operand" "")]
3629 rtx tmp = gen_reg_rtx (<MODE>mode);
3630 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3631 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Scalar ("_n") form of vqdmlsl: inserts the <V_elem> scalar (operand 3) into
;; lane 0 of a fresh <MODE>mode temporary, then emits
;; gen_neon_vqdmlsl_lane<mode> on operands 0, 1 and 2.
3636 (define_expand "neon_vqdmlsl_n<mode>"
3637 [(match_operand:<V_widen> 0 "s_register_operand" "")
3638 (match_operand:<V_widen> 1 "s_register_operand" "")
3639 (match_operand:VMDI 2 "s_register_operand" "")
3640 (match_operand:<V_elem> 3 "s_register_operand" "")]
3643 rtx tmp = gen_reg_rtx (<MODE>mode);
3644 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3645 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext.<V_sz_elem> on two VDQX sources (operands 1 and 2) with an SI
;; immediate (operand 3) emitted as the final assembler operand.  The
;; immediate is validated with neon_const_bounds using a lower bound of 0 and
;; the element count of <MODE>mode as the upper argument.
3650 (define_insn "neon_vext<mode>"
3651 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3652 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3653 (match_operand:VDQX 2 "s_register_operand" "w")
3654 (match_operand:SI 3 "immediate_operand" "i")]
3658 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3659 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3661 [(set_attr "type" "neon_ext<q>")]
;; vrev64.<V_sz_elem> from operand 1 to operand 0, for the VDQ modes.
3664 (define_insn "neon_vrev64<mode>"
3665 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3666 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3669 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3670 [(set_attr "type" "neon_rev<q>")]
;; vrev32.<V_sz_elem> from operand 1 to operand 0, for the VX modes.
3673 (define_insn "neon_vrev32<mode>"
3674 [(set (match_operand:VX 0 "s_register_operand" "=w")
3675 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3678 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3679 [(set_attr "type" "neon_rev<q>")]
;; vrev16.<V_sz_elem> from operand 1 to operand 0, for the VE modes.
3682 (define_insn "neon_vrev16<mode>"
3683 [(set (match_operand:VE 0 "s_register_operand" "=w")
3684 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3687 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3688 [(set_attr "type" "neon_rev<q>")]
3691 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3692 ; allocation. For an intrinsic of form:
3693 ; rD = vbsl_* (rS, rN, rM)
3694 ; We can use any of:
3695 ; vbsl rS, rN, rM (if D = S)
3696 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3697 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bit-select with three register-allocation alternatives.  The destination
;; is tied to operand 1 (alternative 0, emits vbsl), to operand 3
;; (alternative 1, emits vbit) or to operand 2 (alternative 2, emits vbif);
;; each template names the two operands that are not tied.
3699 (define_insn "neon_vbsl<mode>_internal"
3700 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3701 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3702 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3703 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3707 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3708 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3709 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3710 [(set_attr "type" "neon_bsl<q>")]
;; Expander for vbsl: the selector (operand 1) arrives in mode
;; <V_cmp_result> and is re-expressed in <MODE>mode with gen_lowpart so that
;; all three inputs share one mode.
3713 (define_expand "neon_vbsl<mode>"
3714 [(set (match_operand:VDQX 0 "s_register_operand" "")
3715 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3716 (match_operand:VDQX 2 "s_register_operand" "")
3717 (match_operand:VDQX 3 "s_register_operand" "")]
3721 /* We can't alias operands together if they have different modes. */
3722 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Register-controlled shift: v<shift_op> with a <sup><V_sz_elem> type suffix
;; on two VDQIX register operands (1 and 2).  Typed as neon_shift_imm<q>.
3726 (define_insn "neon_v<shift_op><sup><mode>"
3727 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3728 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3729 (match_operand:VDQIX 2 "s_register_operand" "w")]
3732 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3733 [(set_attr "type" "neon_shift_imm<q>")]
;; Register-controlled shift with the same operand shape and template as the
;; preceding pattern, but typed as neon_sat_shift_imm<q>.  The unspec iterator
;; that distinguishes the two is not visible in this excerpt.
3737 (define_insn "neon_v<shift_op><sup><mode>"
3738 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3739 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3740 (match_operand:VDQIX 2 "s_register_operand" "w")]
3743 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3744 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift by immediate: v<shift_op> on the VDQIX operand 1 with the SI
;; immediate operand 2.  The immediate is validated with neon_const_bounds
;; using 1 and neon_element_bits (<MODE>mode) + 1 as the bound arguments.
3748 (define_insn "neon_v<shift_op><sup>_n<mode>"
3749 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3750 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3751 (match_operand:SI 2 "immediate_operand" "i")]
3755 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3756 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3758 [(set_attr "type" "neon_shift_imm<q>")]
3761 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: the VN source (operand 1, %q) is shifted by
;; the SI immediate (operand 2) into the <V_narrow> destination (operand 0,
;; %P).  The immediate is validated with neon_const_bounds using 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1 as the bound arguments.
3762 (define_insn "neon_v<shift_op>_n<mode>"
3763 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3764 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3765 (match_operand:SI 2 "immediate_operand" "i")]
3769 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3770 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3772 [(set_attr "type" "neon_shift_imm_narrow_q")]
3775 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing shift by immediate: the VN source (operand 1, %q) is
;; shifted by the SI immediate (operand 2) into the <V_narrow> destination
;; (operand 0, %P), with a <sup><V_sz_elem> type suffix.  The immediate is
;; validated with neon_const_bounds using 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1 as the bound arguments.
3776 (define_insn "neon_v<shift_op><sup>_n<mode>"
3777 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3778 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3779 (match_operand:SI 2 "immediate_operand" "i")]
3783 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3784 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3786 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3789 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate using the <V_s_elem> type suffix: the VN
;; source (operand 1, %q) is shifted by the SI immediate (operand 2) into the
;; <V_narrow> destination (operand 0, %P).  The immediate is validated with
;; neon_const_bounds using 1 and neon_element_bits (<MODE>mode) / 2 + 1 as
;; the bound arguments.
3790 (define_insn "neon_v<shift_op>_n<mode>"
3791 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3792 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3793 (match_operand:SI 2 "immediate_operand" "i")]
3797 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3798 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3800 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Left shift by immediate: vshl.<V_if_elem> of the VDQIX operand 1 by the SI
;; immediate operand 2.  The immediate is validated with neon_const_bounds
;; using 0 and neon_element_bits (<MODE>mode) as the bound arguments.
3803 (define_insn "neon_vshl_n<mode>"
3804 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3805 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3806 (match_operand:SI 2 "immediate_operand" "i")]
3810 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3811 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3813 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating left shift by immediate: vqshl with a <sup><V_sz_elem> type
;; suffix on the VDQIX operand 1 and the SI immediate operand 2.  The
;; immediate is validated with neon_const_bounds using 0 and
;; neon_element_bits (<MODE>mode) as the bound arguments.
3816 (define_insn "neon_vqshl_<sup>_n<mode>"
3817 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3818 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3819 (match_operand:SI 2 "immediate_operand" "i")]
3823 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3824 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3826 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu.<V_s_elem> of the VDQIX operand 1 by the SI immediate operand 2.
;; The immediate is validated with neon_const_bounds using 0 and
;; neon_element_bits (<MODE>mode) as the bound arguments.
3829 (define_insn "neon_vqshlu_n<mode>"
3830 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3831 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3832 (match_operand:SI 2 "immediate_operand" "i")]
3836 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3837 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
3839 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening left shift by immediate: vshll of the VW operand 1 (%P) by the SI
;; immediate operand 2 into the <V_widen> destination (%q).
;; NOTE(review): the in-body comment states 0 < imm <= size, but the lower
;; bound passed to neon_const_bounds is 0, whereas the right-shift patterns
;; in this file pass 1 for a strictly positive immediate -- confirm whether
;; an immediate of 0 is meant to be accepted here.
3842 (define_insn "neon_vshll<sup>_n<mode>"
3843 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3844 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3845 (match_operand:SI 2 "immediate_operand" "i")]
3849 /* The boundaries are: 0 < imm <= size. */
3850 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3851 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
3853 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate with accumulate: operand 1 is tied to the destination
;; ("0"); v<shift_op> is applied to operand 2 with the SI immediate operand 3.
;; The immediate is validated with neon_const_bounds using 1 and
;; neon_element_bits (<MODE>mode) + 1 as the bound arguments.
3857 (define_insn "neon_v<shift_op><sup>_n<mode>"
3858 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3859 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3860 (match_operand:VDQIX 2 "s_register_operand" "w")
3861 (match_operand:SI 3 "immediate_operand" "i")]
3865 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3866 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3868 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri.<V_sz_elem>: operand 1 is tied to the destination ("0"); operand 2 is
;; the register source and operand 3 the SI immediate.  The immediate is
;; validated with neon_const_bounds using 1 and
;; neon_element_bits (<MODE>mode) + 1 as the bound arguments.
3871 (define_insn "neon_vsri_n<mode>"
3872 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3873 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3874 (match_operand:VDQIX 2 "s_register_operand" "w")
3875 (match_operand:SI 3 "immediate_operand" "i")]
3879 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3880 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3882 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli.<V_sz_elem>: operand 1 is tied to the destination ("0"); operand 2 is
;; the register source and operand 3 the SI immediate.  The immediate is
;; validated with neon_const_bounds using 0 and
;; neon_element_bits (<MODE>mode) as the bound arguments.
3885 (define_insn "neon_vsli_n<mode>"
3886 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3887 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3888 (match_operand:VDQIX 2 "s_register_operand" "w")
3889 (match_operand:SI 3 "immediate_operand" "i")]
3893 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3894 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3896 [(set_attr "type" "neon_shift_reg<q>")]
;; Single-register table lookup: vtbl.8 with operand 1 as the one-entry
;; register list and operand 2 as the V8QI index vector.
3899 (define_insn "neon_vtbl1v8qi"
3900 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3901 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3902 (match_operand:V8QI 2 "s_register_operand" "w")]
3905 "vtbl.8\t%P0, {%P1}, %P2"
3906 [(set_attr "type" "neon_tbl1")]
;; Two-register table lookup: the table is the TImode operand 1.  The output
;; code forms V8QI registers at REGNO (operands[1]) and that number + 2 for
;; the register list, uses operand 2 as the index vector, and prints vtbl.8
;; through output_asm_insn.
3909 (define_insn "neon_vtbl2v8qi"
3910 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3911 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3912 (match_operand:V8QI 2 "s_register_operand" "w")]
3917 int tabbase = REGNO (operands[1]);
3919 ops[0] = operands[0];
3920 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3921 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3922 ops[3] = operands[2];
3923 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3927 [(set_attr "type" "neon_tbl2")]
;; Three-register table lookup: the table is the EImode operand 1.  The
;; output code forms V8QI registers at REGNO (operands[1]) + 0, 2 and 4 for
;; the register list, uses operand 2 as the index vector, and prints vtbl.8
;; through output_asm_insn.
3930 (define_insn "neon_vtbl3v8qi"
3931 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3932 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3933 (match_operand:V8QI 2 "s_register_operand" "w")]
3938 int tabbase = REGNO (operands[1]);
3940 ops[0] = operands[0];
3941 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3942 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3943 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3944 ops[4] = operands[2];
3945 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3949 [(set_attr "type" "neon_tbl3")]
;; Four-register table lookup: the table is the OImode operand 1.  The output
;; code forms V8QI registers at REGNO (operands[1]) + 0, 2, 4 and 6 for the
;; register list, uses operand 2 as the index vector, and prints vtbl.8
;; through output_asm_insn.
3952 (define_insn "neon_vtbl4v8qi"
3953 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3954 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3955 (match_operand:V8QI 2 "s_register_operand" "w")]
3960 int tabbase = REGNO (operands[1]);
3962 ops[0] = operands[0];
3963 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3964 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3965 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3966 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3967 ops[5] = operands[2];
3968 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3972 [(set_attr "type" "neon_tbl4")]
3975 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table.  The destination is earlyclobber
;; ("=&w").  Once reload has completed the pattern is split into two
;; neon_vtbl2v8qi insns: the table (operand 1) is viewed as TImode via
;; gen_lowpart, and the low and high V8QI halves of the destination and of
;; the index vector are obtained with simplify_subreg.
3976 (define_insn_and_split "neon_vtbl1v16qi"
3977 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3978 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3979 (match_operand:V16QI 2 "s_register_operand" "w")]
3983 "&& reload_completed"
3986 rtx op0, op1, op2, part0, part2;
3990 op1 = gen_lowpart (TImode, operands[1]);
3993 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3994 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3995 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3996 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3998 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3999 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4000 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4001 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4004 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table.  The destination is earlyclobber
;; ("=&w").  Once reload has completed the pattern is split into two insns,
;; one for the low and one for the high V8QI half of the destination and of
;; the index vector (halves obtained with simplify_subreg).
;; NOTE(review): the table operand here is OImode, yet the split emits
;; gen_neon_vtbl2v8qi, whose definition in this file takes a TImode table and
;; lists only two registers -- confirm that this covers the whole table.
4007 (define_insn_and_split "neon_vtbl2v16qi"
4008 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4009 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4010 (match_operand:V16QI 2 "s_register_operand" "w")]
4014 "&& reload_completed"
4017 rtx op0, op1, op2, part0, part2;
4024 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4025 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4026 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4027 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4029 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4030 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4031 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4032 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4035 [(set_attr "type" "multiple")]
4038 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4039 ;; handle quad-word input modes, producing octa-word output modes. But
4040 ;; that requires us to add support for octa-word vector modes in moves.
4041 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI inputs (operands 1 and 2) into one OImode result
;; (operand 0).  Once reload has completed, the work is delegated to
;; neon_split_vcombine (operands).
4042 (define_insn_and_split "neon_vcombinev16qi"
4043 [(set (match_operand:OI 0 "s_register_operand" "=w")
4044 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4045 (match_operand:V16QI 2 "s_register_operand" "w")]
4049 "&& reload_completed"
4052 neon_split_vcombine (operands);
4055 [(set_attr "type" "multiple")]
;; Single-register table extension: operand 1 is tied to the destination
;; ("0"); vtbx.8 uses operand 2 as the one-entry register list and operand 3
;; as the V8QI index vector.
4058 (define_insn "neon_vtbx1v8qi"
4059 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4060 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4061 (match_operand:V8QI 2 "s_register_operand" "w")
4062 (match_operand:V8QI 3 "s_register_operand" "w")]
4065 "vtbx.8\t%P0, {%P2}, %P3"
4066 [(set_attr "type" "neon_tbl1")]
;; Two-register table extension: operand 1 is tied to the destination ("0")
;; and the table is the TImode operand 2.  The output code forms V8QI
;; registers at REGNO (operands[2]) and that number + 2 for the register
;; list, uses operand 3 as the index vector, and prints vtbx.8 through
;; output_asm_insn.
4069 (define_insn "neon_vtbx2v8qi"
4070 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4071 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4072 (match_operand:TI 2 "s_register_operand" "w")
4073 (match_operand:V8QI 3 "s_register_operand" "w")]
4078 int tabbase = REGNO (operands[2]);
4080 ops[0] = operands[0];
4081 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4082 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4083 ops[3] = operands[3];
4084 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4088 [(set_attr "type" "neon_tbl2")]
;; Three-register table extension: operand 1 is tied to the destination ("0")
;; and the table is the EImode operand 2.  The output code forms V8QI
;; registers at REGNO (operands[2]) + 0, 2 and 4 for the register list, uses
;; operand 3 as the index vector, and prints vtbx.8 through output_asm_insn.
4091 (define_insn "neon_vtbx3v8qi"
4092 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4093 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4094 (match_operand:EI 2 "s_register_operand" "w")
4095 (match_operand:V8QI 3 "s_register_operand" "w")]
4100 int tabbase = REGNO (operands[2]);
4102 ops[0] = operands[0];
4103 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4104 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4105 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4106 ops[4] = operands[3];
4107 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4111 [(set_attr "type" "neon_tbl3")]
;; Four-register table extension: operand 1 is tied to the destination ("0")
;; and the table is the OImode operand 2.  The output code forms V8QI
;; registers at REGNO (operands[2]) + 0, 2, 4 and 6 for the register list,
;; uses operand 3 as the index vector, and prints vtbx.8 through
;; output_asm_insn.
4114 (define_insn "neon_vtbx4v8qi"
4115 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4116 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4117 (match_operand:OI 2 "s_register_operand" "w")
4118 (match_operand:V8QI 3 "s_register_operand" "w")]
4123 int tabbase = REGNO (operands[2]);
4125 ops[0] = operands[0];
4126 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4127 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4128 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4129 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4130 ops[5] = operands[3];
4131 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4135 [(set_attr "type" "neon_tbl4")]
;; Expander producing two results from the same pair of VDQW inputs
;; (operands 1 and 2): one set into operand 0 and a second set, using
;; UNSPEC_VTRN2, into operand 3.
4138 (define_expand "neon_vtrn<mode>_internal"
4140 [(set (match_operand:VDQW 0 "s_register_operand" "")
4141 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4142 (match_operand:VDQW 2 "s_register_operand" "")]
4144 (set (match_operand:VDQW 3 "s_register_operand" "")
4145 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4150 ;; Note: Different operand numbering to handle tied registers correctly.
;; Insn pattern for VTRN.  Both inputs are tied to the outputs: operand 1
;; to operand 0 (constraint "0") and operand 3 to operand 2 (constraint
;; "2"), so the assembly template names only operands 0 and 2.
4151 (define_insn "*neon_vtrn<mode>_insn"
4152 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4153 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4154 (match_operand:VDQW 3 "s_register_operand" "2")]
4156 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4157 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4160 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4161 [(set_attr "type" "neon_permute<q>")]
;; Expander for VZIP over the VDQW modes.  Two results are set from the
;; same input pair (operands 1 and 2): operand 0 and operand 3.  The
;; second set is the UNSPEC_VZIP2 unspec of the same two inputs.
4164 (define_expand "neon_vzip<mode>_internal"
4166 [(set (match_operand:VDQW 0 "s_register_operand" "")
4167 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4168 (match_operand:VDQW 2 "s_register_operand" "")]
4170 (set (match_operand:VDQW 3 "s_register_operand" "")
4171 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4176 ;; Note: Different operand numbering to handle tied registers correctly.
;; Insn pattern for VZIP.  Both inputs are tied to the outputs: operand 1
;; to operand 0 (constraint "0") and operand 3 to operand 2 (constraint
;; "2"), so the assembly template names only operands 0 and 2.
4177 (define_insn "*neon_vzip<mode>_insn"
4178 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4179 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4180 (match_operand:VDQW 3 "s_register_operand" "2")]
4182 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4183 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4186 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4187 [(set_attr "type" "neon_zip<q>")]
;; Expander for VUZP over the VDQW modes.  Two results are set from the
;; same input pair (operands 1 and 2): operand 0 and operand 3.  The
;; second set is the UNSPEC_VUZP2 unspec of the same two inputs.
4190 (define_expand "neon_vuzp<mode>_internal"
4192 [(set (match_operand:VDQW 0 "s_register_operand" "")
4193 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4194 (match_operand:VDQW 2 "s_register_operand" "")]
4196 (set (match_operand:VDQW 3 "s_register_operand" "")
4197 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4202 ;; Note: Different operand numbering to handle tied registers correctly.
;; Insn pattern for VUZP.  Both inputs are tied to the outputs: operand 1
;; to operand 0 (constraint "0") and operand 3 to operand 2 (constraint
;; "2"), so the assembly template names only operands 0 and 2.
4203 (define_insn "*neon_vuzp<mode>_insn"
4204 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4205 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4206 (match_operand:VDQW 3 "s_register_operand" "2")]
4208 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4209 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4212 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4213 [(set_attr "type" "neon_zip<q>")]
;; Reinterpret a VDX-mode register (operand 1) as V8QI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4216 (define_expand "neon_vreinterpretv8qi<mode>"
4217 [(match_operand:V8QI 0 "s_register_operand" "")
4218 (match_operand:VDX 1 "s_register_operand" "")]
4221 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V4HI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4225 (define_expand "neon_vreinterpretv4hi<mode>"
4226 [(match_operand:V4HI 0 "s_register_operand" "")
4227 (match_operand:VDX 1 "s_register_operand" "")]
4230 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V2SI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4234 (define_expand "neon_vreinterpretv2si<mode>"
4235 [(match_operand:V2SI 0 "s_register_operand" "")
4236 (match_operand:VDX 1 "s_register_operand" "")]
4239 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V2SF (operand 0); the
;; expansion is delegated to neon_reinterpret.
4243 (define_expand "neon_vreinterpretv2sf<mode>"
4244 [(match_operand:V2SF 0 "s_register_operand" "")
4245 (match_operand:VDX 1 "s_register_operand" "")]
4248 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as DI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4252 (define_expand "neon_vreinterpretdi<mode>"
4253 [(match_operand:DI 0 "s_register_operand" "")
4254 (match_operand:VDX 1 "s_register_operand" "")]
4257 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as TI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4261 (define_expand "neon_vreinterpretti<mode>"
4262 [(match_operand:TI 0 "s_register_operand" "")
4263 (match_operand:VQXMOV 1 "s_register_operand" "")]
4266 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V16QI (operand 0);
;; the expansion is delegated to neon_reinterpret.
4271 (define_expand "neon_vreinterpretv16qi<mode>"
4272 [(match_operand:V16QI 0 "s_register_operand" "")
4273 (match_operand:VQXMOV 1 "s_register_operand" "")]
4276 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V8HI (operand 0);
;; the expansion is delegated to neon_reinterpret.
4280 (define_expand "neon_vreinterpretv8hi<mode>"
4281 [(match_operand:V8HI 0 "s_register_operand" "")
4282 (match_operand:VQXMOV 1 "s_register_operand" "")]
4285 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SI (operand 0);
;; the expansion is delegated to neon_reinterpret.
4289 (define_expand "neon_vreinterpretv4si<mode>"
4290 [(match_operand:V4SI 0 "s_register_operand" "")
4291 (match_operand:VQXMOV 1 "s_register_operand" "")]
4294 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SF (operand 0);
;; the expansion is delegated to neon_reinterpret.
4298 (define_expand "neon_vreinterpretv4sf<mode>"
4299 [(match_operand:V4SF 0 "s_register_operand" "")
4300 (match_operand:VQXMOV 1 "s_register_operand" "")]
4303 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V2DI (operand 0);
;; the expansion is delegated to neon_reinterpret.
4307 (define_expand "neon_vreinterpretv2di<mode>"
4308 [(match_operand:V2DI 0 "s_register_operand" "")
4309 (match_operand:VQXMOV 1 "s_register_operand" "")]
4312 neon_reinterpret (operands[0], operands[1]);
;; Expander named vec_load_lanes<mode><mode>: sets a VDQX register
;; (operand 0) from an unspec wrapping a VDQX neon_struct_operand
;; (operand 1).
4316 (define_expand "vec_load_lanes<mode><mode>"
4317 [(set (match_operand:VDQX 0 "s_register_operand")
4318 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; VLD1 of a whole VDQX register (operand 0) from the memory operand 1
;; (constraint "Um").  The register is printed with %h and the address
;; with %A.
4322 (define_insn "neon_vld1<mode>"
4323 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4324 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4327 "vld1.<V_sz_elem>\t%h0, %A1"
4328 [(set_attr "type" "neon_load1_1reg<q>")]
;; VLD1 into one lane of a VDX-mode register.  Operand 1 is the element
;; in memory (<V_elem> mode), operand 2 the previous vector contents tied
;; to the output (constraint "0") and operand 3 the immediate lane
;; number.  A lane outside 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is
;; reported with error ().  The output is either a whole-register vld1
;; or the single-lane form {%P0[%c3]}.
4331 (define_insn "neon_vld1_lane<mode>"
4332 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4333 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4334 (match_operand:VDX 2 "s_register_operand" "0")
4335 (match_operand:SI 3 "immediate_operand" "i")]
4339 HOST_WIDE_INT lane = INTVAL (operands[3]);
4340 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4341 if (lane < 0 || lane >= max)
4342 error ("lane out of range");
4344 return "vld1.<V_sz_elem>\t%P0, %A1";
4346 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4348 [(set_attr "type" "neon_load1_one_lane<q>")]
;; VLD1 into one lane of a VQX-mode register.  Operand 1 is the element
;; in memory (<V_elem> mode), operand 2 the previous vector contents tied
;; to the output (constraint "0") and operand 3 the immediate lane
;; number.  A lane outside 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is
;; reported with error (); lanes with lane >= max / 2 take a separate
;; branch.  Before printing, operands[3] is replaced by GEN_INT (lane)
;; and operands[0] by a <V_HALF>mode register at regno, so the templates
;; can use %P0.
4351 (define_insn "neon_vld1_lane<mode>"
4352 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4353 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4354 (match_operand:VQX 2 "s_register_operand" "0")
4355 (match_operand:SI 3 "immediate_operand" "i")]
4359 HOST_WIDE_INT lane = INTVAL (operands[3]);
4360 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4361 int regno = REGNO (operands[0]);
4362 if (lane < 0 || lane >= max)
4363 error ("lane out of range");
4364 else if (lane >= max / 2)
4368 operands[3] = GEN_INT (lane);
4370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4372 return "vld1.<V_sz_elem>\t%P0, %A1";
4374 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4376 [(set_attr "type" "neon_load1_one_lane<q>")]
;; VLD1 of one element from memory (operand 1, <V_elem> mode) duplicated
;; into every lane of a VD-mode register, expressed as vec_duplicate and
;; printed with the all-lanes syntax {%P0[]}.
4379 (define_insn "neon_vld1_dup<mode>"
4380 [(set (match_operand:VD 0 "s_register_operand" "=w")
4381 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4383 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4384 [(set_attr "type" "neon_load1_all_lanes<q>")]
4387 ;; Special case for DImode. Treat it exactly like a simple load.
;; Expander for the DImode variant: operand 0 is a DI register and
;; operand 1 a DI neon_struct_operand wrapped in an unspec.
4388 (define_expand "neon_vld1_dupdi"
4389 [(set (match_operand:DI 0 "s_register_operand" "")
4390 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; VLD1 of one element from memory (operand 1, <V_elem> mode) duplicated
;; into every lane of a VQ-mode register.  The destination is printed as
;; two all-lanes registers, {%e0[], %f0[]}.
4396 (define_insn "neon_vld1_dup<mode>"
4397 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4398 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4401 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4403 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Duplicate-load of a DI value from memory into both halves of a V2DI
;; register.  Once reload has completed the insn is split: the low DImode
;; part of operand 0 is loaded with gen_neon_vld1_dupdi and then copied
;; to the high DImode part with a move.  The length attribute is 8.
4406 (define_insn_and_split "neon_vld1_dupv2di"
4407 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4408 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4411 "&& reload_completed"
4414 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4415 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4416 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4419 [(set_attr "length" "8")
4420 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander named vec_store_lanes<mode><mode>: sets a VDQX
;; neon_struct_operand (operand 0) from an unspec wrapping a VDQX
;; register (operand 1).
4423 (define_expand "vec_store_lanes<mode><mode>"
4424 [(set (match_operand:VDQX 0 "neon_struct_operand")
4425 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; VST1 of a whole VDQX register (operand 1) to the memory operand 0
;; (constraint "=Um").  The register is printed with %h and the address
;; with %A.
4429 (define_insn "neon_vst1<mode>"
4430 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4431 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4434 "vst1.<V_sz_elem>\t%h1, %A0"
4435 [(set_attr "type" "neon_store1_1reg<q>")])
;; VST1 of one lane of a VDX-mode register.  Operand 0 is the element in
;; memory (<V_elem> mode), operand 1 the source vector and operand 2 the
;; immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error ().  The
;; output is either a whole-register vst1 or the single-lane form
;; {%P1[%c2]}.
4437 (define_insn "neon_vst1_lane<mode>"
4438 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4440 [(match_operand:VDX 1 "s_register_operand" "w")
4441 (match_operand:SI 2 "immediate_operand" "i")]
4445 HOST_WIDE_INT lane = INTVAL (operands[2]);
4446 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4447 if (lane < 0 || lane >= max)
4448 error ("lane out of range");
4450 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4452 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4454 [(set_attr "type" "neon_store1_one_lane<q>")]
;; VST1 of one lane of a VQX-mode register.  Operand 0 is the element in
;; memory (<V_elem> mode), operand 1 the source vector and operand 2 the
;; immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error (); lanes
;; with lane >= max / 2 take a separate branch.  Before printing,
;; operands[2] is replaced by GEN_INT (lane) and operands[1] by a
;; <V_HALF>mode register at regno, so the templates can use %P1.
4457 (define_insn "neon_vst1_lane<mode>"
4458 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4460 [(match_operand:VQX 1 "s_register_operand" "w")
4461 (match_operand:SI 2 "immediate_operand" "i")]
4465 HOST_WIDE_INT lane = INTVAL (operands[2]);
4466 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4467 int regno = REGNO (operands[1]);
4468 if (lane < 0 || lane >= max)
4469 error ("lane out of range");
4470 else if (lane >= max / 2)
4474 operands[2] = GEN_INT (lane);
4476 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4478 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4480 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4482 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander named vec_load_lanesti<mode>: sets a TI register (operand 0)
;; from an unspec of a TI neon_struct_operand (operand 1).  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the VDX vector mode.
4485 (define_expand "vec_load_lanesti<mode>"
4486 [(set (match_operand:TI 0 "s_register_operand")
4487 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4488 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-element structure load into a TI register for the VDX modes.
;; When <V_sz_elem> is 64 the output is "vld1.64"; otherwise it is
;; "vld2.<V_sz_elem>".  The type attribute follows the same test:
;; neon_load1_2reg<q> for 64-bit elements, neon_load2_2reg<q> otherwise.
4492 (define_insn "neon_vld2<mode>"
4493 [(set (match_operand:TI 0 "s_register_operand" "=w")
4494 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4495 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4499 if (<V_sz_elem> == 64)
4500 return "vld1.64\t%h0, %A1";
4502 return "vld2.<V_sz_elem>\t%h0, %A1";
4505 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4506 (const_string "neon_load1_2reg<q>")
4507 (const_string "neon_load2_2reg<q>")))]
;; Expander named vec_load_lanesoi<mode> for the VQ modes: sets an OI
;; register (operand 0) from an unspec of an OI neon_struct_operand
;; (operand 1).  The inner UNSPEC_VSTRUCTDUMMY unspec only carries the
;; VQ vector mode.
4510 (define_expand "vec_load_lanesoi<mode>"
4511 [(set (match_operand:OI 0 "s_register_operand")
4512 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4513 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-element structure load into an OI register for the VQ modes,
;; emitted as a single "vld2.<V_sz_elem>" with the register list printed
;; by %h0.
4517 (define_insn "neon_vld2<mode>"
4518 [(set (match_operand:OI 0 "s_register_operand" "=w")
4519 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4520 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4523 "vld2.<V_sz_elem>\t%h0, %A1"
4524 [(set_attr "type" "neon_load2_2reg_q")])
;; VLD2 into one lane of the vectors held in a TI register (VD modes).
;; Operand 1 is the memory (<V_two_elem> mode), operand 2 the previous
;; register contents tied to the output (constraint "0") and operand 3
;; the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error ().  The
;; two registers printed are the DImode registers at regno and regno + 2.
4526 (define_insn "neon_vld2_lane<mode>"
4527 [(set (match_operand:TI 0 "s_register_operand" "=w")
4528 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4529 (match_operand:TI 2 "s_register_operand" "0")
4530 (match_operand:SI 3 "immediate_operand" "i")
4531 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4535 HOST_WIDE_INT lane = INTVAL (operands[3]);
4536 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4537 int regno = REGNO (operands[0]);
4539 if (lane < 0 || lane >= max)
4540 error ("lane out of range");
4541 ops[0] = gen_rtx_REG (DImode, regno);
4542 ops[1] = gen_rtx_REG (DImode, regno + 2);
4543 ops[2] = operands[1];
4544 ops[3] = operands[3];
4545 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4548 [(set_attr "type" "neon_load2_one_lane<q>")]
;; VLD2 into one lane of the vectors held in an OI register (VMQ modes).
;; Operand 1 is the memory (<V_two_elem> mode), operand 2 the previous
;; register contents tied to the output (constraint "0") and operand 3
;; the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error (); lanes
;; with lane >= max / 2 take a separate branch.  The two registers
;; printed are the DImode registers at regno and regno + 4, and the lane
;; printed is GEN_INT (lane).
4551 (define_insn "neon_vld2_lane<mode>"
4552 [(set (match_operand:OI 0 "s_register_operand" "=w")
4553 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4554 (match_operand:OI 2 "s_register_operand" "0")
4555 (match_operand:SI 3 "immediate_operand" "i")
4556 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4560 HOST_WIDE_INT lane = INTVAL (operands[3]);
4561 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4562 int regno = REGNO (operands[0]);
4564 if (lane < 0 || lane >= max)
4565 error ("lane out of range");
4566 else if (lane >= max / 2)
4571 ops[0] = gen_rtx_REG (DImode, regno);
4572 ops[1] = gen_rtx_REG (DImode, regno + 4);
4573 ops[2] = operands[1];
4574 ops[3] = GEN_INT (lane);
4575 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4578 [(set_attr "type" "neon_load2_one_lane<q>")]
;; VLD2 to all lanes of a TI register for the VDX modes.  When the mode
;; has more than one unit the all-lanes form {%e0[], %f0[]} is emitted;
;; otherwise a plain "vld1" of the register list %h0 is used.  The type
;; attribute makes the same distinction on <V_mode_nunits>.
4581 (define_insn "neon_vld2_dup<mode>"
4582 [(set (match_operand:TI 0 "s_register_operand" "=w")
4583 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4584 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4588 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4589 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4591 return "vld1.<V_sz_elem>\t%h0, %A1";
4594 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4595 (const_string "neon_load2_all_lanes<q>")
4596 (const_string "neon_load1_1reg<q>")))]
;; Expander named vec_store_lanesti<mode>: sets a TI neon_struct_operand
;; (operand 0) from an unspec of a TI register (operand 1).  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the VDX vector mode.
4599 (define_expand "vec_store_lanesti<mode>"
4600 [(set (match_operand:TI 0 "neon_struct_operand")
4601 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4602 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-element structure store of a TI register for the VDX modes.
;; When <V_sz_elem> is 64 the output is "vst1.64"; otherwise it is
;; "vst2.<V_sz_elem>".
;; NOTE(review): the non-64-bit case is typed neon_store2_one_lane<q>
;; although the template stores the whole register list (%h1); the
;; matching load pattern uses neon_load2_2reg<q>.  Confirm that this
;; scheduling type is intended.
4606 (define_insn "neon_vst2<mode>"
4607 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4608 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4609 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4613 if (<V_sz_elem> == 64)
4614 return "vst1.64\t%h1, %A0";
4616 return "vst2.<V_sz_elem>\t%h1, %A0";
4619 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4620 (const_string "neon_store1_2reg<q>")
4621 (const_string "neon_store2_one_lane<q>")))]
;; Expander named vec_store_lanesoi<mode> for the VQ modes: sets an OI
;; neon_struct_operand (operand 0) from an unspec of an OI register
;; (operand 1).  The inner UNSPEC_VSTRUCTDUMMY unspec only carries the
;; VQ vector mode.
4624 (define_expand "vec_store_lanesoi<mode>"
4625 [(set (match_operand:OI 0 "neon_struct_operand")
4626 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4627 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-element structure store of an OI register for the VQ modes,
;; emitted as a single "vst2.<V_sz_elem>" with the register list printed
;; by %h1.
4631 (define_insn "neon_vst2<mode>"
4632 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4633 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4634 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4637 "vst2.<V_sz_elem>\t%h1, %A0"
4638 [(set_attr "type" "neon_store2_4reg<q>")]
;; VST2 of one lane of the vectors held in a TI register (VD modes).
;; Operand 0 is the memory (<V_two_elem> mode), operand 1 the source
;; register and operand 2 the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error ().  The
;; two registers printed are the DImode registers at regno and regno + 2.
4641 (define_insn "neon_vst2_lane<mode>"
4642 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4643 (unspec:<V_two_elem>
4644 [(match_operand:TI 1 "s_register_operand" "w")
4645 (match_operand:SI 2 "immediate_operand" "i")
4646 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4650 HOST_WIDE_INT lane = INTVAL (operands[2]);
4651 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4652 int regno = REGNO (operands[1]);
4654 if (lane < 0 || lane >= max)
4655 error ("lane out of range");
4656 ops[0] = operands[0];
4657 ops[1] = gen_rtx_REG (DImode, regno);
4658 ops[2] = gen_rtx_REG (DImode, regno + 2);
4659 ops[3] = operands[2];
4660 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4663 [(set_attr "type" "neon_store2_one_lane<q>")]
;; VST2 of one lane of the vectors held in an OI register (VMQ modes).
;; Operand 0 is the memory (<V_two_elem> mode), operand 1 the source
;; register and operand 2 the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error (); lanes
;; with lane >= max / 2 take a separate branch.  The two registers
;; printed are the DImode registers at regno and regno + 4, and the lane
;; printed is GEN_INT (lane).
4666 (define_insn "neon_vst2_lane<mode>"
4667 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4668 (unspec:<V_two_elem>
4669 [(match_operand:OI 1 "s_register_operand" "w")
4670 (match_operand:SI 2 "immediate_operand" "i")
4671 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4675 HOST_WIDE_INT lane = INTVAL (operands[2]);
4676 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4677 int regno = REGNO (operands[1]);
4679 if (lane < 0 || lane >= max)
4680 error ("lane out of range");
4681 else if (lane >= max / 2)
4686 ops[0] = operands[0];
4687 ops[1] = gen_rtx_REG (DImode, regno);
4688 ops[2] = gen_rtx_REG (DImode, regno + 4);
4689 ops[3] = GEN_INT (lane);
4690 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4693 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander named vec_load_lanesei<mode>: sets an EI register (operand 0)
;; from an unspec of an EI neon_struct_operand (operand 1).  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the VDX vector mode.
4696 (define_expand "vec_load_lanesei<mode>"
4697 [(set (match_operand:EI 0 "s_register_operand")
4698 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4699 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-element structure load into an EI register for the VDX modes.
;; When <V_sz_elem> is 64 the output is "vld1.64"; otherwise it is
;; "vld3.<V_sz_elem>".  The type attribute follows the same test:
;; neon_load1_3reg<q> for 64-bit elements, neon_load3_3reg<q> otherwise.
4703 (define_insn "neon_vld3<mode>"
4704 [(set (match_operand:EI 0 "s_register_operand" "=w")
4705 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4706 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4710 if (<V_sz_elem> == 64)
4711 return "vld1.64\t%h0, %A1";
4713 return "vld3.<V_sz_elem>\t%h0, %A1";
4716 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4717 (const_string "neon_load1_3reg<q>")
4718 (const_string "neon_load3_3reg<q>")))]
;; Expander named vec_load_lanesci<mode> for the VQ modes; it simply
;; emits gen_neon_vld3<mode> on the same two operands (CI register and
;; CI memory).
4721 (define_expand "vec_load_lanesci<mode>"
4722 [(match_operand:CI 0 "s_register_operand")
4723 (match_operand:CI 1 "neon_struct_operand")
4724 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4727 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Three-element structure load into a CI register for the VQ modes,
;; expanded as two insns.  neon_vld3qa loads from the memory re-addressed
;; as EImode at offset 0; neon_vld3qb loads from the EImode memory
;; GET_MODE_SIZE (EImode) bytes further on and also takes operands[0] as
;; an input.
4731 (define_expand "neon_vld3<mode>"
4732 [(match_operand:CI 0 "s_register_operand")
4733 (match_operand:CI 1 "neon_struct_operand")
4734 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4739 mem = adjust_address (operands[1], EImode, 0);
4740 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4741 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4742 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First of the two VLD3 insns used for a CI register: loads from the
;; EI memory operand 1 into the DImode registers at regno, regno + 4 and
;; regno + 8, where regno is the register number of operand 0.
4746 (define_insn "neon_vld3qa<mode>"
4747 [(set (match_operand:CI 0 "s_register_operand" "=w")
4748 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4749 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4753 int regno = REGNO (operands[0]);
4755 ops[0] = gen_rtx_REG (DImode, regno);
4756 ops[1] = gen_rtx_REG (DImode, regno + 4);
4757 ops[2] = gen_rtx_REG (DImode, regno + 8);
4758 ops[3] = operands[1];
4759 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4762 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second of the two VLD3 insns used for a CI register: loads from the
;; EI memory operand 1 into the DImode registers at regno + 2, regno + 6
;; and regno + 10.  Operand 2 is the previous CI contents, tied to the
;; output (constraint "0").
4765 (define_insn "neon_vld3qb<mode>"
4766 [(set (match_operand:CI 0 "s_register_operand" "=w")
4767 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4768 (match_operand:CI 2 "s_register_operand" "0")
4769 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4773 int regno = REGNO (operands[0]);
4775 ops[0] = gen_rtx_REG (DImode, regno + 2);
4776 ops[1] = gen_rtx_REG (DImode, regno + 6);
4777 ops[2] = gen_rtx_REG (DImode, regno + 10);
4778 ops[3] = operands[1];
4779 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4782 [(set_attr "type" "neon_load3_3reg<q>")]
;; VLD3 into one lane of the vectors held in an EI register (VD modes).
;; Operand 1 is the memory (<V_three_elem> mode), operand 2 the previous
;; register contents tied to the output (constraint "0") and operand 3
;; the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error ().  The
;; registers printed are the DImode registers at regno, regno + 2 and
;; regno + 4.
;; NOTE(review): the address is printed with plain %3 here, whereas the
;; vld2/vld4 lane patterns use the %A modifier -- presumably deliberate;
;; confirm.
4785 (define_insn "neon_vld3_lane<mode>"
4786 [(set (match_operand:EI 0 "s_register_operand" "=w")
4787 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4788 (match_operand:EI 2 "s_register_operand" "0")
4789 (match_operand:SI 3 "immediate_operand" "i")
4790 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4794 HOST_WIDE_INT lane = INTVAL (operands[3]);
4795 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4796 int regno = REGNO (operands[0]);
4798 if (lane < 0 || lane >= max)
4799 error ("lane out of range");
4800 ops[0] = gen_rtx_REG (DImode, regno);
4801 ops[1] = gen_rtx_REG (DImode, regno + 2);
4802 ops[2] = gen_rtx_REG (DImode, regno + 4);
4803 ops[3] = operands[1];
4804 ops[4] = operands[3];
4805 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4809 [(set_attr "type" "neon_load3_one_lane<q>")]
;; VLD3 into one lane of the vectors held in a CI register (VMQ modes).
;; Operand 1 is the memory (<V_three_elem> mode), operand 2 the previous
;; register contents tied to the output (constraint "0") and operand 3
;; the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error (); lanes
;; with lane >= max / 2 take a separate branch.  The registers printed
;; are the DImode registers at regno, regno + 4 and regno + 8, and the
;; lane printed is GEN_INT (lane).
;; NOTE(review): the address is printed with plain %3 here, whereas the
;; vld2/vld4 lane patterns use the %A modifier -- presumably deliberate;
;; confirm.
4812 (define_insn "neon_vld3_lane<mode>"
4813 [(set (match_operand:CI 0 "s_register_operand" "=w")
4814 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4815 (match_operand:CI 2 "s_register_operand" "0")
4816 (match_operand:SI 3 "immediate_operand" "i")
4817 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4821 HOST_WIDE_INT lane = INTVAL (operands[3]);
4822 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4823 int regno = REGNO (operands[0]);
4825 if (lane < 0 || lane >= max)
4826 error ("lane out of range");
4827 else if (lane >= max / 2)
4832 ops[0] = gen_rtx_REG (DImode, regno);
4833 ops[1] = gen_rtx_REG (DImode, regno + 4);
4834 ops[2] = gen_rtx_REG (DImode, regno + 8);
4835 ops[3] = operands[1];
4836 ops[4] = GEN_INT (lane);
4837 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4841 [(set_attr "type" "neon_load3_one_lane<q>")]
;; VLD3 to all lanes of an EI register for the VDX modes.  When the mode
;; has more than one unit, the all-lanes form is emitted on the DImode
;; registers at regno, regno + 2 and regno + 4; otherwise a plain "vld1"
;; of the register list %h0 is used.  The type attribute makes the same
;; distinction on <V_mode_nunits>.
;; NOTE(review): the all-lanes template prints the address with plain %3
;; while the vld1 fallback uses %A1 -- presumably deliberate; confirm.
4844 (define_insn "neon_vld3_dup<mode>"
4845 [(set (match_operand:EI 0 "s_register_operand" "=w")
4846 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4847 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4851 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4853 int regno = REGNO (operands[0]);
4855 ops[0] = gen_rtx_REG (DImode, regno);
4856 ops[1] = gen_rtx_REG (DImode, regno + 2);
4857 ops[2] = gen_rtx_REG (DImode, regno + 4);
4858 ops[3] = operands[1];
4859 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4863 return "vld1.<V_sz_elem>\t%h0, %A1";
4866 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4867 (const_string "neon_load3_all_lanes<q>")
4868 (const_string "neon_load1_1reg<q>")))])
;; Expander named vec_store_lanesei<mode>: sets an EI neon_struct_operand
;; (operand 0) from an unspec of an EI register (operand 1).  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the VDX vector mode.
4870 (define_expand "vec_store_lanesei<mode>"
4871 [(set (match_operand:EI 0 "neon_struct_operand")
4872 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4873 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-element structure store of an EI register for the VDX modes.
;; When <V_sz_elem> is 64 the output is "vst1.64"; otherwise it is
;; "vst3.<V_sz_elem>".
;; NOTE(review): the non-64-bit case is typed neon_store3_one_lane<q>
;; although the template stores the whole register list (%h1); the
;; matching load pattern uses neon_load3_3reg<q>.  Confirm that this
;; scheduling type is intended.
4877 (define_insn "neon_vst3<mode>"
4878 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4879 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4880 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4884 if (<V_sz_elem> == 64)
4885 return "vst1.64\t%h1, %A0";
4887 return "vst3.<V_sz_elem>\t%h1, %A0";
4890 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4891 (const_string "neon_store1_3reg<q>")
4892 (const_string "neon_store3_one_lane<q>")))])
;; Expander named vec_store_lanesci<mode> for the VQ modes; it simply
;; emits gen_neon_vst3<mode> on the same two operands (CI memory and CI
;; register).
4894 (define_expand "vec_store_lanesci<mode>"
4895 [(match_operand:CI 0 "neon_struct_operand")
4896 (match_operand:CI 1 "s_register_operand")
4897 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4900 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Three-element structure store of a CI register for the VQ modes,
;; expanded as two insns.  neon_vst3qa stores to the memory re-addressed
;; as EImode at offset 0; neon_vst3qb stores to the EImode memory
;; GET_MODE_SIZE (EImode) bytes further on.  Both take the full CI
;; register operands[1] as source.
4904 (define_expand "neon_vst3<mode>"
4905 [(match_operand:CI 0 "neon_struct_operand")
4906 (match_operand:CI 1 "s_register_operand")
4907 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4912 mem = adjust_address (operands[0], EImode, 0);
4913 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4914 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4915 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First of the two VST3 insns used for a CI register: stores the DImode
;; registers at regno, regno + 4 and regno + 8 (regno being the register
;; number of operand 1) to the EI memory operand 0.
4919 (define_insn "neon_vst3qa<mode>"
4920 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4921 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4922 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926 int regno = REGNO (operands[1]);
4928 ops[0] = operands[0];
4929 ops[1] = gen_rtx_REG (DImode, regno);
4930 ops[2] = gen_rtx_REG (DImode, regno + 4);
4931 ops[3] = gen_rtx_REG (DImode, regno + 8);
4932 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4935 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second of the two VST3 insns used for a CI register: stores the DImode
;; registers at regno + 2, regno + 6 and regno + 10 (regno being the
;; register number of operand 1) to the EI memory operand 0.
4938 (define_insn "neon_vst3qb<mode>"
4939 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4940 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4941 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4945 int regno = REGNO (operands[1]);
4947 ops[0] = operands[0];
4948 ops[1] = gen_rtx_REG (DImode, regno + 2);
4949 ops[2] = gen_rtx_REG (DImode, regno + 6);
4950 ops[3] = gen_rtx_REG (DImode, regno + 10);
4951 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4954 [(set_attr "type" "neon_store3_3reg<q>")]
;; VST3 of one lane of the vectors held in an EI register (VD modes).
;; Operand 0 is the memory (<V_three_elem> mode), operand 1 the source
;; register and operand 2 the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error ().  The
;; registers printed are the DImode registers at regno, regno + 2 and
;; regno + 4.
;; NOTE(review): the address is printed with plain %0 here, whereas the
;; vst2/vst4 lane patterns use the %A modifier -- presumably deliberate;
;; confirm.
4957 (define_insn "neon_vst3_lane<mode>"
4958 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4959 (unspec:<V_three_elem>
4960 [(match_operand:EI 1 "s_register_operand" "w")
4961 (match_operand:SI 2 "immediate_operand" "i")
4962 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4966 HOST_WIDE_INT lane = INTVAL (operands[2]);
4967 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4968 int regno = REGNO (operands[1]);
4970 if (lane < 0 || lane >= max)
4971 error ("lane out of range");
4972 ops[0] = operands[0];
4973 ops[1] = gen_rtx_REG (DImode, regno);
4974 ops[2] = gen_rtx_REG (DImode, regno + 2);
4975 ops[3] = gen_rtx_REG (DImode, regno + 4);
4976 ops[4] = operands[2];
4977 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4981 [(set_attr "type" "neon_store3_one_lane<q>")]
;; VST3 of one lane of the vectors held in a CI register (VMQ modes).
;; Operand 0 is the memory (<V_three_elem> mode), operand 1 the source
;; register and operand 2 the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error (); lanes
;; with lane >= max / 2 take a separate branch.  The registers printed
;; are the DImode registers at regno, regno + 4 and regno + 8, and the
;; lane printed is GEN_INT (lane).
;; NOTE(review): the address is printed with plain %0 here, whereas the
;; vst2/vst4 lane patterns use the %A modifier -- presumably deliberate;
;; confirm.
4984 (define_insn "neon_vst3_lane<mode>"
4985 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4986 (unspec:<V_three_elem>
4987 [(match_operand:CI 1 "s_register_operand" "w")
4988 (match_operand:SI 2 "immediate_operand" "i")
4989 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4993 HOST_WIDE_INT lane = INTVAL (operands[2]);
4994 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4995 int regno = REGNO (operands[1]);
4997 if (lane < 0 || lane >= max)
4998 error ("lane out of range");
4999 else if (lane >= max / 2)
5004 ops[0] = operands[0];
5005 ops[1] = gen_rtx_REG (DImode, regno);
5006 ops[2] = gen_rtx_REG (DImode, regno + 4);
5007 ops[3] = gen_rtx_REG (DImode, regno + 8);
5008 ops[4] = GEN_INT (lane);
5009 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5013 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander named vec_load_lanesoi<mode> for the VDX modes: sets an OI
;; register (operand 0) from an unspec of an OI neon_struct_operand
;; (operand 1).  The inner UNSPEC_VSTRUCTDUMMY unspec only carries the
;; VDX vector mode.
5016 (define_expand "vec_load_lanesoi<mode>"
5017 [(set (match_operand:OI 0 "s_register_operand")
5018 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5019 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-element structure load into an OI register for the VDX modes.
;; When <V_sz_elem> is 64 the output is "vld1.64"; otherwise it is
;; "vld4.<V_sz_elem>".  The type attribute follows the same test:
;; neon_load1_4reg<q> for 64-bit elements, neon_load4_4reg<q> otherwise.
5023 (define_insn "neon_vld4<mode>"
5024 [(set (match_operand:OI 0 "s_register_operand" "=w")
5025 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5026 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5030 if (<V_sz_elem> == 64)
5031 return "vld1.64\t%h0, %A1";
5033 return "vld4.<V_sz_elem>\t%h0, %A1";
5036 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5037 (const_string "neon_load1_4reg<q>")
5038 (const_string "neon_load4_4reg<q>")))]
;; Expander named vec_load_lanesxi<mode> for the VQ modes; it simply
;; emits gen_neon_vld4<mode> on the same two operands (XI register and
;; XI memory).
5041 (define_expand "vec_load_lanesxi<mode>"
5042 [(match_operand:XI 0 "s_register_operand")
5043 (match_operand:XI 1 "neon_struct_operand")
5044 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5047 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Four-element structure load into an XI register for the VQ modes,
;; expanded as two insns.  neon_vld4qa loads from the memory re-addressed
;; as OImode at offset 0; neon_vld4qb loads from the OImode memory
;; GET_MODE_SIZE (OImode) bytes further on and also takes operands[0] as
;; an input.
5051 (define_expand "neon_vld4<mode>"
5052 [(match_operand:XI 0 "s_register_operand")
5053 (match_operand:XI 1 "neon_struct_operand")
5054 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5059 mem = adjust_address (operands[1], OImode, 0);
5060 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5061 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5062 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First of the two VLD4 insns used for an XI register: loads from the
;; OI memory operand 1 into the DImode registers at regno, regno + 4,
;; regno + 8 and regno + 12, where regno is the register number of
;; operand 0.
5066 (define_insn "neon_vld4qa<mode>"
5067 [(set (match_operand:XI 0 "s_register_operand" "=w")
5068 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5069 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5073 int regno = REGNO (operands[0]);
5075 ops[0] = gen_rtx_REG (DImode, regno);
5076 ops[1] = gen_rtx_REG (DImode, regno + 4);
5077 ops[2] = gen_rtx_REG (DImode, regno + 8);
5078 ops[3] = gen_rtx_REG (DImode, regno + 12);
5079 ops[4] = operands[1];
5080 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5083 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second of the two VLD4 insns used for an XI register: loads from the
;; OI memory operand 1 into the DImode registers at regno + 2, regno + 6,
;; regno + 10 and regno + 14.  Operand 2 is the previous XI contents,
;; tied to the output (constraint "0").
5086 (define_insn "neon_vld4qb<mode>"
5087 [(set (match_operand:XI 0 "s_register_operand" "=w")
5088 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5089 (match_operand:XI 2 "s_register_operand" "0")
5090 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5094 int regno = REGNO (operands[0]);
5096 ops[0] = gen_rtx_REG (DImode, regno + 2);
5097 ops[1] = gen_rtx_REG (DImode, regno + 6);
5098 ops[2] = gen_rtx_REG (DImode, regno + 10);
5099 ops[3] = gen_rtx_REG (DImode, regno + 14);
5100 ops[4] = operands[1];
5101 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5104 [(set_attr "type" "neon_load4_4reg<q>")]
;; VLD4 into one lane of the vectors held in an OI register (VD modes).
;; Operand 1 is the memory (<V_four_elem> mode), operand 2 the previous
;; register contents tied to the output (constraint "0") and operand 3
;; the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error ().  The
;; registers printed are the DImode registers at regno, regno + 2,
;; regno + 4 and regno + 6.
5107 (define_insn "neon_vld4_lane<mode>"
5108 [(set (match_operand:OI 0 "s_register_operand" "=w")
5109 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5110 (match_operand:OI 2 "s_register_operand" "0")
5111 (match_operand:SI 3 "immediate_operand" "i")
5112 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5116 HOST_WIDE_INT lane = INTVAL (operands[3]);
5117 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5118 int regno = REGNO (operands[0]);
5120 if (lane < 0 || lane >= max)
5121 error ("lane out of range");
5122 ops[0] = gen_rtx_REG (DImode, regno);
5123 ops[1] = gen_rtx_REG (DImode, regno + 2);
5124 ops[2] = gen_rtx_REG (DImode, regno + 4);
5125 ops[3] = gen_rtx_REG (DImode, regno + 6);
5126 ops[4] = operands[1];
5127 ops[5] = operands[3];
5128 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5132 [(set_attr "type" "neon_load4_one_lane<q>")]
;; VLD4 into one lane of the vectors held in an XI register (VMQ modes).
;; Operand 1 is the memory (<V_four_elem> mode), operand 2 the previous
;; register contents tied to the output (constraint "0") and operand 3
;; the immediate lane number.  A lane outside
;; 0 .. GET_MODE_NUNITS (<MODE>mode) - 1 is reported with error (); lanes
;; with lane >= max / 2 take a separate branch.  The registers printed
;; are the DImode registers at regno, regno + 4, regno + 8 and
;; regno + 12, and the lane printed is GEN_INT (lane).
5135 (define_insn "neon_vld4_lane<mode>"
5136 [(set (match_operand:XI 0 "s_register_operand" "=w")
5137 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5138 (match_operand:XI 2 "s_register_operand" "0")
5139 (match_operand:SI 3 "immediate_operand" "i")
5140 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5144 HOST_WIDE_INT lane = INTVAL (operands[3]);
5145 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5146 int regno = REGNO (operands[0]);
5148 if (lane < 0 || lane >= max)
5149 error ("lane out of range");
5150 else if (lane >= max / 2)
5155 ops[0] = gen_rtx_REG (DImode, regno);
5156 ops[1] = gen_rtx_REG (DImode, regno + 4);
5157 ops[2] = gen_rtx_REG (DImode, regno + 8);
5158 ops[3] = gen_rtx_REG (DImode, regno + 12);
5159 ops[4] = operands[1];
5160 ops[5] = GEN_INT (lane);
5161 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5165 [(set_attr "type" "neon_load4_one_lane<q>")]
;; VLD4 to all lanes of an OI register for the VDX modes.  When the mode
;; has more than one unit, the all-lanes form is emitted on the DImode
;; registers at regno, regno + 2, regno + 4 and regno + 6; otherwise a
;; plain "vld1" of the register list %h0 is used.  The type attribute
;; makes the same distinction on <V_mode_nunits>.
5168 (define_insn "neon_vld4_dup<mode>"
5169 [(set (match_operand:OI 0 "s_register_operand" "=w")
5170 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5171 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5175 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5177 int regno = REGNO (operands[0]);
5179 ops[0] = gen_rtx_REG (DImode, regno);
5180 ops[1] = gen_rtx_REG (DImode, regno + 2);
5181 ops[2] = gen_rtx_REG (DImode, regno + 4);
5182 ops[3] = gen_rtx_REG (DImode, regno + 6);
5183 ops[4] = operands[1];
5184 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5189 return "vld1.<V_sz_elem>\t%h0, %A1";
5192 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5193 (const_string "neon_load4_all_lanes<q>")
5194 (const_string "neon_load1_1reg<q>")))]
;; Expander named vec_store_lanesoi<mode> for the VDX modes: sets an OI
;; neon_struct_operand (operand 0) from an unspec of an OI register
;; (operand 1).  The inner UNSPEC_VSTRUCTDUMMY unspec only carries the
;; VDX vector mode.
5197 (define_expand "vec_store_lanesoi<mode>"
5198 [(set (match_operand:OI 0 "neon_struct_operand")
5199 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5200 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-element structure store of an OI register for the VDX modes.
;; When <V_sz_elem> is 64 the output is "vst1.64"; otherwise it is
;; "vst4.<V_sz_elem>".  The type attribute follows the same test:
;; neon_store1_4reg<q> for 64-bit elements, neon_store4_4reg<q>
;; otherwise.
5204 (define_insn "neon_vst4<mode>"
5205 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5206 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5207 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5211 if (<V_sz_elem> == 64)
5212 return "vst1.64\t%h1, %A0";
5214 return "vst4.<V_sz_elem>\t%h1, %A0";
5217 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5218 (const_string "neon_store1_4reg<q>")
5219 (const_string "neon_store4_4reg<q>")))]
;; Expander named vec_store_lanesxi<mode> for the VQ modes; it simply
;; emits gen_neon_vst4<mode> on the same two operands (XI memory and XI
;; register).
5222 (define_expand "vec_store_lanesxi<mode>"
5223 [(match_operand:XI 0 "neon_struct_operand")
5224 (match_operand:XI 1 "s_register_operand")
5225 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5228 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; Four-element structure store of an XI register for the VQ modes,
;; expanded as two insns.  neon_vst4qa stores to the memory re-addressed
;; as OImode at offset 0; neon_vst4qb stores to the OImode memory
;; GET_MODE_SIZE (OImode) bytes further on.  Both take the full XI
;; register operands[1] as source.
5232 (define_expand "neon_vst4<mode>"
5233 [(match_operand:XI 0 "neon_struct_operand")
5234 (match_operand:XI 1 "s_register_operand")
5235 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5240 mem = adjust_address (operands[0], OImode, 0);
5241 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5242 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5243 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First of the two VST4 insns used for an XI register: stores the DImode
;; registers at regno, regno + 4, regno + 8 and regno + 12 (regno being
;; the register number of operand 1) to the OI memory operand 0.
5247 (define_insn "neon_vst4qa<mode>"
5248 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5249 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5250 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5254 int regno = REGNO (operands[1]);
5256 ops[0] = operands[0];
5257 ops[1] = gen_rtx_REG (DImode, regno);
5258 ops[2] = gen_rtx_REG (DImode, regno + 4);
5259 ops[3] = gen_rtx_REG (DImode, regno + 8);
5260 ops[4] = gen_rtx_REG (DImode, regno + 12);
5261 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5264 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second of the two VST4 insns used for an XI register: stores the
;; DImode registers at regno + 2, regno + 6, regno + 10 and regno + 14
;; (regno being the register number of operand 1) to the OI memory
;; operand 0.
5267 (define_insn "neon_vst4qb<mode>"
5268 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5269 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5270 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5274 int regno = REGNO (operands[1]);
5276 ops[0] = operands[0];
5277 ops[1] = gen_rtx_REG (DImode, regno + 2);
5278 ops[2] = gen_rtx_REG (DImode, regno + 6);
5279 ops[3] = gen_rtx_REG (DImode, regno + 10);
5280 ops[4] = gen_rtx_REG (DImode, regno + 14);
5281 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5284 [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane vst4 for the VD modes.  Operand 0 is <V_four_elem> memory,
;; operand 1 an OI-mode register and operand 2 an immediate lane number.
;; The lane is checked against [0, GET_MODE_NUNITS (<MODE>mode)); a value
;; outside that range is reported with error ("lane out of range").
;; The register list uses the DImode registers at REGNO (operands[1]) + 0,
;; + 2, + 4 and + 6, each indexed by the lane (printed through %c5).
5287 (define_insn "neon_vst4_lane<mode>"
5288 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5289 (unspec:<V_four_elem>
5290 [(match_operand:OI 1 "s_register_operand" "w")
5291 (match_operand:SI 2 "immediate_operand" "i")
5292 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5296 HOST_WIDE_INT lane = INTVAL (operands[2]);
5297 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5298 int regno = REGNO (operands[1]);
5300 if (lane < 0 || lane >= max)
5301 error ("lane out of range");
5302 ops[0] = operands[0];
5303 ops[1] = gen_rtx_REG (DImode, regno);
5304 ops[2] = gen_rtx_REG (DImode, regno + 2);
5305 ops[3] = gen_rtx_REG (DImode, regno + 4);
5306 ops[4] = gen_rtx_REG (DImode, regno + 6);
5307 ops[5] = operands[2];
5308 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5312 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Single-lane vst4 for the VMQ modes.  Operand 0 is <V_four_elem> memory,
;; operand 1 an XI-mode register and operand 2 an immediate lane number.
;; The lane is checked against [0, GET_MODE_NUNITS (<MODE>mode)); a value
;; outside that range is reported with error ("lane out of range").
;; Lanes in the upper half (lane >= max / 2) take a separate branch that
;; runs before the register list is built from `regno' and `lane'.
;; The list uses the DImode registers at regno + 0, + 4, + 8 and + 12,
;; and the lane is passed as GEN_INT (lane) rather than operands[2].
;; NOTE(review): the type attribute here is neon_store4_4reg<q>, whereas
;; the VD variant of this pattern uses neon_store4_one_lane<q> -- confirm
;; the difference is intentional.
5315 (define_insn "neon_vst4_lane<mode>"
5316 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5317 (unspec:<V_four_elem>
5318 [(match_operand:XI 1 "s_register_operand" "w")
5319 (match_operand:SI 2 "immediate_operand" "i")
5320 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324 HOST_WIDE_INT lane = INTVAL (operands[2]);
5325 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5326 int regno = REGNO (operands[1]);
5328 if (lane < 0 || lane >= max)
5329 error ("lane out of range");
5330 else if (lane >= max / 2)
5335 ops[0] = operands[0];
5336 ops[1] = gen_rtx_REG (DImode, regno);
5337 ops[2] = gen_rtx_REG (DImode, regno + 4);
5338 ops[3] = gen_rtx_REG (DImode, regno + 8);
5339 ops[4] = gen_rtx_REG (DImode, regno + 12);
5340 ops[5] = GEN_INT (lane);
5341 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5345 [(set_attr "type" "neon_store4_4reg<q>")]
;; Widening unpack of the low half of a VU-mode vector: the <SE> extension
;; is applied to the <V_HALF> elements selected from operand 1 by operand 2,
;; which must satisfy vect_par_constant_low.  Emits vmovl with %e1 as the
;; source and the full %q0 as destination.  Little-endian only.
5348 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5349 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5350 (SE:<V_unpack> (vec_select:<V_HALF>
5351 (match_operand:VU 1 "register_operand" "w")
5352 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5353 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5354 "vmovl.<US><V_sz_elem> %q0, %e1"
5355 [(set_attr "type" "neon_shift_imm_long")]
;; Widening unpack of the high half of a VU-mode vector: the <SE> extension
;; is applied to the <V_HALF> elements selected from operand 1 by operand 2,
;; which must satisfy vect_par_constant_high.  Emits vmovl with %f1 as the
;; source and the full %q0 as destination.  Little-endian only.
5358 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5359 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5360 (SE:<V_unpack> (vec_select:<V_HALF>
5361 (match_operand:VU 1 "register_operand" "w")
5362 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5363 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5364 "vmovl.<US><V_sz_elem> %q0, %f1"
5365 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the VU modes (little-endian only).  Builds a PARALLEL of
;; <V_mode_nunits>/2 constants whose values are <V_mode_nunits>/2 + i,
;; i.e. the indices of the upper half of the elements, and emits
;; neon_vec_unpack<US>_hi_<mode> with operands[0] as the destination.
5368 (define_expand "vec_unpack<US>_hi_<mode>"
5369 [(match_operand:<V_unpack> 0 "register_operand" "")
5370 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5371 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5373 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5376 for (i = 0; i < (<V_mode_nunits>/2); i++)
5377 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5379 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5380 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for the VU modes (little-endian only).  Builds a PARALLEL of
;; <V_mode_nunits>/2 constants whose values are 0 .. <V_mode_nunits>/2 - 1,
;; i.e. the indices of the lower half of the elements, and emits
;; neon_vec_unpack<US>_lo_<mode> with operands[0] as the destination.
5387 (define_expand "vec_unpack<US>_lo_<mode>"
5388 [(match_operand:<V_unpack> 0 "register_operand" "")
5389 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5390 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5392 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5395 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5396 RTVEC_ELT (v, i) = GEN_INT (i);
5397 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5398 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the low halves of two VU-mode vectors.  Each input
;; (operands 1 and 3) is narrowed to <V_HALF> by a vec_select and then
;; extended with <SE> before the multiply; operand 2 is the selector and
;; must satisfy vect_par_constant_low.  Emits vmull on %e1 and %e3 with
;; the full %q0 as destination.  Little-endian only.
5405 (define_insn "neon_vec_<US>mult_lo_<mode>"
5406 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5407 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5408 (match_operand:VU 1 "register_operand" "w")
5409 (match_operand:VU 2 "vect_par_constant_low" "")))
5410 (SE:<V_unpack> (vec_select:<V_HALF>
5411 (match_operand:VU 3 "register_operand" "w")
5413 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5414 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5415 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the VU modes (little-endian only).  Builds a PARALLEL of
;; <V_mode_nunits>/2 constants with values 0 .. <V_mode_nunits>/2 - 1 (the
;; lower-half element indices) and emits neon_vec_<US>mult_lo_<mode> with
;; operands[0] as the destination.
5418 (define_expand "vec_widen_<US>mult_lo_<mode>"
5419 [(match_operand:<V_unpack> 0 "register_operand" "")
5420 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5421 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5422 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5424 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5427 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5428 RTVEC_ELT (v, i) = GEN_INT (i);
5429 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5431 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves of two VU-mode vectors.  Each input
;; (operands 1 and 3) is narrowed to <V_HALF> by a vec_select and then
;; extended with <SE> before the multiply; operand 2 is the selector and
;; must satisfy vect_par_constant_high.  Emits vmull on %f1 and %f3 with
;; the full %q0 as destination.  Little-endian only.
5439 (define_insn "neon_vec_<US>mult_hi_<mode>"
5440 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5441 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5442 (match_operand:VU 1 "register_operand" "w")
5443 (match_operand:VU 2 "vect_par_constant_high" "")))
5444 (SE:<V_unpack> (vec_select:<V_HALF>
5445 (match_operand:VU 3 "register_operand" "w")
5447 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5448 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5449 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the VU modes (little-endian only).  Builds a PARALLEL of
;; <V_mode_nunits>/2 constants with values <V_mode_nunits>/2 + i (the
;; upper-half element indices) and emits neon_vec_<US>mult_hi_<mode> with
;; operands[0] as the destination.
5452 (define_expand "vec_widen_<US>mult_hi_<mode>"
5453 [(match_operand:<V_unpack> 0 "register_operand" "")
5454 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5455 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5456 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5458 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5461 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5462 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5463 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5465 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left for the VW modes: operand 1 is shifted left by
;; operand 2 (which must satisfy const_neon_scalar_shift_amount_operand)
;; and the result is extended with <SE> to <V_widen>.  Emits vshll with
;; %P1 as source, %2 as the shift amount and the full %q0 as destination.
5474 (define_insn "neon_vec_<US>shiftl_<mode>"
5475 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5476 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5477 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5480 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5482 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the VU modes (little-endian only).  Takes the <V_HALF>mode
;; subreg of operand 1 at byte offset 0 and emits
;; neon_vec_<US>shiftl_<V_half> on it, with operands[0] as destination.
5485 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5486 [(match_operand:<V_unpack> 0 "register_operand" "")
5487 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5488 (match_operand:SI 2 "immediate_operand" "i")]
5489 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5491 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5492 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for the VU modes (little-endian only).  Takes the <V_HALF>mode
;; subreg of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode), i.e.
;; the half that follows the one used by the _lo_ expander, and emits
;; neon_vec_<US>shiftl_<V_half> on it, with operands[0] as destination.
5498 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5499 [(match_operand:<V_unpack> 0 "register_operand" "")
5500 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5501 (match_operand:SI 2 "immediate_operand" "i")]
5502 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5504 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5505 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5506 GET_MODE_SIZE (<V_HALF>mode)),
5512 ;; Vectorize for non-neon-quad case
;; <SE> extension of a whole VDI-mode register (operand 1) into a
;; <V_widen> register (operand 0).  Emits vmovl with %P1 as source and
;; the full %q0 as destination.
5513 (define_insn "neon_unpack<US>_<mode>"
5514 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5515 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5517 "vmovl.<US><V_sz_elem> %q0, %P1"
5518 [(set_attr "type" "neon_move")]
;; Expander for the VDI modes.  Widens all of operand 1 into a fresh
;; <V_widen>mode temporary with neon_unpack<US>_<mode>, then copies the
;; low part of that temporary into operands[0] with
;; neon_vget_low<V_widen_l>.
5521 (define_expand "vec_unpack<US>_lo_<mode>"
5522 [(match_operand:<V_double_width> 0 "register_operand" "")
5523 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5526 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5527 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5528 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes.  Widens all of operand 1 into a fresh
;; <V_widen>mode temporary with neon_unpack<US>_<mode>, then copies the
;; high part of that temporary into operands[0] with
;; neon_vget_high<V_widen_l>.
5534 (define_expand "vec_unpack<US>_hi_<mode>"
5535 [(match_operand:<V_double_width> 0 "register_operand" "")
5536 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5539 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5540 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5541 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI-mode registers (operands 1 and 2)
;; into a <V_widen> register (operand 0).  Emits vmull with %P1 and %P2
;; as sources and the full %q0 as destination.
5547 (define_insn "neon_vec_<US>mult_<mode>"
5548 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5549 (mult:<V_widen> (SE:<V_widen>
5550 (match_operand:VDI 1 "register_operand" "w"))
5552 (match_operand:VDI 2 "register_operand" "w"))))]
5554 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5555 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the VDI modes.  Multiplies operands 1 and 2 into a fresh
;; <V_widen>mode temporary with neon_vec_<US>mult_<mode>, then copies the
;; high part of that temporary into operands[0] with
;; neon_vget_high<V_widen_l>.
5558 (define_expand "vec_widen_<US>mult_hi_<mode>"
5559 [(match_operand:<V_double_width> 0 "register_operand" "")
5560 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5561 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5564 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5565 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5566 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes.  Multiplies operands 1 and 2 into a fresh
;; <V_widen>mode temporary with neon_vec_<US>mult_<mode>, then copies the
;; low part of that temporary into operands[0] with
;; neon_vget_low<V_widen_l>.
5573 (define_expand "vec_widen_<US>mult_lo_<mode>"
5574 [(match_operand:<V_double_width> 0 "register_operand" "")
5575 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5576 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5579 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5580 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5581 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes.  Applies neon_vec_<US>shiftl_<mode> to
;; operand 1 with shift amount operand 2, writing a fresh <V_widen>mode
;; temporary, then copies the high part of that temporary into operands[0]
;; with neon_vget_high<V_widen_l>.
5588 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5589 [(match_operand:<V_double_width> 0 "register_operand" "")
5590 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5591 (match_operand:SI 2 "immediate_operand" "i")]
5594 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5595 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5596 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes.  Applies neon_vec_<US>shiftl_<mode> to
;; operand 1 with shift amount operand 2, writing a fresh <V_widen>mode
;; temporary, then copies the low part of that temporary into operands[0]
;; with neon_vget_low<V_widen_l>.
5602 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5603 [(match_operand:<V_double_width> 0 "register_operand" "")
5604 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5605 (match_operand:SI 2 "immediate_operand" "i")]
5608 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5609 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5610 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5616 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5617 ; because the ordering of vector elements in Q registers is different from what
5618 ; the semantics of the instructions require.
;; Narrowing pack for the VN modes (little-endian only): operands 1 and 2
;; are each truncated to <V_narrow> and concatenated into operand 0.
;; Implemented as two vmovn instructions (hence length 8 and type
;; "multiple"): the first writes %e0 from %q1, the second writes %f0 from
;; %q2.  Operand 0 carries the earlyclobber modifier ("=&w"), consistent
;; with the first instruction writing the destination before the second
;; instruction reads operand 2.
5620 (define_insn "vec_pack_trunc_<mode>"
5621 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5622 (vec_concat:<V_narrow_pack>
5623 (truncate:<V_narrow>
5624 (match_operand:VN 1 "register_operand" "w"))
5625 (truncate:<V_narrow>
5626 (match_operand:VN 2 "register_operand" "w"))))]
5627 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5628 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5629 [(set_attr "type" "multiple")
5630 (set_attr "length" "8")]
5633 ;; For the non-quad case.
;; Truncation of a single VN-mode register (operand 1) to <V_narrow>
;; (operand 0), emitted as one vmovn from %q1 into %P0.
;; Little-endian only.
5634 (define_insn "neon_vec_pack_trunc_<mode>"
5635 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5636 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5637 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5638 "vmovn.i<V_sz_elem>\t%P0, %q1"
5639 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for the VSHFT modes (little-endian only).  Assembles a
;; <V_DOUBLE>mode temporary from the two inputs -- operand 1 through
;; move_lo_quad_<V_double>, operand 2 through move_hi_quad_<V_double> --
;; and then narrows the temporary into operands[0] with
;; neon_vec_pack_trunc_<V_double>.
5642 (define_expand "vec_pack_trunc_<mode>"
5643 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5644 (match_operand:VSHFT 1 "register_operand" "")
5645 (match_operand:VSHFT 2 "register_operand")]
5646 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5648 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5650 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5651 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5652 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference for the VDQ modes, expressed as abs (minus (op1,
;; op2)), emitted as vabd.<V_s_elem>.  For floating-point modes the
;; pattern is only enabled under flag_unsafe_math_optimizations.  The
;; type attribute is neon_fp_abd_s<q> for float modes and neon_abd<q>
;; otherwise.
5656 (define_insn "neon_vabd<mode>_2"
5657 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5658 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5659 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5660 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5661 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5663 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5664 (const_string "neon_fp_abd_s<q>")
5665 (const_string "neon_abd<q>")))]
;; Absolute difference for the VDQ modes, expressed as abs of an unspec
;; over operands 1 and 2, emitted as vabd.<V_if_elem>.  For floating-point
;; modes the pattern is only enabled under flag_unsafe_math_optimizations.
;; The type attribute is neon_fp_abd_s<q> for float modes and neon_abd<q>
;; otherwise.
5668 (define_insn "neon_vabd<mode>_3"
5669 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5670 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5671 (match_operand:VDQ 2 "s_register_operand" "w")]
5673 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5674 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5676 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5677 (const_string "neon_fp_abd_s<q>")
5678 (const_string "neon_abd<q>")))]
5681 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; SImode -> DImode sign extension, rewritten after reload when the
;; destination is a VFP register.  Operand 1 is duplicated into every
;; lane of the V2SI view of the destination register (operands[2]), so
;; the value also occupies the top 32 bits; an arithmetic right shift of
;; the DImode destination by 32 then yields the sign-extended result.
5684 [(set (match_operand:DI 0 "s_register_operand" "")
5685 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5686 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5687 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5688 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5690 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode sign extension, rewritten after reload when the
;; destination is a VFP register.  Operand 1 is duplicated into every
;; lane of the V4HI view of the destination register (operands[2]), so
;; the value also occupies the top 16 bits; an arithmetic right shift of
;; the DImode destination by 48 then yields the sign-extended result.
5694 [(set (match_operand:DI 0 "s_register_operand" "")
5695 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5696 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5697 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5698 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5700 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode -> DImode sign extension, rewritten after reload when the
;; destination is a VFP register.  Operand 1 is duplicated into every
;; lane of the V8QI view of the destination register (operands[2]), so
;; the value also occupies the top 8 bits; an arithmetic right shift of
;; the DImode destination by 56 then yields the sign-extended result.
5704 [(set (match_operand:DI 0 "s_register_operand" "")
5705 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5706 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5707 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5708 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5710 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SImode -> DImode zero extension, rewritten after reload when the
;; destination is a VFP register.  Operand 1 is duplicated into every
;; lane of the V2SI view of the destination register (operands[2]), so
;; the value also occupies the top 32 bits; a logical right shift of the
;; DImode destination by 32 then yields the zero-extended result.
5714 [(set (match_operand:DI 0 "s_register_operand" "")
5715 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5716 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5717 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5718 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5720 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode zero extension, rewritten after reload when the
;; destination is a VFP register.  Operand 1 is duplicated into every
;; lane of the V4HI view of the destination register (operands[2]), so
;; the value also occupies the top 16 bits; a logical right shift of the
;; DImode destination by 48 then yields the zero-extended result.
5724 [(set (match_operand:DI 0 "s_register_operand" "")
5725 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5726 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5727 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5728 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5730 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode -> DImode zero extension, rewritten after reload when the
;; destination is a VFP register.  Operand 1 is duplicated into every
;; lane of the V8QI view of the destination register (operands[2]), so
;; the value also occupies the top 8 bits; a logical right shift of the
;; DImode destination by 56 then yields the zero-extended result.
5734 [(set (match_operand:DI 0 "s_register_operand" "")
5735 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5736 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5737 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5738 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5740 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));