2017-11-17 Steve Ellcey <sellcey@cavium.com>
[official-gcc.git] / gcc / config / aarch64 / aarch64-simd.md
blobcddd935d96589c52519334bd1b8c24e80ea475f3
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3.  If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_expand "mov<mode>"
22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23         (match_operand:VALL_F16 1 "general_operand" ""))]
24   "TARGET_SIMD"
25   "
26   /* Force the operand into a register if it is not an
27      immediate whose use can be replaced with xzr.
28      If the mode is 16 bytes wide, then we will be doing
29      a stp in DI mode, so we check the validity of that.
30      If the mode is 8 bytes wide, then we will do doing a
31      normal str, so the check need not apply.  */
32   if (GET_CODE (operands[0]) == MEM
33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34            && ((GET_MODE_SIZE (<MODE>mode) == 16
35                 && aarch64_mem_pair_operand (operands[0], DImode))
36                || GET_MODE_SIZE (<MODE>mode) == 8)))
37       operands[1] = force_reg (<MODE>mode, operands[1]);
38   "
41 (define_expand "movmisalign<mode>"
42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43         (match_operand:VALL 1 "general_operand" ""))]
44   "TARGET_SIMD"
46   /* This pattern is not permitted to fail during expansion: if both arguments
47      are non-registers (e.g. memory := constant, which can be created by the
48      auto-vectorizer), force operand 1 into a register.  */
49   if (!register_operand (operands[0], <MODE>mode)
50       && !register_operand (operands[1], <MODE>mode))
51     operands[1] = force_reg (<MODE>mode, operands[1]);
54 (define_insn "aarch64_simd_dup<mode>"
55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56         (vec_duplicate:VDQ_I
57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58   "TARGET_SIMD"
59   "@
60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
61    dup\\t%0.<Vtype>, %<vw>1"
62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
65 (define_insn "aarch64_simd_dup<mode>"
66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67         (vec_duplicate:VDQF_F16
68           (match_operand:<VEL> 1 "register_operand" "w")))]
69   "TARGET_SIMD"
70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71   [(set_attr "type" "neon_dup<q>")]
74 (define_insn "aarch64_dup_lane<mode>"
75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76         (vec_duplicate:VALL_F16
77           (vec_select:<VEL>
78             (match_operand:VALL_F16 1 "register_operand" "w")
79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80           )))]
81   "TARGET_SIMD"
82   {
83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85   }
86   [(set_attr "type" "neon_dup<q>")]
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91         (vec_duplicate:VALL_F16_NO_V2Q
92           (vec_select:<VEL>
93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95           )))]
96   "TARGET_SIMD"
97   {
98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100   }
101   [(set_attr "type" "neon_dup<q>")]
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105   [(set (match_operand:VD 0 "nonimmediate_operand"
106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
107         (match_operand:VD 1 "general_operand"
108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
109   "TARGET_SIMD
110    && (register_operand (operands[0], <MODE>mode)
111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113    switch (which_alternative)
114      {
115      case 0: return "ldr\t%d0, %1";
116      case 1: return "str\txzr, %0";
117      case 2: return "str\t%d1, %0";
118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119      case 4: return "umov\t%0, %1.d[0]";
120      case 5: return "fmov\t%d0, %1";
121      case 6: return "mov\t%0, %1";
122      case 7:
123         return aarch64_output_simd_mov_immediate (operands[1],
124                                                   <MODE>mode, 64);
125      default: gcc_unreachable ();
126      }
128   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
129                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
130                      mov_reg, neon_move<q>")]
133 (define_insn "*aarch64_simd_mov<VQ:mode>"
134   [(set (match_operand:VQ 0 "nonimmediate_operand"
135                 "=w, Umq,  m,  w, ?r, ?w, ?r, w")
136         (match_operand:VQ 1 "general_operand"
137                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
138   "TARGET_SIMD
139    && (register_operand (operands[0], <MODE>mode)
140        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
142   switch (which_alternative)
143     {
144     case 0:
145         return "ldr\t%q0, %1";
146     case 1:
147         return "stp\txzr, xzr, %0";
148     case 2:
149         return "str\t%q1, %0";
150     case 3:
151         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
152     case 4:
153     case 5:
154     case 6:
155         return "#";
156     case 7:
157         return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
158     default:
159         gcc_unreachable ();
160     }
162   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
163                      neon_logic<q>, multiple, multiple,\
164                      multiple, neon_move<q>")
165    (set_attr "length" "4,4,4,4,8,8,8,4")]
168 ;; When storing lane zero we can use the normal STR and its more permissive
169 ;; addressing modes.
171 (define_insn "aarch64_store_lane0<mode>"
172   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
173         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
174                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
175   "TARGET_SIMD
176    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
177   "str\\t%<Vetype>1, %0"
178   [(set_attr "type" "neon_store1_1reg<q>")]
181 (define_insn "load_pair<mode>"
182   [(set (match_operand:VD 0 "register_operand" "=w")
183         (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
184    (set (match_operand:VD 2 "register_operand" "=w")
185         (match_operand:VD 3 "memory_operand" "m"))]
186   "TARGET_SIMD
187    && rtx_equal_p (XEXP (operands[3], 0),
188                    plus_constant (Pmode,
189                                   XEXP (operands[1], 0),
190                                   GET_MODE_SIZE (<MODE>mode)))"
191   "ldp\\t%d0, %d2, %1"
192   [(set_attr "type" "neon_ldp")]
195 (define_insn "store_pair<mode>"
196   [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
197         (match_operand:VD 1 "register_operand" "w"))
198    (set (match_operand:VD 2 "memory_operand" "=m")
199         (match_operand:VD 3 "register_operand" "w"))]
200   "TARGET_SIMD
201    && rtx_equal_p (XEXP (operands[2], 0),
202                    plus_constant (Pmode,
203                                   XEXP (operands[0], 0),
204                                   GET_MODE_SIZE (<MODE>mode)))"
205   "stp\\t%d1, %d3, %0"
206   [(set_attr "type" "neon_stp")]
209 (define_split
210   [(set (match_operand:VQ 0 "register_operand" "")
211       (match_operand:VQ 1 "register_operand" ""))]
212   "TARGET_SIMD && reload_completed
213    && GP_REGNUM_P (REGNO (operands[0]))
214    && GP_REGNUM_P (REGNO (operands[1]))"
215   [(const_int 0)]
217   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
218   DONE;
221 (define_split
222   [(set (match_operand:VQ 0 "register_operand" "")
223         (match_operand:VQ 1 "register_operand" ""))]
224   "TARGET_SIMD && reload_completed
225    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
226        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
227   [(const_int 0)]
229   aarch64_split_simd_move (operands[0], operands[1]);
230   DONE;
233 (define_expand "aarch64_split_simd_mov<mode>"
234   [(set (match_operand:VQ 0)
235         (match_operand:VQ 1))]
236   "TARGET_SIMD"
237   {
238     rtx dst = operands[0];
239     rtx src = operands[1];
241     if (GP_REGNUM_P (REGNO (src)))
242       {
243         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
244         rtx src_high_part = gen_highpart (<VHALF>mode, src);
246         emit_insn
247           (gen_move_lo_quad_<mode> (dst, src_low_part));
248         emit_insn
249           (gen_move_hi_quad_<mode> (dst, src_high_part));
250       }
252     else
253       {
254         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
255         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
256         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
257         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
259         emit_insn
260           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
261         emit_insn
262           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
263       }
264     DONE;
265   }
268 (define_insn "aarch64_simd_mov_from_<mode>low"
269   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
270         (vec_select:<VHALF>
271           (match_operand:VQ 1 "register_operand" "w")
272           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
273   "TARGET_SIMD && reload_completed"
274   "umov\t%0, %1.d[0]"
275   [(set_attr "type" "neon_to_gp<q>")
276    (set_attr "length" "4")
277   ])
279 (define_insn "aarch64_simd_mov_from_<mode>high"
280   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
281         (vec_select:<VHALF>
282           (match_operand:VQ 1 "register_operand" "w")
283           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
284   "TARGET_SIMD && reload_completed"
285   "umov\t%0, %1.d[1]"
286   [(set_attr "type" "neon_to_gp<q>")
287    (set_attr "length" "4")
288   ])
290 (define_insn "orn<mode>3"
291  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
292        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
293                 (match_operand:VDQ_I 2 "register_operand" "w")))]
294  "TARGET_SIMD"
295  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
296   [(set_attr "type" "neon_logic<q>")]
299 (define_insn "bic<mode>3"
300  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
301        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
302                 (match_operand:VDQ_I 2 "register_operand" "w")))]
303  "TARGET_SIMD"
304  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
305   [(set_attr "type" "neon_logic<q>")]
308 (define_insn "add<mode>3"
309   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
310         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
311                   (match_operand:VDQ_I 2 "register_operand" "w")))]
312   "TARGET_SIMD"
313   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
314   [(set_attr "type" "neon_add<q>")]
317 (define_insn "sub<mode>3"
318   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
320                    (match_operand:VDQ_I 2 "register_operand" "w")))]
321   "TARGET_SIMD"
322   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
323   [(set_attr "type" "neon_sub<q>")]
326 (define_insn "mul<mode>3"
327   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
328         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
329                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
330   "TARGET_SIMD"
331   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
332   [(set_attr "type" "neon_mul_<Vetype><q>")]
335 (define_insn "bswap<mode>2"
336   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
337         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
338   "TARGET_SIMD"
339   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
340   [(set_attr "type" "neon_rev<q>")]
343 (define_insn "aarch64_rbit<mode>"
344   [(set (match_operand:VB 0 "register_operand" "=w")
345         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
346                    UNSPEC_RBIT))]
347   "TARGET_SIMD"
348   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
349   [(set_attr "type" "neon_rbit")]
352 (define_expand "ctz<mode>2"
353   [(set (match_operand:VS 0 "register_operand")
354         (ctz:VS (match_operand:VS 1 "register_operand")))]
355   "TARGET_SIMD"
356   {
357      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
358      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
359                                              <MODE>mode, 0);
360      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
361      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
362      DONE;
363   }
366 (define_expand "xorsign<mode>3"
367   [(match_operand:VHSDF 0 "register_operand")
368    (match_operand:VHSDF 1 "register_operand")
369    (match_operand:VHSDF 2 "register_operand")]
370   "TARGET_SIMD"
373   machine_mode imode = <V_INT_EQUIV>mode;
374   rtx v_bitmask = gen_reg_rtx (imode);
375   rtx op1x = gen_reg_rtx (imode);
376   rtx op2x = gen_reg_rtx (imode);
378   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
379   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
381   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
383   emit_move_insn (v_bitmask,
384                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
385                                                      HOST_WIDE_INT_M1U << bits));
387   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
388   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
389   emit_move_insn (operands[0],
390                   lowpart_subreg (<MODE>mode, op1x, imode));
391   DONE;
395 ;; These instructions map to the __builtins for the Dot Product operations.
396 (define_insn "aarch64_<sur>dot<vsi2qi>"
397   [(set (match_operand:VS 0 "register_operand" "=w")
398         (plus:VS (match_operand:VS 1 "register_operand" "0")
399                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
400                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
401                 DOTPROD)))]
402   "TARGET_DOTPROD"
403   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
404   [(set_attr "type" "neon_dot")]
407 ;; These expands map to the Dot Product optab the vectorizer checks for.
408 ;; The auto-vectorizer expects a dot product builtin that also does an
409 ;; accumulation into the provided register.
410 ;; Given the following pattern
412 ;; for (i=0; i<len; i++) {
413 ;;     c = a[i] * b[i];
414 ;;     r += c;
415 ;; }
416 ;; return result;
418 ;; This can be auto-vectorized to
419 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
421 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
422 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
423 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
424 ;; ...
426 ;; and so the vectorizer provides r, in which the result has to be accumulated.
427 (define_expand "<sur>dot_prod<vsi2qi>"
428   [(set (match_operand:VS 0 "register_operand")
429         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
430                             (match_operand:<VSI2QI> 2 "register_operand")]
431                  DOTPROD)
432                 (match_operand:VS 3 "register_operand")))]
433   "TARGET_DOTPROD"
435   emit_insn (
436     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
437                                     operands[2]));
438   emit_insn (gen_rtx_SET (operands[0], operands[3]));
439   DONE;
442 ;; These instructions map to the __builtins for the Dot Product
443 ;; indexed operations.
444 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
445   [(set (match_operand:VS 0 "register_operand" "=w")
446         (plus:VS (match_operand:VS 1 "register_operand" "0")
447                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
448                             (match_operand:V8QI 3 "register_operand" "<h_con>")
449                             (match_operand:SI 4 "immediate_operand" "i")]
450                 DOTPROD)))]
451   "TARGET_DOTPROD"
452   {
453     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
454     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
455   }
456   [(set_attr "type" "neon_dot")]
459 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
460   [(set (match_operand:VS 0 "register_operand" "=w")
461         (plus:VS (match_operand:VS 1 "register_operand" "0")
462                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
463                             (match_operand:V16QI 3 "register_operand" "<h_con>")
464                             (match_operand:SI 4 "immediate_operand" "i")]
465                 DOTPROD)))]
466   "TARGET_DOTPROD"
467   {
468     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
469     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
470   }
471   [(set_attr "type" "neon_dot")]
474 (define_expand "copysign<mode>3"
475   [(match_operand:VHSDF 0 "register_operand")
476    (match_operand:VHSDF 1 "register_operand")
477    (match_operand:VHSDF 2 "register_operand")]
478   "TARGET_FLOAT && TARGET_SIMD"
480   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
481   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
483   emit_move_insn (v_bitmask,
484                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
485                                                      HOST_WIDE_INT_M1U << bits));
486   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
487                                          operands[2], operands[1]));
488   DONE;
492 (define_insn "*aarch64_mul3_elt<mode>"
493  [(set (match_operand:VMUL 0 "register_operand" "=w")
494     (mult:VMUL
495       (vec_duplicate:VMUL
496           (vec_select:<VEL>
497             (match_operand:VMUL 1 "register_operand" "<h_con>")
498             (parallel [(match_operand:SI 2 "immediate_operand")])))
499       (match_operand:VMUL 3 "register_operand" "w")))]
500   "TARGET_SIMD"
501   {
502     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
503     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
504   }
505   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
508 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
509   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
510      (mult:VMUL_CHANGE_NLANES
511        (vec_duplicate:VMUL_CHANGE_NLANES
512           (vec_select:<VEL>
513             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
514             (parallel [(match_operand:SI 2 "immediate_operand")])))
515       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
516   "TARGET_SIMD"
517   {
518     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
519     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
520   }
521   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
524 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
525  [(set (match_operand:VMUL 0 "register_operand" "=w")
526     (mult:VMUL
527       (vec_duplicate:VMUL
528             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
529       (match_operand:VMUL 2 "register_operand" "w")))]
530   "TARGET_SIMD"
531   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
532   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
535 (define_insn "aarch64_rsqrte<mode>"
536   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
537         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
538                      UNSPEC_RSQRTE))]
539   "TARGET_SIMD"
540   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
541   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
543 (define_insn "aarch64_rsqrts<mode>"
544   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
545         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
546                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
547          UNSPEC_RSQRTS))]
548   "TARGET_SIMD"
549   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
550   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
552 (define_expand "rsqrt<mode>2"
553   [(set (match_operand:VALLF 0 "register_operand" "=w")
554         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
555                      UNSPEC_RSQRT))]
556   "TARGET_SIMD"
558   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
559   DONE;
562 (define_insn "*aarch64_mul3_elt_to_64v2df"
563   [(set (match_operand:DF 0 "register_operand" "=w")
564      (mult:DF
565        (vec_select:DF
566          (match_operand:V2DF 1 "register_operand" "w")
567          (parallel [(match_operand:SI 2 "immediate_operand")]))
568        (match_operand:DF 3 "register_operand" "w")))]
569   "TARGET_SIMD"
570   {
571     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
572     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
573   }
574   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
577 (define_insn "neg<mode>2"
578   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
579         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
580   "TARGET_SIMD"
581   "neg\t%0.<Vtype>, %1.<Vtype>"
582   [(set_attr "type" "neon_neg<q>")]
585 (define_insn "abs<mode>2"
586   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
587         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
588   "TARGET_SIMD"
589   "abs\t%0.<Vtype>, %1.<Vtype>"
590   [(set_attr "type" "neon_abs<q>")]
593 ;; The intrinsic version of integer ABS must not be allowed to
594 ;; combine with any operation with an integerated ABS step, such
595 ;; as SABD.
596 (define_insn "aarch64_abs<mode>"
597   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
598           (unspec:VSDQ_I_DI
599             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
600            UNSPEC_ABS))]
601   "TARGET_SIMD"
602   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
603   [(set_attr "type" "neon_abs<q>")]
606 (define_insn "abd<mode>_3"
607   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
608         (abs:VDQ_BHSI (minus:VDQ_BHSI
609                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
610                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
611   "TARGET_SIMD"
612   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
613   [(set_attr "type" "neon_abd<q>")]
616 (define_insn "aba<mode>_3"
617   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
618         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
619                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
620                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
621                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
622   "TARGET_SIMD"
623   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
624   [(set_attr "type" "neon_arith_acc<q>")]
627 (define_insn "fabd<mode>3"
628   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
629         (abs:VHSDF_HSDF
630           (minus:VHSDF_HSDF
631             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
632             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
633   "TARGET_SIMD"
634   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
635   [(set_attr "type" "neon_fp_abd_<stype><q>")]
638 ;; For AND (vector, register) and BIC (vector, immediate)
639 (define_insn "and<mode>3"
640   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
641         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
642                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
643   "TARGET_SIMD"
644   {
645     switch (which_alternative)
646       {
647       case 0:
648         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
649       case 1:
650         return aarch64_output_simd_mov_immediate (operands[2],
651            <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
652       default:
653         gcc_unreachable ();
654       }
655   }
656   [(set_attr "type" "neon_logic<q>")]
659 ;; For ORR (vector, register) and ORR (vector, immediate)
660 (define_insn "ior<mode>3"
661   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
662         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
663                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
664   "TARGET_SIMD"
665   {
666     switch (which_alternative)
667       {
668       case 0:
669         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
670       case 1:
671         return aarch64_output_simd_mov_immediate (operands[2],
672                 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
673       default:
674         gcc_unreachable ();
675       }
676   }
677   [(set_attr "type" "neon_logic<q>")]
680 (define_insn "xor<mode>3"
681   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
682         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
683                  (match_operand:VDQ_I 2 "register_operand" "w")))]
684   "TARGET_SIMD"
685   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
686   [(set_attr "type" "neon_logic<q>")]
689 (define_insn "one_cmpl<mode>2"
690   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
691         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
692   "TARGET_SIMD"
693   "not\t%0.<Vbtype>, %1.<Vbtype>"
694   [(set_attr "type" "neon_logic<q>")]
697 (define_insn "aarch64_simd_vec_set<mode>"
698   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
699         (vec_merge:VDQ_BHSI
700             (vec_duplicate:VDQ_BHSI
701                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
702             (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
703             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
704   "TARGET_SIMD"
705   {
706    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
707    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
708    switch (which_alternative)
709      {
710      case 0:
711         return "ins\\t%0.<Vetype>[%p2], %w1";
712      case 1:
713         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
714      case 2:
715         return "ld1\\t{%0.<Vetype>}[%p2], %1";
716      default:
717         gcc_unreachable ();
718      }
719   }
720   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
723 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
724   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
725         (vec_merge:VALL_F16
726             (vec_duplicate:VALL_F16
727               (vec_select:<VEL>
728                 (match_operand:VALL_F16 3 "register_operand" "w")
729                 (parallel
730                   [(match_operand:SI 4 "immediate_operand" "i")])))
731             (match_operand:VALL_F16 1 "register_operand" "0")
732             (match_operand:SI 2 "immediate_operand" "i")))]
733   "TARGET_SIMD"
734   {
735     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
736     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
737     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
739     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
740   }
741   [(set_attr "type" "neon_ins<q>")]
744 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
745   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
746         (vec_merge:VALL_F16_NO_V2Q
747             (vec_duplicate:VALL_F16_NO_V2Q
748               (vec_select:<VEL>
749                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
750                 (parallel
751                   [(match_operand:SI 4 "immediate_operand" "i")])))
752             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
753             (match_operand:SI 2 "immediate_operand" "i")))]
754   "TARGET_SIMD"
755   {
756     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
757     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
758     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
759                                            INTVAL (operands[4]));
761     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
762   }
763   [(set_attr "type" "neon_ins<q>")]
766 (define_insn "aarch64_simd_lshr<mode>"
767  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
768        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
769                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
770  "TARGET_SIMD"
771  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
772   [(set_attr "type" "neon_shift_imm<q>")]
775 (define_insn "aarch64_simd_ashr<mode>"
776  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
777        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
778                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
779  "TARGET_SIMD"
780  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
781   [(set_attr "type" "neon_shift_imm<q>")]
784 (define_insn "aarch64_simd_imm_shl<mode>"
785  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
786        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
787                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
788  "TARGET_SIMD"
789   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
790   [(set_attr "type" "neon_shift_imm<q>")]
793 (define_insn "aarch64_simd_reg_sshl<mode>"
794  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
795        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
796                    (match_operand:VDQ_I 2 "register_operand" "w")))]
797  "TARGET_SIMD"
798  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
799   [(set_attr "type" "neon_shift_reg<q>")]
802 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
803  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
804        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
805                     (match_operand:VDQ_I 2 "register_operand" "w")]
806                    UNSPEC_ASHIFT_UNSIGNED))]
807  "TARGET_SIMD"
808  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
809   [(set_attr "type" "neon_shift_reg<q>")]
812 (define_insn "aarch64_simd_reg_shl<mode>_signed"
813  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
814        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
815                     (match_operand:VDQ_I 2 "register_operand" "w")]
816                    UNSPEC_ASHIFT_SIGNED))]
817  "TARGET_SIMD"
818  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
819   [(set_attr "type" "neon_shift_reg<q>")]
;; Standard ashl<mode>3 pattern: vector shift left by a scalar (SI) amount.
;; Immediate amounts in [0, bit_width) use the SHL immediate form; anything
;; else is broadcast to a vector register and handled with SSHL.
822 (define_expand "ashl<mode>3"
823   [(match_operand:VDQ_I 0 "register_operand" "")
824    (match_operand:VDQ_I 1 "register_operand" "")
825    (match_operand:SI  2 "general_operand" "")]
826  "TARGET_SIMD"
828   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
829   int shift_amount;
831   if (CONST_INT_P (operands[2]))
832     {
833       shift_amount = INTVAL (operands[2]);
        /* SHL's immediate must be 0..bit_width-1.  */
834       if (shift_amount >= 0 && shift_amount < bit_width)
835         {
            /* Duplicate the amount into every lane of a const vector.  */
836           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
837                                                        shift_amount);
838           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
839                                                      operands[1],
840                                                      tmp));
841           DONE;
842         }
843       else
844         {
            /* Out-of-range immediate: fall through to the register path.  */
845           operands[2] = force_reg (SImode, operands[2]);
846         }
847     }
848   else if (MEM_P (operands[2]))
849     {
850       operands[2] = force_reg (SImode, operands[2]);
851     }
853   if (REG_P (operands[2]))
854     {
        /* Broadcast the scalar amount to all lanes, then use SSHL.  */
855       rtx tmp = gen_reg_rtx (<MODE>mode);
856       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
857                                              convert_to_mode (<VEL>mode,
858                                                               operands[2],
859                                                               0)));
860       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
861                                                   tmp));
862       DONE;
863     }
864   else
865     FAIL;
;; Standard lshr<mode>3 pattern: vector logical shift right by a scalar.
;; Immediate amounts in (0, bit_width] use the USHR immediate form
;; (right-shift immediates encode 1..bit_width); otherwise the amount is
;; negated, broadcast, and fed to USHL, which shifts right for negative
;; per-lane amounts.
869 (define_expand "lshr<mode>3"
870   [(match_operand:VDQ_I 0 "register_operand" "")
871    (match_operand:VDQ_I 1 "register_operand" "")
872    (match_operand:SI  2 "general_operand" "")]
873  "TARGET_SIMD"
875   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
876   int shift_amount;
878   if (CONST_INT_P (operands[2]))
879     {
880       shift_amount = INTVAL (operands[2]);
        /* Right-shift immediates are 1..bit_width, unlike left shifts.  */
881       if (shift_amount > 0 && shift_amount <= bit_width)
882         {
883           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
884                                                        shift_amount);
885           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
886                                                   operands[1],
887                                                   tmp));
888           DONE;
889         }
890       else
891         operands[2] = force_reg (SImode, operands[2]);
892     }
893   else if (MEM_P (operands[2]))
894     {
895       operands[2] = force_reg (SImode, operands[2]);
896     }
898   if (REG_P (operands[2]))
899     {
        /* Negate the amount, broadcast it, and shift right via USHL.  */
900       rtx tmp = gen_reg_rtx (SImode);
901       rtx tmp1 = gen_reg_rtx (<MODE>mode);
902       emit_insn (gen_negsi2 (tmp, operands[2]));
903       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
904                                              convert_to_mode (<VEL>mode,
905                                                               tmp, 0)));
906       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
907                                                           operands[1],
908                                                           tmp1));
909       DONE;
910     }
911   else
912     FAIL;
;; Standard ashr<mode>3 pattern: vector arithmetic shift right by a scalar.
;; Mirrors lshr<mode>3 above, but uses the SSHR immediate form and the
;; signed (SSHL) register path with a negated amount.
916 (define_expand "ashr<mode>3"
917   [(match_operand:VDQ_I 0 "register_operand" "")
918    (match_operand:VDQ_I 1 "register_operand" "")
919    (match_operand:SI  2 "general_operand" "")]
920  "TARGET_SIMD"
922   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
923   int shift_amount;
925   if (CONST_INT_P (operands[2]))
926     {
927       shift_amount = INTVAL (operands[2]);
        /* Right-shift immediates are 1..bit_width.  */
928       if (shift_amount > 0 && shift_amount <= bit_width)
929         {
930           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
931                                                        shift_amount);
932           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
933                                                   operands[1],
934                                                   tmp));
935           DONE;
936         }
937       else
938         operands[2] = force_reg (SImode, operands[2]);
939     }
940   else if (MEM_P (operands[2]))
941     {
942       operands[2] = force_reg (SImode, operands[2]);
943     }
945   if (REG_P (operands[2]))
946     {
        /* Negate the amount, broadcast it, and shift right via SSHL.  */
947       rtx tmp = gen_reg_rtx (SImode);
948       rtx tmp1 = gen_reg_rtx (<MODE>mode);
949       emit_insn (gen_negsi2 (tmp, operands[2]));
950       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
951                                              convert_to_mode (<VEL>mode,
952                                                               tmp, 0)));
953       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
954                                                         operands[1],
955                                                         tmp1));
956       DONE;
957     }
958   else
959     FAIL;
;; vashl<mode>3: vector-by-vector shift left; the per-lane amounts are
;; already in a vector register, so this maps directly onto SSHL.
963 (define_expand "vashl<mode>3"
964  [(match_operand:VDQ_I 0 "register_operand" "")
965   (match_operand:VDQ_I 1 "register_operand" "")
966   (match_operand:VDQ_I 2 "register_operand" "")]
967  "TARGET_SIMD"
969   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
970                                               operands[2]));
971   DONE;
974 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
975 ;; Negating individual lanes most certainly offsets the
976 ;; gain from vectorization.
;; vashr<mode>3: vector-by-vector arithmetic shift right, implemented as
;; SSHL with the per-lane amounts negated.
977 (define_expand "vashr<mode>3"
978  [(match_operand:VDQ_BHSI 0 "register_operand" "")
979   (match_operand:VDQ_BHSI 1 "register_operand" "")
980   (match_operand:VDQ_BHSI 2 "register_operand" "")]
981  "TARGET_SIMD"
983   rtx neg = gen_reg_rtx (<MODE>mode);
984   emit (gen_neg<mode>2 (neg, operands[2]));
985   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
986                                                     neg));
987   DONE;
990 ;; DI vector shift
;; Arithmetic right shift of a 64-bit scalar-in-SIMD value; the amount is
;; an immediate in 1..64 (aarch64_shift_imm64_di).
991 (define_expand "aarch64_ashr_simddi"
992   [(match_operand:DI 0 "register_operand" "=w")
993    (match_operand:DI 1 "register_operand" "w")
994    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
995   "TARGET_SIMD"
996   {
997     /* An arithmetic shift right by 64 fills the result with copies of the sign
998        bit, just like asr by 63 - however the standard pattern does not handle
999        a shift by 64.  */
1000     if (INTVAL (operands[2]) == 64)
1001       operands[2] = GEN_INT (63);
1002     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1003     DONE;
1004   }
;; vlshr<mode>3: vector-by-vector logical shift right, implemented as
;; USHL with the per-lane amounts negated (VDQ_BHSI only - no V2DI neg,
;; see the comment above vashr<mode>3).
1007 (define_expand "vlshr<mode>3"
1008  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1009   (match_operand:VDQ_BHSI 1 "register_operand" "")
1010   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1011  "TARGET_SIMD"
1013   rtx neg = gen_reg_rtx (<MODE>mode);
1014   emit (gen_neg<mode>2 (neg, operands[2]));
1015   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1016                                                       neg));
1017   DONE;
;; Logical right shift of a 64-bit scalar-in-SIMD value.  A shift by 64
;; (which the standard pattern cannot express) simply yields zero.
1020 (define_expand "aarch64_lshr_simddi"
1021   [(match_operand:DI 0 "register_operand" "=w")
1022    (match_operand:DI 1 "register_operand" "w")
1023    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1024   "TARGET_SIMD"
1025   {
1026     if (INTVAL (operands[2]) == 64)
1027       emit_move_insn (operands[0], const0_rtx);
1028     else
1029       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1030     DONE;
1031   }
;; vec_set for integer vectors: insert scalar operand 1 into lane
;; operand 2 of vector operand 0.  The lane index is converted to a
;; one-hot mask (1 << lane) because the vec_merge insn below takes a
;; bitmask selector rather than an index.
1034 (define_expand "vec_set<mode>"
1035   [(match_operand:VDQ_BHSI 0 "register_operand")
1036    (match_operand:<VEL> 1 "register_operand")
1037    (match_operand:SI 2 "immediate_operand")]
1038   "TARGET_SIMD"
1039   {
1040     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1041     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1042                                             GEN_INT (elem), operands[0]));
1043     DONE;
1044   }
1047 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-register shift right (in bits) of a 64-bit vector.  On big-endian
;; the lanes sit in reversed architectural order, so the same logical
;; operation is an architectural shift LEFT (shl) instead of ushr.
1048 (define_insn "vec_shr_<mode>"
1049   [(set (match_operand:VD 0 "register_operand" "=w")
1050         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1051                     (match_operand:SI 2 "immediate_operand" "i")]
1052                    UNSPEC_VEC_SHR))]
1053   "TARGET_SIMD"
1054   {
1055     if (BYTES_BIG_ENDIAN)
1056       return "shl %d0, %d1, %2";
1057     else
1058       return "ushr %d0, %d1, %2";
1059   }
1060   [(set_attr "type" "neon_shift_imm")]
;; Lane insert for V2DI.  Operand 2 is a one-hot lane mask; it is decoded
;; with exact_log2 and remapped for endianness, then rewritten so %p2 can
;; print the lane number.  Alternative 0 inserts from a GP register,
;; alternative 1 from lane 0 of another SIMD register.
1063 (define_insn "aarch64_simd_vec_setv2di"
1064   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1065         (vec_merge:V2DI
1066             (vec_duplicate:V2DI
1067                 (match_operand:DI 1 "register_operand" "r,w"))
1068             (match_operand:V2DI 3 "register_operand" "0,0")
1069             (match_operand:SI 2 "immediate_operand" "i,i")))]
1070   "TARGET_SIMD"
1071   {
1072     int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
1073     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1074     switch (which_alternative)
1075       {
1076       case 0:
1077         return "ins\\t%0.d[%p2], %1";
1078       case 1:
1079         return "ins\\t%0.d[%p2], %1.d[0]";
1080       default:
1081         gcc_unreachable ();
1082       }
1083   }
1084   [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set for V2DI: same one-hot-mask conversion as the integer
;; expander above.
1087 (define_expand "vec_setv2di"
1088   [(match_operand:V2DI 0 "register_operand")
1089    (match_operand:DI 1 "register_operand")
1090    (match_operand:SI 2 "immediate_operand")]
1091   "TARGET_SIMD"
1092   {
1093     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1094     emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1095                                           GEN_INT (elem), operands[0]));
1096     DONE;
1097   }
;; Lane insert for FP/FP16 vectors: always from lane 0 of a SIMD source
;; register, using INS with an endian-adjusted lane number.
1100 (define_insn "aarch64_simd_vec_set<mode>"
1101   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1102         (vec_merge:VDQF_F16
1103             (vec_duplicate:VDQF_F16
1104                 (match_operand:<VEL> 1 "register_operand" "w"))
1105             (match_operand:VDQF_F16 3 "register_operand" "0")
1106             (match_operand:SI 2 "immediate_operand" "i")))]
1107   "TARGET_SIMD"
1108   {
1109     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1111     operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1112     return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1113   }
1114   [(set_attr "type" "neon_ins<q>")]
;; vec_set for FP/FP16 vectors: one-hot-mask conversion as above.
1117 (define_expand "vec_set<mode>"
1118   [(match_operand:VDQF_F16 0 "register_operand" "+w")
1119    (match_operand:<VEL> 1 "register_operand" "w")
1120    (match_operand:SI 2 "immediate_operand" "")]
1121   "TARGET_SIMD"
1122   {
1123     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1124     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1125                                           GEN_INT (elem), operands[0]));
1126     DONE;
1127   }
;; Integer multiply-accumulate: op0 = op1 + op2 * op3 (MLA).  Operand 1
;; is tied to the output ("0") because MLA accumulates in place.
1131 (define_insn "aarch64_mla<mode>"
1132  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1133        (plus:VDQ_BHSI (mult:VDQ_BHSI
1134                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1135                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1136                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1137  "TARGET_SIMD"
1138  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1139   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA where one multiplicand is a single lane of a vector, matched as
;; (vec_duplicate (vec_select ...)); emitted as the by-element MLA form.
;; The lane index is remapped for big-endian by aarch64_endian_lane_rtx.
1142 (define_insn "*aarch64_mla_elt<mode>"
1143  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1144        (plus:VDQHS
1145          (mult:VDQHS
1146            (vec_duplicate:VDQHS
1147               (vec_select:<VEL>
1148                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1149                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1150            (match_operand:VDQHS 3 "register_operand" "w"))
1151          (match_operand:VDQHS 4 "register_operand" "0")))]
1152  "TARGET_SIMD"
1153   {
1154     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1155     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1156   }
1157   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane source vector has the opposite width (64<->128
;; bit) from the destination mode.
1160 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1161  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1162        (plus:VDQHS
1163          (mult:VDQHS
1164            (vec_duplicate:VDQHS
1165               (vec_select:<VEL>
1166                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1167                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1168            (match_operand:VDQHS 3 "register_operand" "w"))
1169          (match_operand:VDQHS 4 "register_operand" "0")))]
1170  "TARGET_SIMD"
1171   {
1172     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1173     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1174   }
1175   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA where one multiplicand is a duplicated scalar register; uses the
;; by-element form with lane 0.
1178 (define_insn "*aarch64_mla_elt_merge<mode>"
1179   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1180         (plus:VDQHS
1181           (mult:VDQHS (vec_duplicate:VDQHS
1182                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1183                 (match_operand:VDQHS 2 "register_operand" "w"))
1184           (match_operand:VDQHS 3 "register_operand" "0")))]
1185  "TARGET_SIMD"
1186  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1187   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Integer multiply-subtract: op0 = op1 - op2 * op3 (MLS), accumulator
;; tied to the output as for MLA.
1190 (define_insn "aarch64_mls<mode>"
1191  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1192        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1193                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1194                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1195  "TARGET_SIMD"
1196  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1197   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; By-element MLS; mirrors *aarch64_mla_elt<mode> above.
1200 (define_insn "*aarch64_mls_elt<mode>"
1201  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1202        (minus:VDQHS
1203          (match_operand:VDQHS 4 "register_operand" "0")
1204          (mult:VDQHS
1205            (vec_duplicate:VDQHS
1206               (vec_select:<VEL>
1207                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1208                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1209            (match_operand:VDQHS 3 "register_operand" "w"))))]
1210  "TARGET_SIMD"
1211   {
1212     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1213     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1214   }
1215   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; By-element MLS with the opposite-width lane source; mirrors
;; *aarch64_mla_elt_<vswap_width_name><mode> above.
1218 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1219  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1220        (minus:VDQHS
1221          (match_operand:VDQHS 4 "register_operand" "0")
1222          (mult:VDQHS
1223            (vec_duplicate:VDQHS
1224               (vec_select:<VEL>
1225                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1226                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1227            (match_operand:VDQHS 3 "register_operand" "w"))))]
1228  "TARGET_SIMD"
1229   {
1230     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1231     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1232   }
1233   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with a duplicated scalar multiplicand; by-element form, lane 0.
1236 (define_insn "*aarch64_mls_elt_merge<mode>"
1237   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1238         (minus:VDQHS
1239           (match_operand:VDQHS 1 "register_operand" "0")
1240           (mult:VDQHS (vec_duplicate:VDQHS
1241                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1242                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1243   "TARGET_SIMD"
1244   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1245   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1248 ;; Max/Min operations.
;; Element-wise signed/unsigned max/min (SMAX/SMIN/UMAX/UMIN) for 8/16/32
;; bit element vectors.
1249 (define_insn "<su><maxmin><mode>3"
1250  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1251        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1252                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1253  "TARGET_SIMD"
1254  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1255   [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min: AdvSIMD has no 64-bit element max/min instruction, so
;; synthesize it as a compare followed by a vector conditional select
;; (vcondv2div2di) between the two inputs.
1258 (define_expand "<su><maxmin>v2di3"
1259  [(set (match_operand:V2DI 0 "register_operand" "")
1260        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1261                     (match_operand:V2DI 2 "register_operand" "")))]
1262  "TARGET_SIMD"
1264   enum rtx_code cmp_operator;
1265   rtx cmp_fmt;
    /* Map the max/min code onto the comparison that selects operand 1.  */
1267   switch (<CODE>)
1268     {
1269     case UMIN:
1270       cmp_operator = LTU;
1271       break;
1272     case SMIN:
1273       cmp_operator = LT;
1274       break;
1275     case UMAX:
1276       cmp_operator = GTU;
1277       break;
1278     case SMAX:
1279       cmp_operator = GT;
1280       break;
1281     default:
1282       gcc_unreachable ();
1283     }
1285   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1286   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1287               operands[2], cmp_fmt, operands[1], operands[2]));
1288   DONE;
1291 ;; Pairwise Integer Max/Min operations.
;; SMAXP/SMINP/UMAXP/UMINP: each result lane is the max/min of a pair of
;; adjacent lanes drawn from the concatenation of the two inputs.
1292 (define_insn "aarch64_<maxmin_uns>p<mode>"
1293  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1294        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1295                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1296                         MAXMINV))]
1297  "TARGET_SIMD"
1298  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1299   [(set_attr "type" "neon_minmax<q>")]
1302 ;; Pairwise FP Max/Min operations.
;; FP counterpart of the pairwise max/min pattern above (FMAXP etc.).
1303 (define_insn "aarch64_<maxmin_uns>p<mode>"
1304  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1305        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1306                       (match_operand:VHSDF 2 "register_operand" "w")]
1307                       FMAXMINV))]
1308  "TARGET_SIMD"
1309  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1310   [(set_attr "type" "neon_minmax<q>")]
1313 ;; vec_concat gives a new vector with the low elements from operand 1, and
1314 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1315 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1316 ;; What that means, is that the RTL descriptions of the below patterns
1317 ;; need to change depending on endianness.
1319 ;; Move to the low architectural bits of the register.
1320 ;; On little-endian this is { operand, zeroes }
1321 ;; On big-endian this is { zeroes, operand }
;; Little-endian variant for modes with more than two elements: the zero
;; half is written as (vec_duplicate (const_int 0)).  Three alternatives:
;; source in a SIMD reg (dup), or in a GP reg (fmov / dup).
1323 (define_insn "move_lo_quad_internal_<mode>"
1324   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1325         (vec_concat:VQ_NO2E
1326           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1327           (vec_duplicate:<VHALF> (const_int 0))))]
1328   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1329   "@
1330    dup\\t%d0, %1.d[0]
1331    fmov\\t%d0, %1
1332    dup\\t%d0, %1"
1333   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1334    (set_attr "simd" "yes,*,yes")
1335    (set_attr "fp" "*,yes,*")
1336    (set_attr "length" "4")]
;; Little-endian variant for two-element modes, where the zero half is a
;; scalar and is written as a bare (const_int 0).
1339 (define_insn "move_lo_quad_internal_<mode>"
1340   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1341         (vec_concat:VQ_2E
1342           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1343           (const_int 0)))]
1344   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1345   "@
1346    dup\\t%d0, %1.d[0]
1347    fmov\\t%d0, %1
1348    dup\\t%d0, %1"
1349   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1350    (set_attr "simd" "yes,*,yes")
1351    (set_attr "fp" "*,yes,*")
1352    (set_attr "length" "4")]
;; Big-endian variant of the VQ_NO2E pattern: the vec_concat operand
;; order is swapped, but the emitted instructions are identical.
1355 (define_insn "move_lo_quad_internal_be_<mode>"
1356   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1357         (vec_concat:VQ_NO2E
1358           (vec_duplicate:<VHALF> (const_int 0))
1359           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1360   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1361   "@
1362    dup\\t%d0, %1.d[0]
1363    fmov\\t%d0, %1
1364    dup\\t%d0, %1"
1365   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1366    (set_attr "simd" "yes,*,yes")
1367    (set_attr "fp" "*,yes,*")
1368    (set_attr "length" "4")]
;; Big-endian variant of the VQ_2E pattern.
1371 (define_insn "move_lo_quad_internal_be_<mode>"
1372   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1373         (vec_concat:VQ_2E
1374           (const_int 0)
1375           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1376   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1377   "@
1378    dup\\t%d0, %1.d[0]
1379    fmov\\t%d0, %1
1380    dup\\t%d0, %1"
1381   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1382    (set_attr "simd" "yes,*,yes")
1383    (set_attr "fp" "*,yes,*")
1384    (set_attr "length" "4")]
;; Entry point: dispatch to the endian-appropriate internal pattern.
1387 (define_expand "move_lo_quad_<mode>"
1388   [(match_operand:VQ 0 "register_operand")
1389    (match_operand:VQ 1 "register_operand")]
1390   "TARGET_SIMD"
1392   if (BYTES_BIG_ENDIAN)
1393     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1394   else
1395     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1396   DONE;
1400 ;; Move operand1 to the high architectural bits of the register, keeping
1401 ;; the low architectural bits of operand2.
1402 ;; For little-endian this is { operand2, operand1 }
1403 ;; For big-endian this is { operand1, operand2 }
;; Little-endian insn: the preserved low half is expressed as a
;; vec_select of the destination itself (match_dup 0) using a
;; lo-half parallel.  Emits INS into d[1], from either a SIMD or GP reg.
1405 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1406   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1407         (vec_concat:VQ
1408           (vec_select:<VHALF>
1409                 (match_dup 0)
1410                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1411           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1412   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1413   "@
1414    ins\\t%0.d[1], %1.d[0]
1415    ins\\t%0.d[1], %1"
1416   [(set_attr "type" "neon_ins")]
;; Big-endian counterpart with the vec_concat operands swapped; the
;; emitted instructions are the same.
1419 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1420   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1421         (vec_concat:VQ
1422           (match_operand:<VHALF> 1 "register_operand" "w,r")
1423           (vec_select:<VHALF>
1424                 (match_dup 0)
1425                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1426   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1427   "@
1428    ins\\t%0.d[1], %1.d[0]
1429    ins\\t%0.d[1], %1"
1430   [(set_attr "type" "neon_ins")]
;; Entry point: build the lo-half lane-selection parallel and dispatch
;; to the endian-appropriate insn.
1433 (define_expand "move_hi_quad_<mode>"
1434  [(match_operand:VQ 0 "register_operand" "")
1435   (match_operand:<VHALF> 1 "register_operand" "")]
1436  "TARGET_SIMD"
1438   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1439   if (BYTES_BIG_ENDIAN)
1440     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1441                     operands[1], p));
1442   else
1443     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1444                     operands[1], p));
1445   DONE;
1448 ;; Narrowing operations.
1450 ;; For doubles.
;; Truncate each element of a 128-bit vector to half width (XTN).
1451 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1452  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1453        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1454  "TARGET_SIMD"
1455  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1456   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack-truncate two 64-bit vectors: assemble them into a 128-bit temp
;; (halves swapped on big-endian so the architectural layout matches the
;; expected lane order), then narrow with XTN.
1459 (define_expand "vec_pack_trunc_<mode>"
1460  [(match_operand:<VNARROWD> 0 "register_operand" "")
1461   (match_operand:VDN 1 "register_operand" "")
1462   (match_operand:VDN 2 "register_operand" "")]
1463  "TARGET_SIMD"
1465   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1466   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1467   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1469   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1470   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1471   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1472   DONE;
1475 ;; For quads.
;; Pack-truncate two 128-bit vectors with XTN + XTN2.  The output is
;; early-clobbered ("=&w") because the first XTN writes it before the
;; second input is consumed; the order of the two XTNs flips with
;; endianness.
1477 (define_insn "vec_pack_trunc_<mode>"
1478  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1479        (vec_concat:<VNARROWQ2>
1480          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1481          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1482  "TARGET_SIMD"
1484    if (BYTES_BIG_ENDIAN)
1485      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1486    else
1487      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1489   [(set_attr "type" "multiple")
1490    (set_attr "length" "8")]
1493 ;; Widening operations.
;; Sign/zero-extend the low half of a 128-bit vector to double-width
;; elements; implemented as a shift-left-long by 0 (SSHLL/USHLL #0).
1495 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1496   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1497         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1498                                (match_operand:VQW 1 "register_operand" "w")
1499                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1500                             )))]
1501   "TARGET_SIMD"
1502   "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1503   [(set_attr "type" "neon_shift_imm_long")]
;; Same for the high half, using the "2" (upper-half) instruction form.
1506 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1507   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1508         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1509                                (match_operand:VQW 1 "register_operand" "w")
1510                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1511                             )))]
1512   "TARGET_SIMD"
1513   "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1514   [(set_attr "type" "neon_shift_imm_long")]
;; Standard vec_unpack hi expander: build the hi-half lane parallel and
;; emit the insn above.
1517 (define_expand "vec_unpack<su>_hi_<mode>"
1518   [(match_operand:<VWIDE> 0 "register_operand" "")
1519    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1520   "TARGET_SIMD"
1521   {
1522     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1523     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1524                                                           operands[1], p));
1525     DONE;
1526   }
;; Standard vec_unpack lo expander; as above with the lo-half parallel.
1529 (define_expand "vec_unpack<su>_lo_<mode>"
1530   [(match_operand:<VWIDE> 0 "register_operand" "")
1531    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1532   "TARGET_SIMD"
1533   {
1534     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1535     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1536                                                           operands[1], p));
1537     DONE;
1538   }
1541 ;; Widening arithmetic.
;; Widening multiply-accumulate of the LOW halves of two 128-bit vectors:
;; op0 = op1 + extend(lo(op2)) * extend(lo(op4)).  ANY_EXTEND covers both
;; SMLAL (sign_extend) and UMLAL (zero_extend); match_dup 3 forces both
;; inputs to select the same half.
1543 (define_insn "*aarch64_<su>mlal_lo<mode>"
1544   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1545         (plus:<VWIDE>
1546           (mult:<VWIDE>
1547               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1548                  (match_operand:VQW 2 "register_operand" "w")
1549                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1550               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1551                  (match_operand:VQW 4 "register_operand" "w")
1552                  (match_dup 3))))
1553           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1554   "TARGET_SIMD"
1555   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1556   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half variant: SMLAL2/UMLAL2.
1559 (define_insn "*aarch64_<su>mlal_hi<mode>"
1560   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1561         (plus:<VWIDE>
1562           (mult:<VWIDE>
1563               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1564                  (match_operand:VQW 2 "register_operand" "w")
1565                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1566               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1567                  (match_operand:VQW 4 "register_operand" "w")
1568                  (match_dup 3))))
1569           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1570   "TARGET_SIMD"
1571   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1572   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract, low halves: SMLSL/UMLSL.
1575 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1576   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1577         (minus:<VWIDE>
1578           (match_operand:<VWIDE> 1 "register_operand" "0")
1579           (mult:<VWIDE>
1580               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581                  (match_operand:VQW 2 "register_operand" "w")
1582                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1583               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584                  (match_operand:VQW 4 "register_operand" "w")
1585                  (match_dup 3))))))]
1586   "TARGET_SIMD"
1587   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1588   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract, high halves: SMLSL2/UMLSL2.
1591 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1592   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1593         (minus:<VWIDE>
1594           (match_operand:<VWIDE> 1 "register_operand" "0")
1595           (mult:<VWIDE>
1596               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1597                  (match_operand:VQW 2 "register_operand" "w")
1598                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1599               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1600                  (match_operand:VQW 4 "register_operand" "w")
1601                  (match_dup 3))))))]
1602   "TARGET_SIMD"
1603   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1604   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate of whole 64-bit vectors (no half
;; selection needed): SMLAL/UMLAL.
1607 (define_insn "*aarch64_<su>mlal<mode>"
1608   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1609         (plus:<VWIDE>
1610           (mult:<VWIDE>
1611             (ANY_EXTEND:<VWIDE>
1612               (match_operand:VD_BHSI 1 "register_operand" "w"))
1613             (ANY_EXTEND:<VWIDE>
1614               (match_operand:VD_BHSI 2 "register_operand" "w")))
1615           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1616   "TARGET_SIMD"
1617   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1618   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract of whole 64-bit vectors: SMLSL/UMLSL.
1621 (define_insn "*aarch64_<su>mlsl<mode>"
1622   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1623         (minus:<VWIDE>
1624           (match_operand:<VWIDE> 1 "register_operand" "0")
1625           (mult:<VWIDE>
1626             (ANY_EXTEND:<VWIDE>
1627               (match_operand:VD_BHSI 2 "register_operand" "w"))
1628             (ANY_EXTEND:<VWIDE>
1629               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1630   "TARGET_SIMD"
1631   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1632   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the LOW halves: SMULL/UMULL.  ANY_EXTEND covers
;; signed and unsigned; match_dup 3 keeps both inputs on the same half.
1635 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1636  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1637        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1638                            (match_operand:VQW 1 "register_operand" "w")
1639                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1640                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1641                            (match_operand:VQW 2 "register_operand" "w")
1642                            (match_dup 3)))))]
1643   "TARGET_SIMD"
1644   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1645   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard vec_widen_mult lo expander: build the lo-half parallel and
;; emit the insn above.
1648 (define_expand "vec_widen_<su>mult_lo_<mode>"
1649   [(match_operand:<VWIDE> 0 "register_operand" "")
1650    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1651    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1652  "TARGET_SIMD"
1654    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1655    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1656                                                        operands[1],
1657                                                        operands[2], p));
1658    DONE;
;; Widening multiply of the HIGH halves: SMULL2/UMULL2.
1662 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1663  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1664       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1665                             (match_operand:VQW 1 "register_operand" "w")
1666                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1667                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1668                             (match_operand:VQW 2 "register_operand" "w")
1669                             (match_dup 3)))))]
1670   "TARGET_SIMD"
1671   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1672   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard vec_widen_mult hi expander; as above with the hi-half
;; parallel.
1675 (define_expand "vec_widen_<su>mult_hi_<mode>"
1676   [(match_operand:<VWIDE> 0 "register_operand" "")
1677    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1678    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1679  "TARGET_SIMD"
1681    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1682    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1683                                                        operands[1],
1684                                                        operands[2], p));
1685    DONE;
1690 ;; FP vector operations.
1691 ;; AArch64 AdvSIMD supports single-precision (32-bit) and 
1692 ;; double-precision (64-bit) floating-point data types and arithmetic as
1693 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable 
1694 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1696 ;; Floating-point operations can raise an exception.  Vectorizing such
1697 ;; operations is safe, for the reasons explained below.
1699 ;; ARMv8 permits an extension to enable trapped floating-point
1700 ;; exception handling, however this is an optional feature.  In the
1701 ;; event of a floating-point exception being raised by vectorised
1702 ;; code then:
1703 ;; 1.  If trapped floating-point exceptions are available, then a trap
1704 ;;     will be taken when any lane raises an enabled exception.  A trap
1705 ;;     handler may determine which lane raised the exception.
1706 ;; 2.  Alternatively a sticky exception flag is set in the
1707 ;;     floating-point status register (FPSR).  Software may explicitly
1708 ;;     test the exception flags, in which case the tests will either
1709 ;;     prevent vectorisation, allowing precise identification of the
1710 ;;     failing operation, or if tested outside of vectorisable regions
1711 ;;     then the specific operation and lane are not of interest.
1713 ;; FP arithmetic operations.
;; Vector FP addition (FADD); standard add<mode>3 pattern over all FP
;; vector modes (VHSDF: half, single and double element vectors).
1715 (define_insn "add<mode>3"
1716  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1717        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1718                    (match_operand:VHSDF 2 "register_operand" "w")))]
1719  "TARGET_SIMD"
1720  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1721   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtraction (FSUB).
1724 (define_insn "sub<mode>3"
1725  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1726        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1727                     (match_operand:VHSDF 2 "register_operand" "w")))]
1728  "TARGET_SIMD"
1729  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1730   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiplication (FMUL).
1733 (define_insn "mul<mode>3"
1734  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1735        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1736                    (match_operand:VHSDF 2 "register_operand" "w")))]
1737  "TARGET_SIMD"
1738  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1739   [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Vector FP division.  The expander first tries the Newton-Raphson
;; approximation sequence (aarch64_emit_approx_div); if that declines,
;; it falls through to the plain FDIV insn below.
1742 (define_expand "div<mode>3"
1743  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1744        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1745                   (match_operand:VHSDF 2 "register_operand" "w")))]
1746  "TARGET_SIMD"
1748   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1749     DONE;
1751   operands[1] = force_reg (<MODE>mode, operands[1]);
;; Fallback vector FP division insn (FDIV).
1754 (define_insn "*div<mode>3"
1755  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757                  (match_operand:VHSDF 2 "register_operand" "w")))]
1758  "TARGET_SIMD"
1759  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760   [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negation (FNEG).
1763 (define_insn "neg<mode>2"
1764  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1766  "TARGET_SIMD"
1767  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1768   [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value (FABS).
1771 (define_insn "abs<mode>2"
1772  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1774  "TARGET_SIMD"
1775  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1776   [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add, standard fma<mode>4 pattern (FMLA).  The addend
;; (operand 3) is tied to the destination ("0"), as FMLA accumulates
;; in place.
1779 (define_insn "fma<mode>4"
1780   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1781        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1782                   (match_operand:VHSDF 2 "register_operand" "w")
1783                   (match_operand:VHSDF 3 "register_operand" "0")))]
1784   "TARGET_SIMD"
1785  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA by element: one multiplicand is a single lane of operand 1
;; broadcast via vec_duplicate.  The lane number is remapped for
;; big-endian with aarch64_endian_lane_rtx before printing.
1789 (define_insn "*aarch64_fma4_elt<mode>"
1790   [(set (match_operand:VDQF 0 "register_operand" "=w")
1791     (fma:VDQF
1792       (vec_duplicate:VDQF
1793         (vec_select:<VEL>
1794           (match_operand:VDQF 1 "register_operand" "<h_con>")
1795           (parallel [(match_operand:SI 2 "immediate_operand")])))
1796       (match_operand:VDQF 3 "register_operand" "w")
1797       (match_operand:VDQF 4 "register_operand" "0")))]
1798   "TARGET_SIMD"
1799   {
1800     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1801     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1802   }
1803   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, but the lane comes from a vector of the swapped width
;; (<VSWAP_WIDTH>), so lane remapping uses that vector's mode.
1806 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1807   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1808     (fma:VDQSF
1809       (vec_duplicate:VDQSF
1810         (vec_select:<VEL>
1811           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1812           (parallel [(match_operand:SI 2 "immediate_operand")])))
1813       (match_operand:VDQSF 3 "register_operand" "w")
1814       (match_operand:VDQSF 4 "register_operand" "0")))]
1815   "TARGET_SIMD"
1816   {
1817     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1818     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819   }
1820   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA where the duplicated multiplicand is a scalar register, printed
;; as lane 0 of the corresponding vector register.
1823 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1824   [(set (match_operand:VMUL 0 "register_operand" "=w")
1825     (fma:VMUL
1826       (vec_duplicate:VMUL
1827           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1828       (match_operand:VMUL 2 "register_operand" "w")
1829       (match_operand:VMUL 3 "register_operand" "0")))]
1830   "TARGET_SIMD"
1831   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1832   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF FMLA taking one multiplicand from a lane of a V2DF register.
1835 (define_insn "*aarch64_fma4_elt_to_64v2df"
1836   [(set (match_operand:DF 0 "register_operand" "=w")
1837     (fma:DF
1838         (vec_select:DF
1839           (match_operand:V2DF 1 "register_operand" "w")
1840           (parallel [(match_operand:SI 2 "immediate_operand")]))
1841       (match_operand:DF 3 "register_operand" "w")
1842       (match_operand:DF 4 "register_operand" "0")))]
1843   "TARGET_SIMD"
1844   {
1845     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1846     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1847   }
1848   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract, standard fnma<mode>4 pattern (FMLS):
;; fma with the first multiplicand negated; accumulator tied to the
;; destination.
1851 (define_insn "fnma<mode>4"
1852   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1853         (fma:VHSDF
1854           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1855           (match_operand:VHSDF 2 "register_operand" "w")
1856           (match_operand:VHSDF 3 "register_operand" "0")))]
1857   "TARGET_SIMD"
1858   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1859   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS by element: the negation sits on operand 3 and the lane of
;; operand 1 is broadcast; lane number endian-remapped before printing.
1862 (define_insn "*aarch64_fnma4_elt<mode>"
1863   [(set (match_operand:VDQF 0 "register_operand" "=w")
1864     (fma:VDQF
1865       (neg:VDQF
1866         (match_operand:VDQF 3 "register_operand" "w"))
1867       (vec_duplicate:VDQF
1868         (vec_select:<VEL>
1869           (match_operand:VDQF 1 "register_operand" "<h_con>")
1870           (parallel [(match_operand:SI 2 "immediate_operand")])))
1871       (match_operand:VDQF 4 "register_operand" "0")))]
1872   "TARGET_SIMD"
1873   {
1874     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1875     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1876   }
1877   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, with the lane taken from a vector of the swapped width.
1880 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1881   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1882     (fma:VDQSF
1883       (neg:VDQSF
1884         (match_operand:VDQSF 3 "register_operand" "w"))
1885       (vec_duplicate:VDQSF
1886         (vec_select:<VEL>
1887           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1888           (parallel [(match_operand:SI 2 "immediate_operand")])))
1889       (match_operand:VDQSF 4 "register_operand" "0")))]
1890   "TARGET_SIMD"
1891   {
1892     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1893     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1894   }
1895   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with the multiplicand duplicated from a scalar register (lane 0).
1898 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1899   [(set (match_operand:VMUL 0 "register_operand" "=w")
1900     (fma:VMUL
1901       (neg:VMUL
1902         (match_operand:VMUL 2 "register_operand" "w"))
1903       (vec_duplicate:VMUL
1904         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1905       (match_operand:VMUL 3 "register_operand" "0")))]
1906   "TARGET_SIMD"
1907   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1908   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF FMLS taking one multiplicand from a lane of a V2DF register.
1911 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1912   [(set (match_operand:DF 0 "register_operand" "=w")
1913     (fma:DF
1914       (vec_select:DF
1915         (match_operand:V2DF 1 "register_operand" "w")
1916         (parallel [(match_operand:SI 2 "immediate_operand")]))
1917       (neg:DF
1918         (match_operand:DF 3 "register_operand" "w"))
1919       (match_operand:DF 4 "register_operand" "0")))]
1920   "TARGET_SIMD"
1921   {
1922     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1923     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1924   }
1925   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1928 ;; Vector versions of the floating-point frint patterns.
1929 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; One insn per rounding mode in the FRINT unspec iterator; the
;; instruction suffix comes from <frint_suffix>.
1930 (define_insn "<frint_pattern><mode>2"
1931   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1932         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1933                        FRINT))]
1934   "TARGET_SIMD"
1935   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1936   [(set_attr "type" "neon_fp_round_<stype><q>")]
1939 ;; Vector versions of the fcvt standard patterns.
1940 ;; Expands to lbtrunc, lround, lceil, lfloor
;; FP-to-integer conversion with an explicit rounding mode (FCVT unspec)
;; and signedness chosen by FIXUORS (fcvtNs / fcvtNu).
1941 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1942   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1943         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1944                                [(match_operand:VHSDF 1 "register_operand" "w")]
1945                                FCVT)))]
1946   "TARGET_SIMD"
1947   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1948   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1951 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI conversion with rounding mode; needs the ARMv8.2-A
;; half-precision extension (TARGET_SIMD_F16INST).
1952 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1953   [(set (match_operand:HI 0 "register_operand" "=w")
1954         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1955                       FCVT)))]
1956   "TARGET_SIMD_F16INST"
1957   "fcvt<frint_suffix><su>\t%h0, %h1"
1958   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF -> HI truncating conversion (FCVTZS/FCVTZU).
1961 (define_insn "<optab>_trunchfhi2"
1962   [(set (match_operand:HI 0 "register_operand" "=w")
1963         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1964   "TARGET_SIMD_F16INST"
1965   "fcvtz<su>\t%h0, %h1"
1966   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI -> HF conversion (SCVTF/UCVTF).
1969 (define_insn "<optab>hihf2"
1970   [(set (match_operand:HF 0 "register_operand" "=w")
1971         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1972   "TARGET_SIMD_F16INST"
1973   "<su_optab>cvtf\t%h0, %h1"
1974   [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of two with a truncating FP->int
;; conversion into a single fixed-point convert: fcvtz[su] ..., #fbits.
;; The condition restricts the exponent to the legal #fbits range for
;; the element size.
1977 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1978   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1979         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1980                                [(mult:VDQF
1981          (match_operand:VDQF 1 "register_operand" "w")
1982          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1983                                UNSPEC_FRINTZ)))]
1984   "TARGET_SIMD
1985    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1986                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1987   {
       /* The #fbits immediate is the log2 of the power-of-two constant.  */
1988     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1989     char buf[64];
1990     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1991     output_asm_insn (buf, operands);
1992     return "";
1993   }
1994   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard fix/fixuns patterns, expressed as a truncating round
;; (UNSPEC_FRINTZ) followed by the conversion; matched by the insn
;; patterns above.
1997 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1998   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1999         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2000                                [(match_operand:VHSDF 1 "register_operand")]
2001                                 UNSPEC_FRINTZ)))]
2002   "TARGET_SIMD"
2003   {})
;; Same shape for the fix_trunc/fixuns_trunc standard names.
2005 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2006   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2007         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2008                                [(match_operand:VHSDF 1 "register_operand")]
2009                                 UNSPEC_FRINTZ)))]
2010   "TARGET_SIMD"
2011   {})
;; ftrunc: round towards zero, result stays in FP (frintz via FRINT).
2013 (define_expand "ftrunc<VHSDF:mode>2"
2014   [(set (match_operand:VHSDF 0 "register_operand")
2015         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2016                        UNSPEC_FRINTZ))]
2017   "TARGET_SIMD"
2018   {})
;; Integer -> FP conversion (SCVTF/UCVTF), signedness from FLOATUORS.
2020 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2021   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2022         (FLOATUORS:VHSDF
2023           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2024   "TARGET_SIMD"
2025   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2026   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2029 ;; Conversions between vectors of floats and doubles.
2030 ;; Contains a mix of patterns to match standard pattern names
2031 ;; and those for intrinsics.
2033 ;; Float widening operations.
;; Widen the LOW half of an FP vector (FCVTL); the half is picked by a
;; vect_par_cnst_lo_half parallel.
2035 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2036   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2037         (float_extend:<VWIDE> (vec_select:<VHALF>
2038                                (match_operand:VQ_HSF 1 "register_operand" "w")
2039                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2040                             )))]
2041   "TARGET_SIMD"
2042   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2043   [(set_attr "type" "neon_fp_cvt_widen_s")]
2046 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with #fbits immediate (FCVTZS/FCVTZU, SCVTF group
;; chosen by the FCVT_F2FIXED iterator).
2048 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2049   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2050         (unspec:<VHSDF:FCVT_TARGET>
2051           [(match_operand:VHSDF 1 "register_operand" "w")
2052            (match_operand:SI 2 "immediate_operand" "i")]
2053          FCVT_F2FIXED))]
2054   "TARGET_SIMD"
2055   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2056   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with #fbits immediate (FCVT_FIXED2F iterator).
2059 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2060   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2061         (unspec:<VDQ_HSDI:FCVT_TARGET>
2062           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2063            (match_operand:SI 2 "immediate_operand" "i")]
2064          FCVT_FIXED2F))]
2065   "TARGET_SIMD"
2066   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2067   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2070 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2071 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2072 ;; the meaning of HI and LO changes depending on the target endianness.
2073 ;; While elsewhere we map the higher numbered elements of a vector to
2074 ;; the lower architectural lanes of the vector, for these patterns we want
2075 ;; to always treat "hi" as referring to the higher architectural lanes.
2076 ;; Consequently, while the patterns below look inconsistent with our
2077 ;; other big-endian patterns their behavior is as required.
;; Standard vec_unpacks_lo expander: build the lo-half parallel
;; (last argument false) and emit the FCVTL insn above.  Note the
;; endianness caveat described in the comment block preceding this
;; pattern.
2079 (define_expand "vec_unpacks_lo_<mode>"
2080   [(match_operand:<VWIDE> 0 "register_operand" "")
2081    (match_operand:VQ_HSF 1 "register_operand" "")]
2082   "TARGET_SIMD"
2083   {
2084     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2085     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2086                                                        operands[1], p));
2087     DONE;
2088   }
;; Widen the HIGH half of an FP vector (FCVTL2); the half is picked by
;; a vect_par_cnst_hi_half parallel.
2091 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2092   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093         (float_extend:<VWIDE> (vec_select:<VHALF>
2094                                (match_operand:VQ_HSF 1 "register_operand" "w")
2095                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2096                             )))]
2097   "TARGET_SIMD"
2098   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2099   [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard vec_unpacks_hi expander: build the hi-half parallel
;; (last argument true) and emit the FCVTL2 insn above.
;; Bug fix: this previously emitted gen_aarch64_simd_vec_unpacks_LO_,
;; whose vect_par_cnst_lo_half predicate cannot match the hi-half
;; parallel built here; it must emit the _hi_ insn.
2102 (define_expand "vec_unpacks_hi_<mode>"
2103   [(match_operand:<VWIDE> 0 "register_operand" "")
2104    (match_operand:VQ_HSF 1 "register_operand" "")]
2105   "TARGET_SIMD"
2106   {
2107     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108     emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2109                                                        operands[1], p));
2110     DONE;
2111   }
;; Widen a whole 64-bit FP vector to 128 bits (FCVTL on the low half of
;; the register).
2113 (define_insn "aarch64_float_extend_lo_<Vwide>"
2114   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2115         (float_extend:<VWIDE>
2116           (match_operand:VDF 1 "register_operand" "w")))]
2117   "TARGET_SIMD"
2118   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2119   [(set_attr "type" "neon_fp_cvt_widen_s")]
2122 ;; Float narrowing operations.
;; Narrow a wide FP vector into a 64-bit result (FCVTN).
2124 (define_insn "aarch64_float_truncate_lo_<mode>"
2125   [(set (match_operand:VDF 0 "register_operand" "=w")
2126       (float_truncate:VDF
2127         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2128   "TARGET_SIMD"
2129   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2130   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2, little-endian form: the preserved low half (operand 1, tied
;; to the destination) is the first arm of the vec_concat.
2133 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2134   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2135     (vec_concat:<VDBL>
2136       (match_operand:VDF 1 "register_operand" "0")
2137       (float_truncate:VDF
2138         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2139   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2140   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2141   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2, big-endian form: vec_concat arms swapped relative to _le.
2144 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2145   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2146     (vec_concat:<VDBL>
2147       (float_truncate:VDF
2148         (match_operand:<VWIDE> 2 "register_operand" "w"))
2149       (match_operand:VDF 1 "register_operand" "0")))]
2150   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2151   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2152   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le or _be insn according to target endianness.
2155 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2156   [(match_operand:<VDBL> 0 "register_operand" "=w")
2157    (match_operand:VDF 1 "register_operand" "0")
2158    (match_operand:<VWIDE> 2 "register_operand" "w")]
2159   "TARGET_SIMD"
2161   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2162                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2163                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2164   emit_insn (gen (operands[0], operands[1], operands[2]));
2165   DONE;
;; Pack two V2DF vectors into one V4SF: FCVTN into the low half, then
;; FCVTN2 into the high half.  Which source feeds which half depends on
;; target endianness (see lo/hi selection below).
2169 (define_expand "vec_pack_trunc_v2df"
2170   [(set (match_operand:V4SF 0 "register_operand")
2171       (vec_concat:V4SF
2172         (float_truncate:V2SF
2173             (match_operand:V2DF 1 "register_operand"))
2174         (float_truncate:V2SF
2175             (match_operand:V2DF 2 "register_operand"))
2176           ))]
2177   "TARGET_SIMD"
2178   {
2179     rtx tmp = gen_reg_rtx (V2SFmode);
2180     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2181     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2183     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2184     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2185                                                    tmp, operands[hi]));
2186     DONE;
2187   }
;; Pack two scalar DFs into a V2SF: assemble a V2DF from the two inputs
;; (move_lo_quad / move_hi_quad) and narrow it with FCVTN.
2190 (define_expand "vec_pack_trunc_df"
2191   [(set (match_operand:V2SF 0 "register_operand")
2192       (vec_concat:V2SF
2193         (float_truncate:SF
2194             (match_operand:DF 1 "register_operand"))
2195         (float_truncate:SF
2196             (match_operand:DF 2 "register_operand"))
2197           ))]
2198   "TARGET_SIMD"
2199   {
2200     rtx tmp = gen_reg_rtx (V2SFmode);
2201     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2204     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2205     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2206     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2207     DONE;
2208   }
2211 ;; FP Max/Min
2212 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2213 ;; expression like:
2214 ;;      a = (b < c) ? b : c;
2215 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2216 ;; either explicitly or indirectly via -ffast-math.
2218 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2219 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2220 ;; operand will be returned when both operands are zero (i.e. they may not
2221 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2222 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2223 ;; NaNs.
;; smax/smin standard patterns mapped onto FMAXNM/FMINNM (see the
;; NaN/signed-zero caveats in the comment block above).
2225 (define_insn "<su><maxmin><mode>3"
2226   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2227         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2228                        (match_operand:VHSDF 2 "register_operand" "w")))]
2229   "TARGET_SIMD"
2230   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2231   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2234 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2235 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2236 ;; which implement the IEEE fmax ()/fmin () functions.
;; Kept as an unspec (FMAXMIN_UNS) so the exact IEEE semantics are not
;; misrepresented by smax/smin RTL.
2237 (define_insn "<maxmin_uns><mode>3"
2238   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2239        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2240                       (match_operand:VHSDF 2 "register_operand" "w")]
2241                       FMAXMIN_UNS))]
2242   "TARGET_SIMD"
2243   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2244   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2247 ;; 'across lanes' add.
;; Integer add reduction to a scalar: reduce into a vector register via
;; the internal pattern, then extract architectural lane 0 (endian-
;; remapped by aarch64_endian_lane_rtx).
2249 (define_expand "reduc_plus_scal_<mode>"
2250   [(match_operand:<VEL> 0 "register_operand" "=w")
2251    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2252                UNSPEC_ADDV)]
2253   "TARGET_SIMD"
2254   {
2255     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2256     rtx scratch = gen_reg_rtx (<MODE>mode);
2257     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2258     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2259     DONE;
2260   }
;; Pairwise FP add (FADDP), the building block for FP add reductions.
2263 (define_insn "aarch64_faddp<mode>"
2264  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2265        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2266                       (match_operand:VHSDF 2 "register_operand" "w")]
2267         UNSPEC_FADDV))]
2268  "TARGET_SIMD"
2269  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2270   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Integer across-lanes add (ADDV / ADDP depending on <vp>).
2273 (define_insn "aarch64_reduc_plus_internal<mode>"
2274  [(set (match_operand:VDQV 0 "register_operand" "=w")
2275        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2276                     UNSPEC_ADDV))]
2277  "TARGET_SIMD"
2278  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2279   [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; a single pairwise ADDP does the reduction.
2282 (define_insn "aarch64_reduc_plus_internalv2si"
2283  [(set (match_operand:V2SI 0 "register_operand" "=w")
2284        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2285                     UNSPEC_ADDV))]
2286  "TARGET_SIMD"
2287  "addp\\t%0.2s, %1.2s, %1.2s"
2288   [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with a single scalar-output FADDP.
2291 (define_insn "reduc_plus_scal_<mode>"
2292  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2293        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2294                    UNSPEC_FADDV))]
2295  "TARGET_SIMD"
2296  "faddp\\t%<Vetype>0, %1.<Vtype>"
2297   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF: two rounds of pairwise FADDP, then extract lane 0.
2300 (define_expand "reduc_plus_scal_v4sf"
2301  [(set (match_operand:SF 0 "register_operand")
2302        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2303                     UNSPEC_FADDV))]
2304  "TARGET_SIMD"
2306   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2307   rtx scratch = gen_reg_rtx (V4SFmode);
2308   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2309   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2310   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2311   DONE;
;; Count leading redundant sign bits (CLS), standard clrsb pattern.
2314 (define_insn "clrsb<mode>2"
2315   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2316         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2317   "TARGET_SIMD"
2318   "cls\\t%0.<Vtype>, %1.<Vtype>"
2319   [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros (CLZ).
2322 (define_insn "clz<mode>2"
2323  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2324        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2325  "TARGET_SIMD"
2326  "clz\\t%0.<Vtype>, %1.<Vtype>"
2327   [(set_attr "type" "neon_cls<q>")]
;; Population count on byte vectors (CNT).
2330 (define_insn "popcount<mode>2"
2331   [(set (match_operand:VB 0 "register_operand" "=w")
2332         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2333   "TARGET_SIMD"
2334   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2335   [(set_attr "type" "neon_cnt<q>")]
2338 ;; 'across lanes' max and min ops.
2340 ;; Template for outputting a scalar, so we can create __builtins which can be
2341 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code.  (This is FP smax/smin).
;; FP max/min reduction to scalar: internal reduction then extract
;; architectural lane 0 (endian-remapped).
2342 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2343   [(match_operand:<VEL> 0 "register_operand")
2344    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2345                   FMAXMINV)]
2346   "TARGET_SIMD"
2347   {
2348     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2349     rtx scratch = gen_reg_rtx (<MODE>mode);
2350     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2351                                                               operands[1]));
2352     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2353     DONE;
2354   }
2357 ;; Likewise for integer cases, signed and unsigned.
2358 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2359   [(match_operand:<VEL> 0 "register_operand")
2360    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2361                     MAXMINV)]
2362   "TARGET_SIMD"
2363   {
2364     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2365     rtx scratch = gen_reg_rtx (<MODE>mode);
2366     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2367                                                               operands[1]));
2368     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2369     DONE;
2370   }
;; Integer across-lanes max/min (SMAXV/UMAXV/SMINV/UMINV).
2373 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2374  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2375        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2376                     MAXMINV))]
2377  "TARGET_SIMD"
2378  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2379   [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; a single pairwise op reduces it.
2382 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2383  [(set (match_operand:V2SI 0 "register_operand" "=w")
2384        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2385                     MAXMINV))]
2386  "TARGET_SIMD"
2387  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2388   [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min (suffix <vp> selects V or P form).
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2393        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2394                       FMAXMINV))]
2395  "TARGET_SIMD"
2396  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2397   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2400 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2401 ;; allocation.
2402 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2403 ;; to select.
2405 ;; Thus our BSL is of the form:
2406 ;;   op0 = bsl (mask, op2, op3)
2407 ;; We can use any of:
2409 ;;   if (op0 = mask)
2410 ;;     bsl mask, op2, op3
2411 ;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2412 ;;     bit op0, op2, mask
2413 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2414 ;;     bif op0, op3, mask
2416 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2417 ;; Some forms of straight-line code may generate the equivalent form
2418 ;; in *aarch64_simd_bsl<mode>_alt.
;; Bitwise select expressed canonically as ((op2 ^ op3) & mask) ^ op3.
;; The three constraint alternatives tie the destination to the mask,
;; op3 or op2 respectively, selecting BSL, BIT or BIF.
2420 (define_insn "aarch64_simd_bsl<mode>_internal"
2421   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2422         (xor:VDQ_I
2423            (and:VDQ_I
2424              (xor:VDQ_I
2425                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2426                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2427              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2428           (match_dup:<V_INT_EQUIV> 3)
2429         ))]
2430   "TARGET_SIMD"
2431   "@
2432   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2433   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2434   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2435   [(set_attr "type" "neon_bsl<q>")]
2438 ;; We need this form in addition to the above pattern to match the case
2439 ;; when combine tries merging three insns such that the second operand of
2440 ;; the outer XOR matches the second operand of the inner XOR rather than
2441 ;; the first.  The two are equivalent but since recog doesn't try all
2442 ;; permutations of commutative operations, we have to have a separate pattern.
;; Commutative-XOR variant of bsl_internal: the outer xor's second arm
;; is operand 2 rather than operand 3, so the operand roles in the asm
;; templates are swapped accordingly.
2444 (define_insn "*aarch64_simd_bsl<mode>_alt"
2445   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2446         (xor:VDQ_I
2447            (and:VDQ_I
2448              (xor:VDQ_I
2449                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2450                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2451               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2452           (match_dup:<V_INT_EQUIV> 2)))]
2453   "TARGET_SIMD"
2454   "@
2455   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2456   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2457   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2458   [(set_attr "type" "neon_bsl<q>")]
;; DImode is special, we want to avoid computing operations which are
;; more naturally computed in general purpose registers in the vector
;; registers.  If we do that, we need to move all three operands from general
;; purpose registers to vector registers, then back again.  However, we
;; don't want to make this pattern an UNSPEC as we'd lose scope for
;; optimizations based on the component operations of a BSL.
;;
;; That means we need a splitter back to the individual operations, if they
;; would be better calculated on the integer side.

(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
        (xor:DI
           (and:DI
             (xor:DI
               (match_operand:DI 3 "register_operand" "w,0,w,r")
               (match_operand:DI 2 "register_operand" "w,w,0,r"))
             (match_operand:DI 1 "register_operand" "0,w,w,r"))
          (match_dup:DI 3)
        ))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */
  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
;; As aarch64_simd_bsldi_internal, but with the XOR operands in the
;; alternate (commuted) order combine may produce; see the comment on
;; *aarch64_simd_bsl<mode>_alt above.
(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
        (xor:DI
           (and:DI
             (xor:DI
               (match_operand:DI 3 "register_operand" "w,w,0,r")
               (match_operand:DI 2 "register_operand" "w,0,w,r"))
             (match_operand:DI 1 "register_operand" "0,w,w,r"))
          (match_dup:DI 2)
        ))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */
  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
;; Expand a bitwise select: op0 = (op2 & op1) | (op3 & ~op1), where op1 is
;; an integer-mode mask.  Float modes are punned to the equivalent integer
;; mode so the internal BSL pattern (which is integer-only) can be used.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
                                                         operands[1],
                                                         operands[2],
                                                         operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
;; Select between operands 1 and 2 under the control of mask operand 3,
;; special-casing all-ones/all-zeros selections which reduce to a move or
;; a bitwise NOT of the mask.
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
           && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      if (!REG_P (operands[1]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
        operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
                                             operands[1], operands[2]));
    }

  DONE;
})
;; Patterns comparing two vectors to produce a mask.

;; Integer vector compare.  Comparisons against zero keep the immediate
;; (the CM* instructions have zero forms); otherwise the second operand
;; is forced into a register.  Unsigned LTU/LEU are emitted by swapping
;; the operands of CMGTU/CMGEU, and NE is emitted as NOT (a EQ b).
(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
          (match_operator 1 "comparison_operator"
            [(match_operand:VSDQ_I_DI 2 "register_operand")
             (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
        break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
        operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
;; Floating-point vector compare producing an integer mask.  Ordered
;; comparisons map directly onto FCMGE/FCMGT/FCMEQ (with zero forms when
;; comparing against constant zero); unordered comparisons and NE are
;; synthesized by inverting the opposite ordered comparison, and
;; UNEQ/ORDERED/UNORDERED are built from pairs of compares ORed together.
(define_expand "vec_cmp<mode><v_int_equiv>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (match_operator 1 "comparison_operator"
            [(match_operand:VDQF 2 "register_operand")
             (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
        {
          use_zero_form = 1;
          break;
        }
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
        operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      if (use_zero_form)
        {
          comparison = gen_aarch64_cmlt<mode>;
          break;
        }
      /* Fall through.  */
    case UNGE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNLE:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
        {
          comparison = gen_aarch64_cmle<mode>;
          break;
        }
      /* Fall through.  */
    case UNGT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNLT:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
    case NE:
      /* FCM returns false for lanes which are unordered, so if we use
         the inverse of the comparison we actually want to emit, then
         invert the result, we will end up with the correct result.
         Note that a NE NaN and NaN NE b are true for all a, b.

         Our transformations are:
         a UNGE b -> !(b GT a)
         a UNGT b -> !(b GE a)
         a UNLE b -> !(a GT b)
         a UNLT b -> !(a GE b)
         a   NE b -> !(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
         As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
         a GE b -> a GE b
         a GT b -> a GT b
         a LE b -> b GE a
         a LT b -> b GT a
         a EQ b -> a EQ b  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      break;

    case UNEQ:
      /* We first check (a > b ||  b > a) which is !UNEQ, inverting
         this result will then give us (a == b || a UNORDERED b).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
                                         operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a), so we can compute
         UNORDERED as !ORDERED.  */
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
      emit_insn (gen_aarch64_cmge<mode> (operands[0],
                                         operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case ORDERED:
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
      emit_insn (gen_aarch64_cmge<mode> (operands[0],
                                         operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
;; Unsigned integer vector compare.  The signed expander dispatches on the
;; rtx code itself (LTU/GEU/... included), so it handles unsigned codes too.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
          (match_operator 1 "comparison_operator"
            [(match_operand:VSDQ_I_DI 2 "register_operand")
             (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
                                      operands[2], operands[3]));
  DONE;
})
;; vcond: op0 = (op4 <op3> op5) ? op1 : op2, implemented as a vector
;; compare followed by a mask-driven select (vcond_mask).
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
        (if_then_else:VALLDI
          (match_operator 3 "comparison_operator"
            [(match_operand:VALLDI 4 "register_operand")
             (match_operand:VALLDI 5 "nonmemory_operand")])
          (match_operand:VALLDI 1 "nonmemory_operand")
          (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
                                    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
                                             operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
                                                 operands[2], mask));

  DONE;
})
;; Mixed-mode vcond: the comparison is done in a float mode while the
;; selected data are in the equally-sized integer mode <V_cmp_mixed>.
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
        (if_then_else:<V_cmp_mixed>
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQF_COND 4 "register_operand")
             (match_operand:VDQF_COND 5 "nonmemory_operand")])
          (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
          (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
                                    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
                                             operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
                                                operands[0], operands[1],
                                                operands[2], mask));

  DONE;
})
;; Unsigned vcond on integer vectors; vec_cmp handles the unsigned
;; comparison codes directly, then vcond_mask performs the select.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
        (if_then_else:VSDQ_I_DI
          (match_operator 3 "comparison_operator"
            [(match_operand:VSDQ_I_DI 4 "register_operand")
             (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
          (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
          (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
                                    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
                                      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
                                                 operands[2], mask));
  DONE;
})
;; Mixed-mode unsigned vcond: integer-mode comparison selecting between
;; float-mode data of the same width.
(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
        (if_then_else:VDQF
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_cmp_mixed> 4 "register_operand")
             (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
          (match_operand:VDQF 1 "nonmemory_operand")
          (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
                                    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
                                                  mask, operands[3],
                                                  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
                                                 operands[2], mask));
  DONE;
})
;; Patterns for AArch64 SIMD Intrinsics.

;; Lane extraction with sign extension to general purpose register.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
        (sign_extend:GPI
          (vec_select:<VEL>
            (match_operand:VDQQH 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* Lane numbers in RTL follow GCC vector-extension order; flip for
       big-endian before printing the assembly operand.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
;; Lane extraction with zero extension to a 32-bit general purpose register.
(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
          (vec_select:<VEL>
            (match_operand:VDQQH 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
;; Lane extraction of a value, neither sign nor zero extension
;; is guaranteed so upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:VALL_F16 1 "register_operand" "w, w, w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
        case 0:
          return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
        case 1:
          return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
        case 2:
          return "st1\\t{%1.<Vetype>}[%2], %0";
        default:
          gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
;; Load two adjacent 64-bit values as one 128-bit LDR; only valid when the
;; second address is exactly the first plus the element size.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
           (match_operand:VDC 1 "memory_operand" "Utq")
           (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)
;; Store a 128-bit concatenation of two 64-bit values as a single STP,
;; from either vector or general registers.
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
        (vec_concat:<VDBL>
           (match_operand:VDC 1 "register_operand" "w, r")
           (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %0
   stp\\t%x1, %x2, %0"
  [(set_attr "type" "neon_stp, store_16")]
)
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
;;
;; Combining a 64-bit value with zero in the high half: a plain move/load
;; of the low half zeroes the upper half implicitly (little-endian).
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
          (match_operand:VDC 1 "general_operand" "w,?r,m")
          (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
;; Big-endian counterpart of *aarch64_combinez: the zero half appears
;; first in the vec_concat.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
          (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
          (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
;; Combine two 64-bit vectors into one 128-bit vector; the helper handles
;; endianness-dependent operand ordering.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);

  DONE;
}
)
;; Build a 128-bit vector by writing operand 1 into the low quadword half
;; and operand 2 into the high half.
(define_expand "aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)
;; <su><addsub>l<q>.

;; Widening add/sub of the high halves of two 128-bit vectors
;; (SADDL2/UADDL2/SSUBL2/USUBL2).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
;; Widening add/sub of the low halves of two 128-bit vectors
;; (SADDL/UADDL/SSUBL/USUBL).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
;; Expand saddl2: signed widening add of the high vector halves.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})
;; Expand uaddl2: unsigned widening add of the high vector halves.
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})
;; Expand ssubl2: signed widening subtract of the high vector halves.
(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
                                                operands[2], p));
  DONE;
})
;; Expand usubl2: unsigned widening subtract of the high vector halves.
(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
                                                operands[2], p));
  DONE;
})
;; Widening add/sub of whole 64-bit vectors (SADDL/UADDL/SSUBL/USUBL).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
                           (match_operand:VD_BHSI 1 "register_operand" "w"))
                       (ANY_EXTEND:<VWIDE>
                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
;; <su><addsub>w<q>.

;; Widening sum for 128-bit inputs: SADDW on the low half followed by
;; SADDW2 on the high half.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
        (plus:<VDBLW> (sign_extend:<VDBLW>
                        (match_operand:VQW 1 "register_operand" ""))
                      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
                                                operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)
;; Widening sum for 64-bit inputs: a single SADDW suffices.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
        (plus:<VWIDE> (sign_extend:<VWIDE>
                        (match_operand:VD_BHSI 1 "register_operand" ""))
                      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
;; Unsigned widening sum for 128-bit inputs: UADDW on the low half then
;; UADDW2 on the high half.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
        (plus:<VDBLW> (zero_extend:<VDBLW>
                        (match_operand:VQW 1 "register_operand" ""))
                      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
                                                 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)
;; Unsigned widening sum for 64-bit inputs: a single UADDW suffices.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
        (plus:<VWIDE> (zero_extend:<VWIDE>
                        (match_operand:VD_BHSI 1 "register_operand" ""))
                      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
;; Widen-and-accumulate with a 64-bit narrow operand
;; (SADDW/UADDW/SSUBW/USUBW).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
                        (ANY_EXTEND:<VWIDE>
                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)
;; Widen-and-accumulate taking the low half of a 128-bit narrow operand.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
                        (ANY_EXTEND:<VWIDE>
                          (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)
3281 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3282   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3283         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3284                         (ANY_EXTEND:<VWIDE>
3285                           (vec_select:<VHALF>
3286                            (match_operand:VQW 2 "register_operand" "w")
3287                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3288   "TARGET_SIMD"
3289   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3290   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Builder expands for the intrinsics: construct the high-half selection
;; parallel (aarch64_simd_vect_par_cnst_half with hi == true) and emit
;; the matching *_internal high-half pattern above.
3293 (define_expand "aarch64_saddw2<mode>"
3294   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3295    (match_operand:<VWIDE> 1 "register_operand" "w")
3296    (match_operand:VQW 2 "register_operand" "w")]
3297   "TARGET_SIMD"
3299   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3300   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3301                                                 operands[2], p));
3302   DONE;
;; Unsigned counterpart of aarch64_saddw2<mode>.
3305 (define_expand "aarch64_uaddw2<mode>"
3306   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3307    (match_operand:<VWIDE> 1 "register_operand" "w")
3308    (match_operand:VQW 2 "register_operand" "w")]
3309   "TARGET_SIMD"
3311   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3312   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3313                                                 operands[2], p));
3314   DONE;
;; Signed widening subtract, high half (SSUBW2).
3318 (define_expand "aarch64_ssubw2<mode>"
3319   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3320    (match_operand:<VWIDE> 1 "register_operand" "w")
3321    (match_operand:VQW 2 "register_operand" "w")]
3322   "TARGET_SIMD"
3324   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3325   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3326                                                 operands[2], p));
3327   DONE;
;; Unsigned widening subtract, high half (USUBW2).
3330 (define_expand "aarch64_usubw2<mode>"
3331   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3332    (match_operand:<VWIDE> 1 "register_operand" "w")
3333    (match_operand:VQW 2 "register_operand" "w")]
3334   "TARGET_SIMD"
3336   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3337   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3338                                                 operands[2], p));
3339   DONE;
3342 ;; <su><r>h<addsub>.
;; Halving add/sub family (SHADD/UHADD/SRHADD/URHADD/SHSUB/UHSUB),
;; expressed as an unspec over the HADDSUB iterator; plain integer
;; same-width three-operand form.
3344 (define_insn "aarch64_<sur>h<addsub><mode>"
3345   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3346         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3347                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3348                      HADDSUB))]
3349   "TARGET_SIMD"
3350   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3351   [(set_attr "type" "neon_<addsub>_halve<q>")]
3354 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub ([R]ADDHN/[R]SUBHN): two full-width Q
;; operands, narrowed result written to a D register.
3356 (define_insn "aarch64_<sur><addsub>hn<mode>"
3357   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3358         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3359                             (match_operand:VQN 2 "register_operand" "w")]
3360                            ADDSUBHN))]
3361   "TARGET_SIMD"
3362   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3363   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "2" form: operand 1 is tied to the output ("0" constraint) because
;; the instruction only writes the upper half of the destination.
3366 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3367   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3368         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3369                              (match_operand:VQN 2 "register_operand" "w")
3370                              (match_operand:VQN 3 "register_operand" "w")]
3371                             ADDSUBHN2))]
3372   "TARGET_SIMD"
3373   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3374   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3377 ;; pmul.
;; Polynomial multiply (PMUL) on byte vectors; kept as an unspec since
;; RTL has no carry-less-multiply operation.
3379 (define_insn "aarch64_pmul<mode>"
3380   [(set (match_operand:VB 0 "register_operand" "=w")
3381         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3382                     (match_operand:VB 2 "register_operand" "w")]
3383                    UNSPEC_PMUL))]
3384  "TARGET_SIMD"
3385  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3386   [(set_attr "type" "neon_mul_<Vetype><q>")]
3389 ;; fmulx.
;; FMULX (floating-point multiply extended), vector and scalar forms;
;; unspec because its 0 * inf == 2.0 special case has no RTL equivalent.
3391 (define_insn "aarch64_fmulx<mode>"
3392   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3393         (unspec:VHSDF_HSDF
3394           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3395            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3396            UNSPEC_FMULX))]
3397  "TARGET_SIMD"
3398  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3399  [(set_attr "type" "neon_fp_mul_<stype>")]
3402 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by lane where the lane register has the opposite width
;; (Q-by-D / D-by-Q); the lane number is remapped for big-endian by
;; aarch64_endian_lane_rtx before printing.
3404 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3405   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3406         (unspec:VDQSF
3407          [(match_operand:VDQSF 1 "register_operand" "w")
3408           (vec_duplicate:VDQSF
3409            (vec_select:<VEL>
3410             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3411             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3412          UNSPEC_FMULX))]
3413   "TARGET_SIMD"
3414   {
3415     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3416     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3417   }
3418   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3421 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by lane with both operands the same width.
3423 (define_insn "*aarch64_mulx_elt<mode>"
3424   [(set (match_operand:VDQF 0 "register_operand" "=w")
3425         (unspec:VDQF
3426          [(match_operand:VDQF 1 "register_operand" "w")
3427           (vec_duplicate:VDQF
3428            (vec_select:<VEL>
3429             (match_operand:VDQF 2 "register_operand" "w")
3430             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3431          UNSPEC_FMULX))]
3432   "TARGET_SIMD"
3433   {
3434     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3435     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3436   }
3437   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3440 ;; vmulxq_lane
;; FMULX with the scalar multiplicand duplicated into every lane
;; (vmulxq_lane-style intrinsics).  Since operand 2 is a vec_duplicate of
;; a scalar register, the output template always indexes its lane [0].
;; NOTE: the original ended the output-template line with a stray ';'
;; after the closing quote; the md reader happened to treat it as an
;; empty comment, but it was a typo and is removed here.
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF
         [(match_operand:VHSDF 1 "register_operand" "w")
          (vec_duplicate:VHSDF
            (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
         UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3454 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3455 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3456 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX taking one operand from a vector lane
;; (vmulxs_lane/vmulxd_lane); result and operand 1 are scalar FP
;; registers, operand 2 supplies the selected lane.
3458 (define_insn "*aarch64_vgetfmulx<mode>"
3459   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3460         (unspec:<VEL>
3461          [(match_operand:<VEL> 1 "register_operand" "w")
3462           (vec_select:<VEL>
3463            (match_operand:VDQF 2 "register_operand" "w")
3464             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3465          UNSPEC_FMULX))]
3466   "TARGET_SIMD"
3467   {
3468     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3469     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3470   }
3471   [(set_attr "type" "fmul<Vetype>")]
3473 ;; <su>q<addsub>
;; Saturating add/sub (SQADD/UQADD/SQSUB/UQSUB) via the ss_plus/us_plus/
;; ss_minus/us_minus codes in BINQOPS; vector and scalar modes.
3475 (define_insn "aarch64_<su_optab><optab><mode>"
3476   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3477         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3478                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3479   "TARGET_SIMD"
3480   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3481   [(set_attr "type" "neon_<optab><q>")]
3484 ;; suqadd and usqadd
;; Accumulator operand 1 is tied to the destination ("0"): SUQADD/USQADD
;; add operand 2 into the existing register contents.
3486 (define_insn "aarch64_<sur>qadd<mode>"
3487   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3488         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3489                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3490                        USSUQADD))]
3491   "TARGET_SIMD"
3492   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3493   [(set_attr "type" "neon_qadd<q>")]
3496 ;; sqmovun
;; Saturating extract-unsigned-narrow (SQXTUN).
3498 (define_insn "aarch64_sqmovun<mode>"
3499   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3500         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3501                             UNSPEC_SQXTUN))]
3502    "TARGET_SIMD"
3503    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3504    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3507 ;; sqmovn and uqmovn
;; Saturating extract-narrow, signed/unsigned (SQXTN/UQXTN).
3509 (define_insn "aarch64_<sur>qmovn<mode>"
3510   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3511         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3512                             SUQMOVN))]
3513   "TARGET_SIMD"
3514   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3515    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3518 ;; <su>q<absneg>
;; Saturating absolute value / negate (SQABS/SQNEG) via UNQOPS.
3520 (define_insn "aarch64_s<optab><mode>"
3521   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3522         (UNQOPS:VSDQ_I
3523           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3524   "TARGET_SIMD"
3525   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3526   [(set_attr "type" "neon_<optab><q>")]
3529 ;; sq<r>dmulh.
;; Saturating doubling multiply high, plain and rounding
;; (SQDMULH/SQRDMULH), vector and scalar forms via VSDQ_HSI.
3531 (define_insn "aarch64_sq<r>dmulh<mode>"
3532   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3533         (unspec:VSDQ_HSI
3534           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3535            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3536          VQDMULH))]
3537   "TARGET_SIMD"
3538   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3539   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3542 ;; sq<r>dmulh_lane
;; Vector-by-lane form; lane register constrained by <vwx> and lane
;; number remapped for big-endian before printing.
3544 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3545   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3546         (unspec:VDQHS
3547           [(match_operand:VDQHS 1 "register_operand" "w")
3548            (vec_select:<VEL>
3549              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3550              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3551          VQDMULH))]
3552   "TARGET_SIMD"
3553   "*
3554    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3555    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3556   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Same, but the lane comes from a full 128-bit register (<VCONQ>).
3559 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3560   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3561         (unspec:VDQHS
3562           [(match_operand:VDQHS 1 "register_operand" "w")
3563            (vec_select:<VEL>
3564              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3565              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3566          VQDMULH))]
3567   "TARGET_SIMD"
3568   "*
3569    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3570    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3571   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar-by-lane SQDMULH/SQRDMULH (SD_HSI: HI and SI scalar modes).
;; The lane operand's element suffix now uses <Vetype> like every other
;; *_lane pattern in this file; the original used <v>, the scalar
;; register-prefix attribute, which only coincidentally prints the same
;; letters ("h"/"s") for these modes.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
         VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar-by-lane SQDMULH/SQRDMULH with the lane taken from a 128-bit
;; register (<VCONQ>).  As in the _lane variant, the lane operand's
;; element suffix uses <Vetype> (the element-type attribute) instead of
;; the original <v> register-prefix attribute, for consistency with the
;; VDQHS lane/laneq patterns above.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
         VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3604 ;; sqrdml[as]h.
;; Saturating rounding doubling multiply-accumulate/subtract high
;; (SQRDMLAH/SQRDMLSH, ARMv8.1 RDMA extension; hence TARGET_SIMD_RDMA).
;; Operand 1 is the accumulator, tied to the destination.
3606 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3607   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3608         (unspec:VSDQ_HSI
3609           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3610            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3611            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3612           SQRDMLH_AS))]
3613    "TARGET_SIMD_RDMA"
3614    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3615    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3618 ;; sqrdml[as]h_lane.
;; Vector-by-lane form; big-endian lane remap before printing.
3620 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3621   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3622         (unspec:VDQHS
3623           [(match_operand:VDQHS 1 "register_operand" "0")
3624            (match_operand:VDQHS 2 "register_operand" "w")
3625            (vec_select:<VEL>
3626              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3627              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3628           SQRDMLH_AS))]
3629    "TARGET_SIMD_RDMA"
3630    {
3631      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3632      return
3633       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3634    }
3635    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane SQRDMLAH/SQRDMLSH (HI/SI scalar modes); accumulator
;; in operand 1 tied to the destination.
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3640         (unspec:SD_HSI
3641           [(match_operand:SD_HSI 1 "register_operand" "0")
3642            (match_operand:SD_HSI 2 "register_operand" "w")
3643            (vec_select:<VEL>
3644              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3646           SQRDMLH_AS))]
3647    "TARGET_SIMD_RDMA"
3648    {
3649      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3650      return
3651       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3652    }
3653    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3656 ;; sqrdml[as]h_laneq.
;; Vector-by-lane SQRDMLAH/SQRDMLSH with the lane taken from a full
;; 128-bit register (<VCONQ>).
3658 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3659   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3660         (unspec:VDQHS
3661           [(match_operand:VDQHS 1 "register_operand" "0")
3662            (match_operand:VDQHS 2 "register_operand" "w")
3663            (vec_select:<VEL>
3664              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3665              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3666           SQRDMLH_AS))]
3667    "TARGET_SIMD_RDMA"
3668    {
3669      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3670      return
3671       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3672    }
3673    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane SQRDMLAH/SQRDMLSH with the lane taken from a 128-bit
;; register (<VCONQ>).  The lane operand's element suffix uses <Vetype>,
;; matching the sibling _lane SD_HSI pattern above; the original used
;; <v>, the register-prefix attribute, which only coincidentally prints
;; the same letters for HI/SI.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "0")
           (match_operand:SD_HSI 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
          SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3694 ;; vqdml[sa]l
;; Saturating doubling multiply-accumulate/subtract long
;; (SQDMLAL/SQDMLSL): the doubling is expressed as a saturating left
;; shift by 1 (ss_ashift ... (const_int 1)) of the widened product;
;; accumulator operand 1 is tied to the destination.
3696 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3697   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3698         (SBINQOPS:<VWIDE>
3699           (match_operand:<VWIDE> 1 "register_operand" "0")
3700           (ss_ashift:<VWIDE>
3701               (mult:<VWIDE>
3702                 (sign_extend:<VWIDE>
3703                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3704                 (sign_extend:<VWIDE>
3705                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3706               (const_int 1))))]
3707   "TARGET_SIMD"
3708   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3712 ;; vqdml[sa]l_lane
;; Vector-by-lane form; lane duplicated across a 64-bit vector before
;; the widening multiply, with big-endian lane remap before printing.
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3715   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3716         (SBINQOPS:<VWIDE>
3717           (match_operand:<VWIDE> 1 "register_operand" "0")
3718           (ss_ashift:<VWIDE>
3719             (mult:<VWIDE>
3720               (sign_extend:<VWIDE>
3721                 (match_operand:VD_HSI 2 "register_operand" "w"))
3722               (sign_extend:<VWIDE>
3723                 (vec_duplicate:VD_HSI
3724                   (vec_select:<VEL>
3725                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3726                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3727               ))
3728             (const_int 1))))]
3729   "TARGET_SIMD"
3730   {
3731     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3732     return
3733       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3734   }
3735   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL/SQDMLSL by lane, lane taken from a 128-bit register (<VCONQ>).
3738 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3739   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3740         (SBINQOPS:<VWIDE>
3741           (match_operand:<VWIDE> 1 "register_operand" "0")
3742           (ss_ashift:<VWIDE>
3743             (mult:<VWIDE>
3744               (sign_extend:<VWIDE>
3745                 (match_operand:VD_HSI 2 "register_operand" "w"))
3746               (sign_extend:<VWIDE>
3747                 (vec_duplicate:VD_HSI
3748                   (vec_select:<VEL>
3749                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3750                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3751               ))
3752             (const_int 1))))]
3753   "TARGET_SIMD"
3754   {
3755     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3756     return
3757       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3758   }
3759   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) SQDMLAL/SQDMLSL by lane; no vec_duplicate needed since
;; the multiplicand is a single element.
3762 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3763   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3764         (SBINQOPS:<VWIDE>
3765           (match_operand:<VWIDE> 1 "register_operand" "0")
3766           (ss_ashift:<VWIDE>
3767             (mult:<VWIDE>
3768               (sign_extend:<VWIDE>
3769                 (match_operand:SD_HSI 2 "register_operand" "w"))
3770               (sign_extend:<VWIDE>
3771                 (vec_select:<VEL>
3772                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3773                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3774               )
3775             (const_int 1))))]
3776   "TARGET_SIMD"
3777   {
3778     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3779     return
3780       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3781   }
3782   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar laneq variant: lane from a 128-bit register (<VCONQ>).
3785 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3786   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3787         (SBINQOPS:<VWIDE>
3788           (match_operand:<VWIDE> 1 "register_operand" "0")
3789           (ss_ashift:<VWIDE>
3790             (mult:<VWIDE>
3791               (sign_extend:<VWIDE>
3792                 (match_operand:SD_HSI 2 "register_operand" "w"))
3793               (sign_extend:<VWIDE>
3794                 (vec_select:<VEL>
3795                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3796                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3797               )
3798             (const_int 1))))]
3799   "TARGET_SIMD"
3800   {
3801     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3802     return
3803       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3804   }
3805   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3808 ;; vqdml[sa]l_n
;; SQDMLAL/SQDMLSL with a scalar multiplicand broadcast into every lane
;; (the _n intrinsic forms); prints lane [0] of the scalar's register.
3810 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3811   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3812         (SBINQOPS:<VWIDE>
3813           (match_operand:<VWIDE> 1 "register_operand" "0")
3814           (ss_ashift:<VWIDE>
3815               (mult:<VWIDE>
3816                 (sign_extend:<VWIDE>
3817                       (match_operand:VD_HSI 2 "register_operand" "w"))
3818                 (sign_extend:<VWIDE>
3819                   (vec_duplicate:VD_HSI
3820                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3821               (const_int 1))))]
3822   "TARGET_SIMD"
3823   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3824   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3827 ;; sqdml[as]l2
;; High-half form (SQDMLAL2/SQDMLSL2): both multiplicands are the
;; high halves of 128-bit vectors, selected via the shared hi-half
;; parallel in operand 4 (match_dup).
3829 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3830   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3831         (SBINQOPS:<VWIDE>
3832          (match_operand:<VWIDE> 1 "register_operand" "0")
3833          (ss_ashift:<VWIDE>
3834              (mult:<VWIDE>
3835                (sign_extend:<VWIDE>
3836                  (vec_select:<VHALF>
3837                      (match_operand:VQ_HSI 2 "register_operand" "w")
3838                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3839                (sign_extend:<VWIDE>
3840                  (vec_select:<VHALF>
3841                      (match_operand:VQ_HSI 3 "register_operand" "w")
3842                      (match_dup 4))))
3843              (const_int 1))))]
3844   "TARGET_SIMD"
3845   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3846   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Intrinsic expander: build the hi-half parallel and emit the
;; *_internal pattern above (add form).
3849 (define_expand "aarch64_sqdmlal2<mode>"
3850   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3851    (match_operand:<VWIDE> 1 "register_operand" "w")
3852    (match_operand:VQ_HSI 2 "register_operand" "w")
3853    (match_operand:VQ_HSI 3 "register_operand" "w")]
3854   "TARGET_SIMD"
3856   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3857   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3858                                                   operands[2], operands[3], p));
3859   DONE;
;; As above, subtract form (SQDMLSL2).
3862 (define_expand "aarch64_sqdmlsl2<mode>"
3863   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3864    (match_operand:<VWIDE> 1 "register_operand" "w")
3865    (match_operand:VQ_HSI 2 "register_operand" "w")
3866    (match_operand:VQ_HSI 3 "register_operand" "w")]
3867   "TARGET_SIMD"
3869   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3870   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3871                                                   operands[2], operands[3], p));
3872   DONE;
3875 ;; vqdml[sa]l2_lane
;; SQDMLAL2/SQDMLSL2 by lane: high half of operand 2 multiplied by a
;; duplicated lane of operand 3 (<VCOND>, 64-bit lane register);
;; big-endian lane remap before printing.
3877 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3878   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3879         (SBINQOPS:<VWIDE>
3880           (match_operand:<VWIDE> 1 "register_operand" "0")
3881           (ss_ashift:<VWIDE>
3882               (mult:<VWIDE>
3883                 (sign_extend:<VWIDE>
3884                   (vec_select:<VHALF>
3885                     (match_operand:VQ_HSI 2 "register_operand" "w")
3886                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3887                 (sign_extend:<VWIDE>
3888                   (vec_duplicate:<VHALF>
3889                     (vec_select:<VEL>
3890                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3891                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3892                     ))))
3893               (const_int 1))))]
3894   "TARGET_SIMD"
3895   {
3896     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3897     return
3898      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3899   }
3900   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; laneq variant: lane taken from a 128-bit register (<VCONQ>).
3903 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3904   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3905         (SBINQOPS:<VWIDE>
3906           (match_operand:<VWIDE> 1 "register_operand" "0")
3907           (ss_ashift:<VWIDE>
3908               (mult:<VWIDE>
3909                 (sign_extend:<VWIDE>
3910                   (vec_select:<VHALF>
3911                     (match_operand:VQ_HSI 2 "register_operand" "w")
3912                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3913                 (sign_extend:<VWIDE>
3914                   (vec_duplicate:<VHALF>
3915                     (vec_select:<VEL>
3916                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3917                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3918                     ))))
3919               (const_int 1))))]
3920   "TARGET_SIMD"
3921   {
3922     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3923     return
3924      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3925   }
3926   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Intrinsic expanders for the lane/laneq "2" forms: build the hi-half
;; parallel and emit the matching *_internal pattern.
3929 (define_expand "aarch64_sqdmlal2_lane<mode>"
3930   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3931    (match_operand:<VWIDE> 1 "register_operand" "w")
3932    (match_operand:VQ_HSI 2 "register_operand" "w")
3933    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3934    (match_operand:SI 4 "immediate_operand" "i")]
3935   "TARGET_SIMD"
3937   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3938   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3939                                                        operands[2], operands[3],
3940                                                        operands[4], p));
3941   DONE;
;; laneq (128-bit lane register) add form.
3944 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3945   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3946    (match_operand:<VWIDE> 1 "register_operand" "w")
3947    (match_operand:VQ_HSI 2 "register_operand" "w")
3948    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3949    (match_operand:SI 4 "immediate_operand" "i")]
3950   "TARGET_SIMD"
3952   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3953   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3954                                                        operands[2], operands[3],
3955                                                        operands[4], p));
3956   DONE;
;; lane subtract form.
3959 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3960   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3961    (match_operand:<VWIDE> 1 "register_operand" "w")
3962    (match_operand:VQ_HSI 2 "register_operand" "w")
3963    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3964    (match_operand:SI 4 "immediate_operand" "i")]
3965   "TARGET_SIMD"
3967   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3968   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3969                                                        operands[2], operands[3],
3970                                                        operands[4], p));
3971   DONE;
;; laneq subtract form.
3974 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3975   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3976    (match_operand:<VWIDE> 1 "register_operand" "w")
3977    (match_operand:VQ_HSI 2 "register_operand" "w")
3978    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3979    (match_operand:SI 4 "immediate_operand" "i")]
3980   "TARGET_SIMD"
3982   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3983   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3984                                                        operands[2], operands[3],
3985                                                        operands[4], p));
3986   DONE;
;; SQDMLAL2/SQDMLSL2 with a broadcast scalar multiplicand (_n forms):
;; high half of operand 2 times operand 3 duplicated; prints lane [0].
3989 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3990   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3991         (SBINQOPS:<VWIDE>
3992           (match_operand:<VWIDE> 1 "register_operand" "0")
3993           (ss_ashift:<VWIDE>
3994             (mult:<VWIDE>
3995               (sign_extend:<VWIDE>
3996                 (vec_select:<VHALF>
3997                   (match_operand:VQ_HSI 2 "register_operand" "w")
3998                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3999               (sign_extend:<VWIDE>
4000                 (vec_duplicate:<VHALF>
4001                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4002             (const_int 1))))]
4003   "TARGET_SIMD"
4004   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4005   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Intrinsic expander for the _n add form.
4008 (define_expand "aarch64_sqdmlal2_n<mode>"
4009   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4010    (match_operand:<VWIDE> 1 "register_operand" "w")
4011    (match_operand:VQ_HSI 2 "register_operand" "w")
4012    (match_operand:<VEL> 3 "register_operand" "w")]
4013   "TARGET_SIMD"
4015   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4016   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4017                                                     operands[2], operands[3],
4018                                                     p));
4019   DONE;
;; Intrinsic expander for the _n subtract form.
4022 (define_expand "aarch64_sqdmlsl2_n<mode>"
4023   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4024    (match_operand:<VWIDE> 1 "register_operand" "w")
4025    (match_operand:VQ_HSI 2 "register_operand" "w")
4026    (match_operand:<VEL> 3 "register_operand" "w")]
4027   "TARGET_SIMD"
4029   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4030   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4031                                                     operands[2], operands[3],
4032                                                     p));
4033   DONE;
4036 ;; vqdmull
;; Saturating doubling multiply long (SQDMULL): widened product
;; saturating-shifted left by 1, no accumulator.
4038 (define_insn "aarch64_sqdmull<mode>"
4039   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4040         (ss_ashift:<VWIDE>
4041              (mult:<VWIDE>
4042                (sign_extend:<VWIDE>
4043                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4044                (sign_extend:<VWIDE>
4045                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4046              (const_int 1)))]
4047   "TARGET_SIMD"
4048   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4049   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4052 ;; vqdmull_lane
;; SQDMULL by lane (64-bit lane register, <VCOND>); big-endian lane
;; remap before printing.
4054 (define_insn "aarch64_sqdmull_lane<mode>"
4055   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4056         (ss_ashift:<VWIDE>
4057              (mult:<VWIDE>
4058                (sign_extend:<VWIDE>
4059                  (match_operand:VD_HSI 1 "register_operand" "w"))
4060                (sign_extend:<VWIDE>
4061                  (vec_duplicate:VD_HSI
4062                    (vec_select:<VEL>
4063                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4064                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4065                ))
4066              (const_int 1)))]
4067   "TARGET_SIMD"
4068   {
4069     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4070     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4071   }
4072   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; laneq variant: lane from a 128-bit register (<VCONQ>).
4075 (define_insn "aarch64_sqdmull_laneq<mode>"
4076   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4077         (ss_ashift:<VWIDE>
4078              (mult:<VWIDE>
4079                (sign_extend:<VWIDE>
4080                  (match_operand:VD_HSI 1 "register_operand" "w"))
4081                (sign_extend:<VWIDE>
4082                  (vec_duplicate:VD_HSI
4083                    (vec_select:<VEL>
4084                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4085                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4086                ))
4087              (const_int 1)))]
4088   "TARGET_SIMD"
4089   {
4090     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4091     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4092   }
4093   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4096 (define_insn "aarch64_sqdmull_lane<mode>"
4097   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4098         (ss_ashift:<VWIDE>
4099              (mult:<VWIDE>
4100                (sign_extend:<VWIDE>
4101                  (match_operand:SD_HSI 1 "register_operand" "w"))
4102                (sign_extend:<VWIDE>
4103                  (vec_select:<VEL>
4104                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4105                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4106                ))
4107              (const_int 1)))]
4108   "TARGET_SIMD"
4109   {
4110     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4111     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4112   }
4113   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4116 (define_insn "aarch64_sqdmull_laneq<mode>"
4117   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4118         (ss_ashift:<VWIDE>
4119              (mult:<VWIDE>
4120                (sign_extend:<VWIDE>
4121                  (match_operand:SD_HSI 1 "register_operand" "w"))
4122                (sign_extend:<VWIDE>
4123                  (vec_select:<VEL>
4124                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4125                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4126                ))
4127              (const_int 1)))]
4128   "TARGET_SIMD"
4129   {
4130     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4131     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4132   }
4133   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4136 ;; vqdmull_n
;; sqdmull by a scalar broadcast to every lane: operand 2 is a single
;; element, duplicated before the widening multiply; emitted as lane [0].
4138 (define_insn "aarch64_sqdmull_n<mode>"
4139   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4140         (ss_ashift:<VWIDE>
4141              (mult:<VWIDE>
4142                (sign_extend:<VWIDE>
4143                  (match_operand:VD_HSI 1 "register_operand" "w"))
4144                (sign_extend:<VWIDE>
4145                  (vec_duplicate:VD_HSI
4146                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4147                )
4148              (const_int 1)))]
4149   "TARGET_SIMD"
4150   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4151   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4154 ;; vqdmull2
;; sqdmull2: widening saturating doubling multiply on the HIGH halves of
;; two 128-bit vectors.  Operand 3 is the "high half" selection parallel,
;; applied identically to both inputs via match_dup.
4158 (define_insn "aarch64_sqdmull2<mode>_internal"
4159   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4160         (ss_ashift:<VWIDE>
4161              (mult:<VWIDE>
4162                (sign_extend:<VWIDE>
4163                  (vec_select:<VHALF>
4164                    (match_operand:VQ_HSI 1 "register_operand" "w")
4165                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4166                (sign_extend:<VWIDE>
4167                  (vec_select:<VHALF>
4168                    (match_operand:VQ_HSI 2 "register_operand" "w")
4169                    (match_dup 3)))
4170                )
4171              (const_int 1)))]
4172   "TARGET_SIMD"
4173   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4174   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: materialize the high-half parallel and emit the insn above.
4177 (define_expand "aarch64_sqdmull2<mode>"
4178   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4179    (match_operand:VQ_HSI 1 "register_operand" "w")
4180    (match_operand:VQ_HSI 2 "register_operand" "w")]
4181   "TARGET_SIMD"
4183   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4184   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4185                                                   operands[2], p));
4186   DONE;
4189 ;; vqdmull2_lane
;; sqdmull2 by lane: high half of operand 1 multiplied by a broadcast
;; lane of operand 2.  _lane uses a 64-bit container (<VCOND>), _laneq a
;; 128-bit one (<VCONQ>).  Operand 4 is the high-half parallel, operand 3
;; the lane index (endian-flipped at output time).
4191 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4192   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4193         (ss_ashift:<VWIDE>
4194              (mult:<VWIDE>
4195                (sign_extend:<VWIDE>
4196                  (vec_select:<VHALF>
4197                    (match_operand:VQ_HSI 1 "register_operand" "w")
4198                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4199                (sign_extend:<VWIDE>
4200                  (vec_duplicate:<VHALF>
4201                    (vec_select:<VEL>
4202                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4203                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4204                ))
4205              (const_int 1)))]
4206   "TARGET_SIMD"
4207   {
4208     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4209     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4210   }
4211   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above with a 128-bit lane container.
4214 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4215   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4216         (ss_ashift:<VWIDE>
4217              (mult:<VWIDE>
4218                (sign_extend:<VWIDE>
4219                  (vec_select:<VHALF>
4220                    (match_operand:VQ_HSI 1 "register_operand" "w")
4221                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4222                (sign_extend:<VWIDE>
4223                  (vec_duplicate:<VHALF>
4224                    (vec_select:<VEL>
4225                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4226                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4227                ))
4228              (const_int 1)))]
4229   "TARGET_SIMD"
4230   {
4231     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4232     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4233   }
4234   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expanders: supply the high-half parallel and defer to the internals.
4237 (define_expand "aarch64_sqdmull2_lane<mode>"
4238   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4239    (match_operand:VQ_HSI 1 "register_operand" "w")
4240    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4241    (match_operand:SI 3 "immediate_operand" "i")]
4242   "TARGET_SIMD"
4244   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4245   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4246                                                        operands[2], operands[3],
4247                                                        p));
4248   DONE;
4251 (define_expand "aarch64_sqdmull2_laneq<mode>"
4252   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4253    (match_operand:VQ_HSI 1 "register_operand" "w")
4254    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4255    (match_operand:SI 3 "immediate_operand" "i")]
4256   "TARGET_SIMD"
4258   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4259   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4260                                                        operands[2], operands[3],
4261                                                        p));
4262   DONE;
4265 ;; vqdmull2_n
;; sqdmull2 by a broadcast scalar: high half of operand 1 times operand 2
;; duplicated into every half-vector lane; emitted as lane [0].
4267 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4268   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4269         (ss_ashift:<VWIDE>
4270              (mult:<VWIDE>
4271                (sign_extend:<VWIDE>
4272                  (vec_select:<VHALF>
4273                    (match_operand:VQ_HSI 1 "register_operand" "w")
4274                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4275                (sign_extend:<VWIDE>
4276                  (vec_duplicate:<VHALF>
4277                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4278                )
4279              (const_int 1)))]
4280   "TARGET_SIMD"
4281   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4282   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: supply the high-half parallel and emit the insn above.
4285 (define_expand "aarch64_sqdmull2_n<mode>"
4286   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4287    (match_operand:VQ_HSI 1 "register_operand" "w")
4288    (match_operand:<VEL> 2 "register_operand" "w")]
4289   "TARGET_SIMD"
4291   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4292   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4293                                                     operands[2], p));
4294   DONE;
4297 ;; vshl
;; Shift by a register; the VSHL iterator covers the signed/unsigned and
;; rounding variants selected by <sur>.
4299 (define_insn "aarch64_<sur>shl<mode>"
4300   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4301         (unspec:VSDQ_I_DI
4302           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4303            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4304          VSHL))]
4305   "TARGET_SIMD"
4306   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4307   [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating shift by a register; VQSHL selects sign/rounding via
;; <sur> and <r>.
4313 (define_insn "aarch64_<sur>q<r>shl<mode>"
4314   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4315         (unspec:VSDQ_I
4316           [(match_operand:VSDQ_I 1 "register_operand" "w")
4317            (match_operand:VSDQ_I 2 "register_operand" "w")]
4318          VQSHL))]
4319   "TARGET_SIMD"
4320   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4321   [(set_attr "type" "neon_sat_shift_reg<q>")]
4324 ;; vshll_n
;; Shift left long by immediate.  When the shift equals the element
;; width, the architecture only provides the untyped SHLL form, so the
;; output template special-cases that count.
4326 (define_insn "aarch64_<sur>shll_n<mode>"
4327   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4328         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4329                          (match_operand:SI 2
4330                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4331                          VSHLL))]
4332   "TARGET_SIMD"
4333   {
4334     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4335       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4336     else
4337       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4338   }
4339   [(set_attr "type" "neon_shift_imm_long")]
;; High-half variant (vshll_high_n); same SHLL special case as above.
4344 (define_insn "aarch64_<sur>shll2_n<mode>"
4345   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4347                          (match_operand:SI 2 "immediate_operand" "i")]
4348                          VSHLL))]
4349   "TARGET_SIMD"
4350   {
4351     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4352       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4353     else
4354       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4355   }
4356   [(set_attr "type" "neon_shift_imm_long")]
4359 ;; vrshr_n
;; Shift right by immediate (rounding variants via VRSHR_N).
4361 (define_insn "aarch64_<sur>shr_n<mode>"
4362   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4363         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4364                            (match_operand:SI 2
4365                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4366                           VRSHR_N))]
4367   "TARGET_SIMD"
4368   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4369   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift right and accumulate; operand 1 is tied to the destination
;; ("0") because SRA reads and writes the accumulator register.
4374 (define_insn "aarch64_<sur>sra_n<mode>"
4375   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4376         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4377                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4378                        (match_operand:SI 3
4379                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4380                       VSRA))]
4381   "TARGET_SIMD"
4382   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4383   [(set_attr "type" "neon_shift_acc<q>")]
;; Shift and insert (SLI/SRI); likewise ties operand 1 to the output
;; since the untouched destination bits are preserved.
4388 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4389   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4390         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4391                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4392                        (match_operand:SI 3
4393                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4394                       VSLRI))]
4395   "TARGET_SIMD"
4396   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4397   [(set_attr "type" "neon_shift_imm<q>")]
4400 ;; vqshl(u)
;; Saturating shift left by immediate (SQSHL/UQSHL/SQSHLU via <sur><u>).
4402 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4403   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4404         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4405                        (match_operand:SI 2
4406                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4407                       VQSHL_N))]
4408   "TARGET_SIMD"
4409   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4410   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating shift right narrow by immediate (rounding/unsigned forms
;; selected through <sur>, <r> and <u>); result is the narrow mode.
4416 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4417   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4418         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4419                             (match_operand:SI 2
4420                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4421                            VQSHRN_N))]
4422   "TARGET_SIMD"
4423   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4424   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4428 ;; cm(eq|ge|gt|lt|le)
4429 ;; Note, we have constraints for Dz and Z as different expanders
4430 ;; have different ideas of what should be passed to this pattern.
;; Signed vector compares producing all-ones/all-zeros lanes (negated
;; comparison result).  Second alternative matches a zero immediate and
;; uses the compare-against-#0 form.
4432 (define_insn "aarch64_cm<optab><mode>"
4433   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4434         (neg:<V_INT_EQUIV>
4435           (COMPARISONS:<V_INT_EQUIV>
4436             (match_operand:VDQ_I 1 "register_operand" "w,w")
4437             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4438           )))]
4439   "TARGET_SIMD"
4440   "@
4441   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4442   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4443   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI compare kept as "#" until after reload; then split either to a
;; GP-register compare-and-cstore sequence or to the post-reload SIMD
;; pattern below that carries no CC clobber.
4446 (define_insn_and_split "aarch64_cm<optab>di"
4447   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4448         (neg:DI
4449           (COMPARISONS:DI
4450             (match_operand:DI 1 "register_operand" "w,w,r")
4451             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4452           )))
4453      (clobber (reg:CC CC_REGNUM))]
4454   "TARGET_SIMD"
4455   "#"
4456   "reload_completed"
4457   [(set (match_operand:DI 0 "register_operand")
4458         (neg:DI
4459           (COMPARISONS:DI
4460             (match_operand:DI 1 "register_operand")
4461             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4462           )))]
4463   {
4464     /* If we are in the general purpose register file,
4465        we split to a sequence of comparison and store.  */
4466     if (GP_REGNUM_P (REGNO (operands[0]))
4467         && GP_REGNUM_P (REGNO (operands[1])))
4468       {
4469         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4470         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4471         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4472         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4473         DONE;
4474       }
4475     /* Otherwise, we expand to a similar pattern which does not
4476        clobber CC_REGNUM.  */
4477   }
4478   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload SIMD-register form of the DI compare (no CC clobber).
4481 (define_insn "*aarch64_cm<optab>di"
4482   [(set (match_operand:DI 0 "register_operand" "=w,w")
4483         (neg:DI
4484           (COMPARISONS:DI
4485             (match_operand:DI 1 "register_operand" "w,w")
4486             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4487           )))]
4488   "TARGET_SIMD && reload_completed"
4489   "@
4490   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4491   cm<optab>\t%d0, %d1, #0"
4492   [(set_attr "type" "neon_compare, neon_compare_zero")]
4495 ;; cm(hs|hi)
;; Unsigned vector compares (CMHS/CMHI); no compare-against-zero
;; alternative exists for these.
4497 (define_insn "aarch64_cm<optab><mode>"
4498   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4499         (neg:<V_INT_EQUIV>
4500           (UCOMPARISONS:<V_INT_EQUIV>
4501             (match_operand:VDQ_I 1 "register_operand" "w")
4502             (match_operand:VDQ_I 2 "register_operand" "w")
4503           )))]
4504   "TARGET_SIMD"
4505   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4506   [(set_attr "type" "neon_compare<q>")]
;; DI unsigned compare: split after reload to compare+cstore when in GP
;; registers, else to the CC-clobber-free SIMD pattern below.
4509 (define_insn_and_split "aarch64_cm<optab>di"
4510   [(set (match_operand:DI 0 "register_operand" "=w,r")
4511         (neg:DI
4512           (UCOMPARISONS:DI
4513             (match_operand:DI 1 "register_operand" "w,r")
4514             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4515           )))
4516     (clobber (reg:CC CC_REGNUM))]
4517   "TARGET_SIMD"
4518   "#"
4519   "reload_completed"
4520   [(set (match_operand:DI 0 "register_operand")
4521         (neg:DI
4522           (UCOMPARISONS:DI
4523             (match_operand:DI 1 "register_operand")
4524             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4525           )))]
4526   {
4527     /* If we are in the general purpose register file,
4528        we split to a sequence of comparison and store.  */
4529     if (GP_REGNUM_P (REGNO (operands[0]))
4530         && GP_REGNUM_P (REGNO (operands[1])))
4531       {
4532         machine_mode mode = CCmode;
4533         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4534         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4535         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4536         DONE;
4537       }
4538     /* Otherwise, we expand to a similar pattern which does not
4539        clobber CC_REGNUM.  */
4540   }
4541   [(set_attr "type" "neon_compare,multiple")]
;; Post-reload SIMD-register form of the unsigned DI compare.
4544 (define_insn "*aarch64_cm<optab>di"
4545   [(set (match_operand:DI 0 "register_operand" "=w")
4546         (neg:DI
4547           (UCOMPARISONS:DI
4548             (match_operand:DI 1 "register_operand" "w")
4549             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4550           )))]
4551   "TARGET_SIMD && reload_completed"
4552   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4553   [(set_attr "type" "neon_compare")]
4556 ;; cmtst
4558 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4559 ;; we don't have any insns using ne, and aarch64_vcond outputs
4560 ;; not (neg (eq (and x y) 0))
4561 ;; which is rewritten by simplify_rtx as
4562 ;; plus (eq (and x y) 0) -1.
;; CMTST written as (plus (eq (and x y) 0) -1), the simplify-rtx
;; canonical form of neg (ne (and x y) 0) -- see the comment above.
4564 (define_insn "aarch64_cmtst<mode>"
4565   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4566         (plus:<V_INT_EQUIV>
4567           (eq:<V_INT_EQUIV>
4568             (and:VDQ_I
4569               (match_operand:VDQ_I 1 "register_operand" "w")
4570               (match_operand:VDQ_I 2 "register_operand" "w"))
4571             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4572           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4573   ]
4574   "TARGET_SIMD"
4575   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4576   [(set_attr "type" "neon_tst<q>")]
;; DI variant: split after reload to TST-style compare+cstore when in
;; GP registers, else to the SIMD pattern below.
4579 (define_insn_and_split "aarch64_cmtstdi"
4580   [(set (match_operand:DI 0 "register_operand" "=w,r")
4581         (neg:DI
4582           (ne:DI
4583             (and:DI
4584               (match_operand:DI 1 "register_operand" "w,r")
4585               (match_operand:DI 2 "register_operand" "w,r"))
4586             (const_int 0))))
4587     (clobber (reg:CC CC_REGNUM))]
4588   "TARGET_SIMD"
4589   "#"
4590   "reload_completed"
4591   [(set (match_operand:DI 0 "register_operand")
4592         (neg:DI
4593           (ne:DI
4594             (and:DI
4595               (match_operand:DI 1 "register_operand")
4596               (match_operand:DI 2 "register_operand"))
4597             (const_int 0))))]
4598   {
4599     /* If we are in the general purpose register file,
4600        we split to a sequence of comparison and store.  */
4601     if (GP_REGNUM_P (REGNO (operands[0]))
4602         && GP_REGNUM_P (REGNO (operands[1])))
4603       {
4604         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4605         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4606         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4607         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4608         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4609         DONE;
4610       }
4611     /* Otherwise, we expand to a similar pattern which does not
4612        clobber CC_REGNUM.  */
4613   }
4614   [(set_attr "type" "neon_tst,multiple")]
;; SIMD-register form of CMTST on DI (no CC clobber).
4617 (define_insn "*aarch64_cmtstdi"
4618   [(set (match_operand:DI 0 "register_operand" "=w")
4619         (neg:DI
4620           (ne:DI
4621             (and:DI
4622               (match_operand:DI 1 "register_operand" "w")
4623               (match_operand:DI 2 "register_operand" "w"))
4624             (const_int 0))))]
4625   "TARGET_SIMD"
4626   "cmtst\t%d0, %d1, %d2"
4627   [(set_attr "type" "neon_tst")]
4630 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compares; second alternative matches a zero constant
;; (YDz) and uses the compare-against-0 form.
4632 (define_insn "aarch64_cm<optab><mode>"
4633   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4634         (neg:<V_INT_EQUIV>
4635           (COMPARISONS:<V_INT_EQUIV>
4636             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4637             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4638           )))]
4639   "TARGET_SIMD"
4640   "@
4641   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4642   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4643   [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Absolute compares (FACGE/FACGT); the le/lt forms are produced by
;; swapping operands via <cmp_1>/<cmp_2> as noted above.
4650 (define_insn "aarch64_fac<optab><mode>"
4651   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4652         (neg:<V_INT_EQUIV>
4653           (FAC_COMPARISONS:<V_INT_EQUIV>
4654             (abs:VHSDF_HSDF
4655               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4656             (abs:VHSDF_HSDF
4657               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4658   )))]
4659   "TARGET_SIMD"
4660   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4661   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4664 ;; addp
;; Pairwise add on 64-bit integer vectors.
4666 (define_insn "aarch64_addp<mode>"
4667   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4668         (unspec:VD_BHSI
4669           [(match_operand:VD_BHSI 1 "register_operand" "w")
4670            (match_operand:VD_BHSI 2 "register_operand" "w")]
4671           UNSPEC_ADDP))]
4672   "TARGET_SIMD"
4673   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4674   [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar pairwise add: reduce a V2DI to a single D register.
4677 (define_insn "aarch64_addpdi"
4678   [(set (match_operand:DI 0 "register_operand" "=w")
4679         (unspec:DI
4680           [(match_operand:V2DI 1 "register_operand" "w")]
4681           UNSPEC_ADDP))]
4682   "TARGET_SIMD"
4683   "addp\t%d0, %1.2d"
4684   [(set_attr "type" "neon_reduc_add")]
4687 ;; sqrt
;; sqrt optab: try the approximate-sqrt expansion first (tuning
;; dependent); if it declines, fall through to the FSQRT insn below.
4689 (define_expand "sqrt<mode>2"
4690   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4691         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4692   "TARGET_SIMD"
4694   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4695     DONE;
;; Direct FSQRT instruction form.
4698 (define_insn "*sqrt<mode>2"
4699   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4700         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4701   "TARGET_SIMD"
4702   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4703   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4706 ;; Patterns for vector struct loads and stores.
;; Two-register structure load (LD2); OImode holds the register pair,
;; the inner VQ unspec records the element mode.
4708 (define_insn "aarch64_simd_ld2<mode>"
4709   [(set (match_operand:OI 0 "register_operand" "=w")
4710         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4711                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4712                    UNSPEC_LD2))]
4713   "TARGET_SIMD"
4714   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4715   [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one structure and replicate it to all lanes.
4718 (define_insn "aarch64_simd_ld2r<mode>"
4719   [(set (match_operand:OI 0 "register_operand" "=w")
4720        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4721                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4722                   UNSPEC_LD2_DUP))]
4723   "TARGET_SIMD"
4724   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4725   [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 to a single lane; operand 2 ties the unmodified lanes to the
;; destination, operand 3 is the lane index (endian-flipped at output).
4728 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4729   [(set (match_operand:OI 0 "register_operand" "=w")
4730         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4731                     (match_operand:OI 2 "register_operand" "0")
4732                     (match_operand:SI 3 "immediate_operand" "i")
4733                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4734                    UNSPEC_LD2_LANE))]
4735   "TARGET_SIMD"
4736   {
4737     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4738     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4739   }
4740   [(set_attr "type" "neon_load2_one_lane")]
;; vec_load_lanes optab: on big-endian, load then permute the register
;; list so the RTL-level lane numbering stays consistent.
4743 (define_expand "vec_load_lanesoi<mode>"
4744   [(set (match_operand:OI 0 "register_operand" "=w")
4745         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4746                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4747                    UNSPEC_LD2))]
4748   "TARGET_SIMD"
4750   if (BYTES_BIG_ENDIAN)
4751     {
4752       rtx tmp = gen_reg_rtx (OImode);
4753       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4754       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4755       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4756     }
4757   else
4758     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4759   DONE;
;; Two-register structure store (ST2).
4762 (define_insn "aarch64_simd_st2<mode>"
4763   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4764         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4765                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4766                    UNSPEC_ST2))]
4767   "TARGET_SIMD"
4768   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4769   [(set_attr "type" "neon_store2_2reg<q>")]
4772 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4773 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4774   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4775         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4776                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4777                     (match_operand:SI 2 "immediate_operand" "i")]
4778                    UNSPEC_ST2_LANE))]
4779   "TARGET_SIMD"
4780   {
4781     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4782     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4783   }
4784   [(set_attr "type" "neon_store2_one_lane<q>")]
;; vec_store_lanes optab: mirror of the load -- permute first on
;; big-endian, then store.
4787 (define_expand "vec_store_lanesoi<mode>"
4788   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4789         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4790                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4791                    UNSPEC_ST2))]
4792   "TARGET_SIMD"
4794   if (BYTES_BIG_ENDIAN)
4795     {
4796       rtx tmp = gen_reg_rtx (OImode);
4797       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4798       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4799       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4800     }
4801   else
4802     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4803   DONE;
;; Three-register structure load (LD3); CImode holds the register triple.
;; Structure of each pattern parallels the LD2/ST2 family above.
4806 (define_insn "aarch64_simd_ld3<mode>"
4807   [(set (match_operand:CI 0 "register_operand" "=w")
4808         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4809                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4810                    UNSPEC_LD3))]
4811   "TARGET_SIMD"
4812   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4813   [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one structure and replicate to all lanes.
4816 (define_insn "aarch64_simd_ld3r<mode>"
4817   [(set (match_operand:CI 0 "register_operand" "=w")
4818        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4819                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4820                   UNSPEC_LD3_DUP))]
4821   "TARGET_SIMD"
4822   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4823   [(set_attr "type" "neon_load3_all_lanes<q>")]
;; LD3 to a single lane; operand 2 preserves the other lanes, operand 3
;; is the lane index (endian-flipped at output).
4826 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4827   [(set (match_operand:CI 0 "register_operand" "=w")
4828         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4829                     (match_operand:CI 2 "register_operand" "0")
4830                     (match_operand:SI 3 "immediate_operand" "i")
4831                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4832                    UNSPEC_LD3_LANE))]
4833   "TARGET_SIMD"
4835     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4836     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4838   [(set_attr "type" "neon_load3_one_lane")]
;; vec_load_lanes optab: big-endian loads are followed by a register-list
;; permute to keep RTL lane numbering consistent.
4841 (define_expand "vec_load_lanesci<mode>"
4842   [(set (match_operand:CI 0 "register_operand" "=w")
4843         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4844                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4845                    UNSPEC_LD3))]
4846   "TARGET_SIMD"
4848   if (BYTES_BIG_ENDIAN)
4849     {
4850       rtx tmp = gen_reg_rtx (CImode);
4851       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4852       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4853       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4854     }
4855   else
4856     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4857   DONE;
;; Three-register structure store (ST3).
4860 (define_insn "aarch64_simd_st3<mode>"
4861   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4862         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4863                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4864                    UNSPEC_ST3))]
4865   "TARGET_SIMD"
4866   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4867   [(set_attr "type" "neon_store3_3reg<q>")]
4870 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4871 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4872   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4873         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4874                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4875                      (match_operand:SI 2 "immediate_operand" "i")]
4876                     UNSPEC_ST3_LANE))]
4877   "TARGET_SIMD"
4878   {
4879     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4880     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4881   }
4882   [(set_attr "type" "neon_store3_one_lane<q>")]
;; vec_store_lanes optab: permute first on big-endian, then store.
4885 (define_expand "vec_store_lanesci<mode>"
4886   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4887         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4888                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4889                    UNSPEC_ST3))]
4890   "TARGET_SIMD"
4892   if (BYTES_BIG_ENDIAN)
4893     {
4894       rtx tmp = gen_reg_rtx (CImode);
4895       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4896       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4897       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4898     }
4899   else
4900     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4901   DONE;
;; Load a full 4-element structure into an XI (four Q-register) list (LD4).
4904 (define_insn "aarch64_simd_ld4<mode>"
4905   [(set (match_operand:XI 0 "register_operand" "=w")
4906         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4907                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4908                    UNSPEC_LD4))]
4909   "TARGET_SIMD"
4910   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4911   [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one 4-element structure and replicate it to all lanes (LD4R).
4914 (define_insn "aarch64_simd_ld4r<mode>"
4915   [(set (match_operand:XI 0 "register_operand" "=w")
4916        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4917                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4918                   UNSPEC_LD4_DUP))]
4919   "TARGET_SIMD"
4920   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4921   [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Load a single lane of a 4-element structure (LD4 {..}[lane]);
;; operand 2 supplies the untouched lanes, operand 3 the GCC lane index.
4924 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4925   [(set (match_operand:XI 0 "register_operand" "=w")
4926         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4927                     (match_operand:XI 2 "register_operand" "0")
4928                     (match_operand:SI 3 "immediate_operand" "i")
4929                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4930                    UNSPEC_LD4_LANE))]
4931   "TARGET_SIMD"
;; Endian-correct the lane index at assembly-output time only.
4933     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4934     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4936   [(set_attr "type" "neon_load4_one_lane")]
;; Standard vec_load_lanes expander for 4-element structures; on
;; big-endian, LD4 into a temporary then reverse the register list.
4939 (define_expand "vec_load_lanesxi<mode>"
4940   [(set (match_operand:XI 0 "register_operand" "=w")
4941         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4942                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4943                    UNSPEC_LD4))]
4944   "TARGET_SIMD"
4946   if (BYTES_BIG_ENDIAN)
4947     {
4948       rtx tmp = gen_reg_rtx (XImode);
4949       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4950       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4951       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4952     }
4953   else
4954     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4955   DONE;
;; Store a full 4-element structure from an XI register list (ST4).
4958 (define_insn "aarch64_simd_st4<mode>"
4959   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4960         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4961                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4962                    UNSPEC_ST4))]
4963   "TARGET_SIMD"
4964   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4965   [(set_attr "type" "neon_store4_4reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store a single lane of a 4-element structure (ST4 {..}[lane]).
4969 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4970   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4971         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4972                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4973                      (match_operand:SI 2 "immediate_operand" "i")]
4974                     UNSPEC_ST4_LANE))]
4975   "TARGET_SIMD"
4976   {
4977     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4978     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4979   }
4980   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard vec_store_lanes expander for 4-element structures; on
;; big-endian, reverse the register list into a temporary before ST4.
4983 (define_expand "vec_store_lanesxi<mode>"
4984   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4985         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4986                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4987                    UNSPEC_ST4))]
4988   "TARGET_SIMD"
4990   if (BYTES_BIG_ENDIAN)
4991     {
4992       rtx tmp = gen_reg_rtx (XImode);
4993       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4994       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4995       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4996     }
4997   else
4998     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4999   DONE;
;; Reverse the element order of every vector in a structure register
;; list (used for the big-endian lane-order fixups above).  Kept as a
;; single insn until after reload, then split into one TBL per
;; constituent Q register using the byte-permute mask in operand 2.
5002 (define_insn_and_split "aarch64_rev_reglist<mode>"
5003 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5004         (unspec:VSTRUCT
5005                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5006                     (match_operand:V16QI 2 "register_operand" "w")]
5007                    UNSPEC_REV_REGLIST))]
5008   "TARGET_SIMD"
5009   "#"
5010   "&& reload_completed"
5011   [(const_int 0)]
5013   int i;
;; One TBL per 128-bit register in the list.
5014   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5015   for (i = 0; i < nregs; i++)
5016     {
5017       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5018       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5019       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5020     }
5021   DONE;
5023   [(set_attr "type" "neon_tbl1_q")
5024    (set_attr "length" "<insn_count>")]
;; Reload patterns for AdvSIMD register list operands.
;; Move expander for structure (multi-register) modes: before reload,
;; force memory-destination moves to go through a register.
5029 (define_expand "mov<mode>"
5030   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5031         (match_operand:VSTRUCT 1 "general_operand" ""))]
5032   "TARGET_SIMD"
5034   if (can_create_pseudo_p ())
5035     {
5036       if (GET_CODE (operands[0]) != REG)
5037         operands[1] = force_reg (<MODE>mode, operands[1]);
5038     }
;; Little-endian structure move: reg-reg moves are split later ("#"),
;; memory transfers use single ST1/LD1 multi-register forms.
5041 (define_insn "*aarch64_mov<mode>"
5042   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5043         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5044   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5045    && (register_operand (operands[0], <MODE>mode)
5046        || register_operand (operands[1], <MODE>mode))"
5047   "@
5048    #
5049    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5050    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5051   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5052                      neon_load<nregs>_<nregs>reg_q")
5053    (set_attr "length" "<insn_count>,4,4")]
;; Big-endian element-ordered single-register load (LD1 preserves
;; array element order regardless of endianness).
5056 (define_insn "aarch64_be_ld1<mode>"
5057   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5058         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5059                              "aarch64_simd_struct_operand" "Utv")]
5060         UNSPEC_LD1))]
5061   "TARGET_SIMD"
5062   "ld1\\t{%0<Vmtype>}, %1"
5063   [(set_attr "type" "neon_load1_1reg<q>")]
;; Big-endian element-ordered single-register store (ST1).
5066 (define_insn "aarch64_be_st1<mode>"
5067   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5068         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5069         UNSPEC_ST1))]
5070   "TARGET_SIMD"
5071   "st1\\t{%1<Vmtype>}, %0"
5072   [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian OI (2 x 128-bit) move: reg-reg split later, memory
;; transfers via STP/LDP of Q registers.
5075 (define_insn "*aarch64_be_movoi"
5076   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5077         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5078   "TARGET_SIMD && BYTES_BIG_ENDIAN
5079    && (register_operand (operands[0], OImode)
5080        || register_operand (operands[1], OImode))"
5081   "@
5082    #
5083    stp\\t%q1, %R1, %0
5084    ldp\\t%q0, %R0, %1"
5085   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5086    (set_attr "length" "8,4,4")]
;; Big-endian CI (3 x 128-bit) move: always split (see define_split
;; below); "o" (offsettable) memory so the split can address pieces.
5089 (define_insn "*aarch64_be_movci"
5090   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5091         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5092   "TARGET_SIMD && BYTES_BIG_ENDIAN
5093    && (register_operand (operands[0], CImode)
5094        || register_operand (operands[1], CImode))"
5095   "#"
5096   [(set_attr "type" "multiple")
5097    (set_attr "length" "12,4,4")]
;; Big-endian XI (4 x 128-bit) move: always split into two OI moves.
5100 (define_insn "*aarch64_be_movxi"
5101   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5102         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5103   "TARGET_SIMD && BYTES_BIG_ENDIAN
5104    && (register_operand (operands[0], XImode)
5105        || register_operand (operands[1], XImode))"
5106   "#"
5107   [(set_attr "type" "multiple")
5108    (set_attr "length" "16,4,4")]
;; After reload, split an OI reg-reg move into two TImode moves.
5111 (define_split
5112   [(set (match_operand:OI 0 "register_operand")
5113         (match_operand:OI 1 "register_operand"))]
5114   "TARGET_SIMD && reload_completed"
5115   [(const_int 0)]
5117   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5118   DONE;
;; Split CI moves: reg-reg becomes three TImode moves; on big-endian a
;; memory transfer becomes one OI move plus one V16QI move for the
;; trailing 16 bytes (offset 32).  Little-endian memory moves FAIL so
;; the generic move path handles them.
5121 (define_split
5122   [(set (match_operand:CI 0 "nonimmediate_operand")
5123         (match_operand:CI 1 "general_operand"))]
5124   "TARGET_SIMD && reload_completed"
5125   [(const_int 0)]
5127   if (register_operand (operands[0], CImode)
5128       && register_operand (operands[1], CImode))
5129     {
5130       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5131       DONE;
5132     }
5133   else if (BYTES_BIG_ENDIAN)
5134     {
5135       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5136                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5137       emit_move_insn (gen_lowpart (V16QImode,
5138                                    simplify_gen_subreg (TImode, operands[0],
5139                                                         CImode, 32)),
5140                       gen_lowpart (V16QImode,
5141                                    simplify_gen_subreg (TImode, operands[1],
5142                                                         CImode, 32)));
5143       DONE;
5144     }
5145   else
5146     FAIL;
;; Split XI moves: reg-reg becomes four TImode moves; big-endian memory
;; transfers become two OI moves (offsets 0 and 32).
5149 (define_split
5150   [(set (match_operand:XI 0 "nonimmediate_operand")
5151         (match_operand:XI 1 "general_operand"))]
5152   "TARGET_SIMD && reload_completed"
5153   [(const_int 0)]
5155   if (register_operand (operands[0], XImode)
5156       && register_operand (operands[1], XImode))
5157     {
5158       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5159       DONE;
5160     }
5161   else if (BYTES_BIG_ENDIAN)
5162     {
5163       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5164                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5165       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5166                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5167       DONE;
5168     }
5169   else
5170     FAIL;
;; Builtin expander for the vldN_dup intrinsics: wrap the pointer in a
;; BLKmode MEM sized to nregs elements and emit the LDnR pattern.
5173 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5174   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5175    (match_operand:DI 1 "register_operand" "w")
5176    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5177   "TARGET_SIMD"
5179   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5180   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5181                      * <VSTRUCT:nregs>);
5183   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5184                                                                 mem));
5185   DONE;
;; LD2 of 64-bit vector elements into the low halves of an OI list.
5188 (define_insn "aarch64_ld2<mode>_dreg"
5189   [(set (match_operand:OI 0 "register_operand" "=w")
5190         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5191                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5192                    UNSPEC_LD2_DREG))]
5193   "TARGET_SIMD"
5194   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5195   [(set_attr "type" "neon_load2_2reg<q>")]
;; DX (64-bit scalar DI/DF) variant: a plain LD1 of two 1d registers
;; gives the same layout as a de-interleaving load of scalars.
5198 (define_insn "aarch64_ld2<mode>_dreg"
5199   [(set (match_operand:OI 0 "register_operand" "=w")
5200         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5201                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5202                    UNSPEC_LD2_DREG))]
5203   "TARGET_SIMD"
5204   "ld1\\t{%S0.1d - %T0.1d}, %1"
5205   [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3 of 64-bit vector elements (CI list).
5208 (define_insn "aarch64_ld3<mode>_dreg"
5209   [(set (match_operand:CI 0 "register_operand" "=w")
5210         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5211                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5212                    UNSPEC_LD3_DREG))]
5213   "TARGET_SIMD"
5214   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5215   [(set_attr "type" "neon_load3_3reg<q>")]
;; DX variant of LD3: LD1 of three 1d registers.
5218 (define_insn "aarch64_ld3<mode>_dreg"
5219   [(set (match_operand:CI 0 "register_operand" "=w")
5220         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5221                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5222                    UNSPEC_LD3_DREG))]
5223   "TARGET_SIMD"
5224   "ld1\\t{%S0.1d - %U0.1d}, %1"
5225   [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4 of 64-bit vector elements (XI list).
5228 (define_insn "aarch64_ld4<mode>_dreg"
5229   [(set (match_operand:XI 0 "register_operand" "=w")
5230         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5231                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5232                    UNSPEC_LD4_DREG))]
5233   "TARGET_SIMD"
5234   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5235   [(set_attr "type" "neon_load4_4reg<q>")]
;; DX variant of LD4: LD1 of four 1d registers.
5238 (define_insn "aarch64_ld4<mode>_dreg"
5239   [(set (match_operand:XI 0 "register_operand" "=w")
5240         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5241                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5242                    UNSPEC_LD4_DREG))]
5243   "TARGET_SIMD"
5244   "ld1\\t{%S0.1d - %V0.1d}, %1"
5245   [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for vldN on 64-bit element modes: each register
;; holds 8 bytes, hence the nregs * 8 memory size.
5248 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5249  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5250   (match_operand:DI 1 "register_operand" "r")
5251   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5252   "TARGET_SIMD"
5254   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5255   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5257   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5258   DONE;
;; Builtin expander for vld1: big-endian uses the element-ordered
;; LD1 pattern, little-endian is just a normal vector move.
5261 (define_expand "aarch64_ld1<VALL_F16:mode>"
5262  [(match_operand:VALL_F16 0 "register_operand")
5263   (match_operand:DI 1 "register_operand")]
5264   "TARGET_SIMD"
5266   machine_mode mode = <VALL_F16:MODE>mode;
5267   rtx mem = gen_rtx_MEM (mode, operands[1]);
5269   if (BYTES_BIG_ENDIAN)
5270     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5271   else
5272     emit_move_insn (operands[0], mem);
5273   DONE;
;; Builtin expander for vldN on 128-bit element modes: forward to the
;; LDn structure-load pattern.
5276 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5277  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5278   (match_operand:DI 1 "register_operand" "r")
5279   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5280   "TARGET_SIMD"
5282   machine_mode mode = <VSTRUCT:MODE>mode;
5283   rtx mem = gen_rtx_MEM (mode, operands[1]);
5285   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5286   DONE;
;; Builtin expander for vldN_lane: bounds-check the lane index, build
;; the BLK MEM (one element per register), and emit the lane load.
5289 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5290   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5291         (match_operand:DI 1 "register_operand" "w")
5292         (match_operand:VSTRUCT 2 "register_operand" "0")
5293         (match_operand:SI 3 "immediate_operand" "i")
5294         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5295   "TARGET_SIMD"
5297   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5298   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5299                      * <VSTRUCT:nregs>);
5301   aarch64_simd_lane_bounds (operands[3], 0,
5302                             GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5303                             NULL);
5304   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5305         operands[0], mem, operands[2], operands[3]));
5306   DONE;
;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.
;; D-register list.
;; Extract 128-bit slice `part' of the struct register, then take its
;; low 64-bit half as the D-register result.
5314 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5315  [(match_operand:VDC 0 "register_operand" "=w")
5316   (match_operand:VSTRUCT 1 "register_operand" "w")
5317   (match_operand:SI 2 "immediate_operand" "i")]
5318   "TARGET_SIMD"
5320   int part = INTVAL (operands[2]);
5321   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
;; Slices are laid out at 16-byte (Q-register) strides.
5322   int offset = part * 16;
5324   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5325   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5326   DONE;
;; Q-register list.
;; Extract Q register `part' directly via a subreg at a 16-byte stride.
5331 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5332  [(match_operand:VQ 0 "register_operand" "=w")
5333   (match_operand:VSTRUCT 1 "register_operand" "w")
5334   (match_operand:SI 2 "immediate_operand" "i")]
5335   "TARGET_SIMD"
5337   int part = INTVAL (operands[2]);
5338   int offset = part * 16;
5340   emit_move_insn (operands[0],
5341                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5342   DONE;
;; Permuted-store expanders for neon intrinsics.
;; Permute instructions
;; vec_perm support
;; Constant permutation: try to match a single permute instruction
;; (TRN/ZIP/UZP/EXT/REV/DUP/TBL); FAIL lets the middle end fall back.
5351 (define_expand "vec_perm_const<mode>"
5352   [(match_operand:VALL_F16 0 "register_operand")
5353    (match_operand:VALL_F16 1 "register_operand")
5354    (match_operand:VALL_F16 2 "register_operand")
5355    (match_operand:<V_INT_EQUIV> 3)]
5356   "TARGET_SIMD"
5358   if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5359                                      operands[2], operands[3], <nunits>))
5360     DONE;
5361   else
5362     FAIL;
;; Variable permutation on byte vectors, expanded to TBL sequences.
5365 (define_expand "vec_perm<mode>"
5366   [(match_operand:VB 0 "register_operand")
5367    (match_operand:VB 1 "register_operand")
5368    (match_operand:VB 2 "register_operand")
5369    (match_operand:VB 3 "register_operand")]
5370   "TARGET_SIMD"
5372   aarch64_expand_vec_perm (operands[0], operands[1],
5373                            operands[2], operands[3], <nunits>);
5374   DONE;
;; Table lookup with a single source register (TBL, 1-register form).
5377 (define_insn "aarch64_tbl1<mode>"
5378   [(set (match_operand:VB 0 "register_operand" "=w")
5379         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5380                     (match_operand:VB 2 "register_operand" "w")]
5381                    UNSPEC_TBL))]
5382   "TARGET_SIMD"
5383   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5384   [(set_attr "type" "neon_tbl1<q>")]
;; Two source registers.
;; TBL over a two-register table (OI operand holds the pair).
5389 (define_insn "aarch64_tbl2v16qi"
5390   [(set (match_operand:V16QI 0 "register_operand" "=w")
5391         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5392                        (match_operand:V16QI 2 "register_operand" "w")]
5393                       UNSPEC_TBL))]
5394   "TARGET_SIMD"
5395   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5396   [(set_attr "type" "neon_tbl2_q")]
;; TBL over a two-register table for any byte-vector result mode.
5399 (define_insn "aarch64_tbl3<mode>"
5400   [(set (match_operand:VB 0 "register_operand" "=w")
5401         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5402                       (match_operand:VB 2 "register_operand" "w")]
5403                       UNSPEC_TBL))]
5404   "TARGET_SIMD"
5405   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5406   [(set_attr "type" "neon_tbl3")]
;; TBX (table extend) over a two-register table: out-of-range indices
;; keep the corresponding bytes of operand 1 (tied to the result).
5409 (define_insn "aarch64_tbx4<mode>"
5410   [(set (match_operand:VB 0 "register_operand" "=w")
5411         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5412                       (match_operand:OI 2 "register_operand" "w")
5413                       (match_operand:VB 3 "register_operand" "w")]
5414                       UNSPEC_TBX))]
5415   "TARGET_SIMD"
5416   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5417   [(set_attr "type" "neon_tbl4")]
;; Three source registers.
;; TBL over a three-register table (CI operand).
5422 (define_insn "aarch64_qtbl3<mode>"
5423   [(set (match_operand:VB 0 "register_operand" "=w")
5424         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5425                       (match_operand:VB 2 "register_operand" "w")]
5426                       UNSPEC_TBL))]
5427   "TARGET_SIMD"
5428   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5429   [(set_attr "type" "neon_tbl3")]
;; TBX over a three-register table.
5432 (define_insn "aarch64_qtbx3<mode>"
5433   [(set (match_operand:VB 0 "register_operand" "=w")
5434         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5435                       (match_operand:CI 2 "register_operand" "w")
5436                       (match_operand:VB 3 "register_operand" "w")]
5437                       UNSPEC_TBX))]
5438   "TARGET_SIMD"
5439   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5440   [(set_attr "type" "neon_tbl3")]
;; Four source registers.
;; TBL over a four-register table (XI operand).
5445 (define_insn "aarch64_qtbl4<mode>"
5446   [(set (match_operand:VB 0 "register_operand" "=w")
5447         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5448                       (match_operand:VB 2 "register_operand" "w")]
5449                       UNSPEC_TBL))]
5450   "TARGET_SIMD"
5451   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5452   [(set_attr "type" "neon_tbl4")]
;; TBX over a four-register table.
5455 (define_insn "aarch64_qtbx4<mode>"
5456   [(set (match_operand:VB 0 "register_operand" "=w")
5457         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5458                       (match_operand:XI 2 "register_operand" "w")
5459                       (match_operand:VB 3 "register_operand" "w")]
5460                       UNSPEC_TBX))]
5461   "TARGET_SIMD"
5462   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5463   [(set_attr "type" "neon_tbl4")]
;; Concatenate two V16QI registers into an OI register pair (used to
;; build two-register TBL tables); split after reload into the actual
;; register moves.
5466 (define_insn_and_split "aarch64_combinev16qi"
5467   [(set (match_operand:OI 0 "register_operand" "=w")
5468         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5469                     (match_operand:V16QI 2 "register_operand" "w")]
5470                    UNSPEC_CONCAT))]
5471   "TARGET_SIMD"
5472   "#"
5473   "&& reload_completed"
5474   [(const_int 0)]
5476   aarch64_split_combinev16qi (operands);
5477   DONE;
5479 [(set_attr "type" "multiple")]
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; Two-register permutes: TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2.
5485 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5486   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5487         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5488                           (match_operand:VALL_F16 2 "register_operand" "w")]
5489          PERMUTE))]
5490   "TARGET_SIMD"
5491   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5492   [(set_attr "type" "neon_permute<q>")]
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
5499 (define_insn "aarch64_ext<mode>"
5500   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5501         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5502                           (match_operand:VALL_F16 2 "register_operand" "w")
5503                           (match_operand:SI 3 "immediate_operand" "i")]
5504          UNSPEC_EXT))]
5505   "TARGET_SIMD"
;; EXT's immediate is a byte offset: scale the lane index by the
;; element size at output time.
5507   operands[3] = GEN_INT (INTVAL (operands[3])
5508       * GET_MODE_UNIT_SIZE (<MODE>mode));
5509   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5511   [(set_attr "type" "neon_ext<q>")]
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; Element reversal within containers: REV16/REV32/REV64.
5517 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5518   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5519         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5520                     REVERSE))]
5521   "TARGET_SIMD"
5522   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5523   [(set_attr "type" "neon_rev<q>")]
;; ST2 of 64-bit vector elements from an OI list.
5526 (define_insn "aarch64_st2<mode>_dreg"
5527   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5528         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5529                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5530                    UNSPEC_ST2))]
5531   "TARGET_SIMD"
5532   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5533   [(set_attr "type" "neon_store2_2reg")]
;; DX (64-bit scalar) variant: ST1 of two 1d registers.
5536 (define_insn "aarch64_st2<mode>_dreg"
5537   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5538         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5539                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5540                    UNSPEC_ST2))]
5541   "TARGET_SIMD"
5542   "st1\\t{%S1.1d - %T1.1d}, %0"
5543   [(set_attr "type" "neon_store1_2reg")]
;; ST3 of 64-bit vector elements from a CI list.
5546 (define_insn "aarch64_st3<mode>_dreg"
5547   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5548         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5549                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5550                    UNSPEC_ST3))]
5551   "TARGET_SIMD"
5552   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5553   [(set_attr "type" "neon_store3_3reg")]
;; DX variant of ST3: ST1 of three 1d registers.
5556 (define_insn "aarch64_st3<mode>_dreg"
5557   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5558         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5559                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5560                    UNSPEC_ST3))]
5561   "TARGET_SIMD"
5562   "st1\\t{%S1.1d - %U1.1d}, %0"
5563   [(set_attr "type" "neon_store1_3reg")]
;; ST4 of 64-bit vector elements from an XI list.
5566 (define_insn "aarch64_st4<mode>_dreg"
5567   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5568         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5569                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5570                    UNSPEC_ST4))]
5571   "TARGET_SIMD"
5572   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5573   [(set_attr "type" "neon_store4_4reg")]
;; DX variant of ST4: ST1 of four 1d registers.
5576 (define_insn "aarch64_st4<mode>_dreg"
5577   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5578         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5579                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5580                    UNSPEC_ST4))]
5581   "TARGET_SIMD"
5582   "st1\\t{%S1.1d - %V1.1d}, %0"
5583   [(set_attr "type" "neon_store1_4reg")]
;; Builtin expander for vstN on 64-bit element modes (nregs * 8 bytes).
5586 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5587  [(match_operand:DI 0 "register_operand" "r")
5588   (match_operand:VSTRUCT 1 "register_operand" "w")
5589   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5590   "TARGET_SIMD"
5592   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5593   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5595   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5596   DONE;
;; Builtin expander for vstN on 128-bit element modes.
5599 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5600  [(match_operand:DI 0 "register_operand" "r")
5601   (match_operand:VSTRUCT 1 "register_operand" "w")
5602   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5603   "TARGET_SIMD"
5605   machine_mode mode = <VSTRUCT:MODE>mode;
5606   rtx mem = gen_rtx_MEM (mode, operands[0]);
5608   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5609   DONE;
;; Builtin expander for vstN_lane: build a BLK MEM sized to one
;; element per register and emit the single-lane store pattern.
;; NOTE(review): unlike the ld_lane expander, no explicit
;; aarch64_simd_lane_bounds check here — presumably done elsewhere.
5612 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5613  [(match_operand:DI 0 "register_operand" "r")
5614   (match_operand:VSTRUCT 1 "register_operand" "w")
5615   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5616   (match_operand:SI 2 "immediate_operand")]
5617   "TARGET_SIMD"
5619   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5620   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5621                      * <VSTRUCT:nregs>);
5623   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5624                 mem, operands[1], operands[2]));
5625   DONE;
;; Builtin expander for vst1: big-endian uses element-ordered ST1,
;; little-endian is a plain vector store.
5628 (define_expand "aarch64_st1<VALL_F16:mode>"
5629  [(match_operand:DI 0 "register_operand")
5630   (match_operand:VALL_F16 1 "register_operand")]
5631   "TARGET_SIMD"
5633   machine_mode mode = <VALL_F16:MODE>mode;
5634   rtx mem = gen_rtx_MEM (mode, operands[0]);
5636   if (BYTES_BIG_ENDIAN)
5637     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5638   else
5639     emit_move_insn (mem, operands[1]);
5640   DONE;
;; Expander for builtins to insert vector registers into large
;; opaque integer modes.
;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole list, then overwrite Q slice `part' (16-byte stride).
5649 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5650  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5651   (match_operand:VSTRUCT 1 "register_operand" "0")
5652   (match_operand:VQ 2 "register_operand" "w")
5653   (match_operand:SI 3 "immediate_operand" "i")]
5654   "TARGET_SIMD"
5656   int part = INTVAL (operands[3]);
5657   int offset = part * 16;
5659   emit_move_insn (operands[0], operands[1]);
5660   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5661                   operands[2]);
5662   DONE;
;; Standard pattern name vec_init<mode><Vel>.
;; Build a vector from scalar elements; all the work is in
;; aarch64_expand_vector_init.
5667 (define_expand "vec_init<mode><Vel>"
5668   [(match_operand:VALL_F16 0 "register_operand" "")
5669    (match_operand 1 "" "")]
5670   "TARGET_SIMD"
5672   aarch64_expand_vector_init (operands[0], operands[1]);
5673   DONE;
;; Load one scalar from memory and duplicate it to every lane (LD1R).
5676 (define_insn "*aarch64_simd_ld1r<mode>"
5677   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5678         (vec_duplicate:VALL_F16
5679           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5680   "TARGET_SIMD"
5681   "ld1r\\t{%0.<Vtype>}, %1"
5682   [(set_attr "type" "neon_load1_all_lanes")]
5685 (define_insn "aarch64_frecpe<mode>"
5686   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5687         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5688          UNSPEC_FRECPE))]
5689   "TARGET_SIMD"
5690   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5691   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5694 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5695   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5696         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5697          FRECP))]
5698   "TARGET_SIMD"
5699   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5700   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; Floating-point reciprocal step (UNSPEC_FRECPS -> FRECPS), vector and
;; scalar modes via VHSDF_HSDF.
(define_insn "aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF
          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
          (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
          UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)
;; Unsigned integer reciprocal estimate (UNSPEC_URECPE -> URECPE).
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                UNSPEC_URECPE))]
  "TARGET_SIMD"
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
;; Standard pattern name vec_extract<mode><Vel>.
;; Extract lane INTVAL (operands[2]) of vector operand 1 into operand 0.

(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})
;; aes

;; AESE/AESD round (CRYPTO_AES iterator); operand 1 is the accumulated
;; state and must be tied to the destination ("0" constraint).
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
                       (match_operand:V16QI 2 "register_operand" "w")]
         CRYPTO_AES))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)
;; When AES/AESMC fusion is enabled we want the register allocation to
;; look like:
;;    AESE Vn, _
;;    AESMC Vn, Vn
;; So prefer to tie operand 1 to operand 0 when fusing.

(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
         CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
                       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
                     (const_string "yes")
                     (const_string "no"))
      (const_string "yes")])]
)
;; sha1

;; SHA1H on a scalar SI value held in a SIMD register.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1
                       "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
;; SHA1H applied to lane 0 of a V4SI vector (little-endian lane numbering,
;; hence the !BYTES_BIG_ENDIAN condition; see the _be_ variant below).
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
                     (parallel [(const_int 0)]))]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
;; Big-endian counterpart of aarch64_crypto_sha1hv4si: the architectural
;; lane 0 is RTL lane 3 when BYTES_BIG_ENDIAN.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
                     (parallel [(const_int 3)]))]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
;; SHA1SU1 schedule update; operand 1 is both source and destination.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)
;; SHA1C/SHA1M/SHA1P hash update (CRYPTO_SHA1 iterator); hash state in
;; operand 1 is tied to the destination.
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)
;; SHA1SU0 schedule update; operand 1 is both source and destination.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
;; sha256

;; SHA256H/SHA256H2 hash update (CRYPTO_SHA256 iterator); hash state in
;; operand 1 is tied to the destination.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
;; SHA256SU0 schedule update; operand 1 is both source and destination.
;; Condition spacing normalized ("&&TARGET_CRYPTO" -> "&& TARGET_CRYPTO")
;; to match every other pattern in this file.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)
;; SHA256SU1 schedule update; operand 1 is both source and destination.
;; Condition spacing normalized ("&&TARGET_CRYPTO" -> "&& TARGET_CRYPTO")
;; to match every other pattern in this file.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
;; pmull

;; 64x64 -> 128-bit polynomial multiply on the low D halves.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
                     (match_operand:DI 2 "register_operand" "w")]
                    UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)
;; 64x64 -> 128-bit polynomial multiply on the high D halves (PMULL2).
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)