1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2024 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The file is organised into the following sections (search for the full
22 ;; line beginning with the section title):
24 ;; == General notes
25 ;; ---- Note on the handling of big-endian SVE
26 ;; ---- Description of UNSPEC_PTEST
27 ;; ---- Description of UNSPEC_PRED_Z
28 ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
30 ;; ---- Note on FFR handling
32 ;; == Moves
33 ;; ---- Moves of single vectors
34 ;; ---- Moves of multiple vectors
35 ;; ---- Moves of predicates
36 ;; ---- Moves of multiple predicates
37 ;; ---- Moves relating to the FFR
39 ;; == Loads
40 ;; ---- Normal contiguous loads
41 ;; ---- Extending contiguous loads
42 ;; ---- First-faulting contiguous loads
43 ;; ---- First-faulting extending contiguous loads
44 ;; ---- Non-temporal contiguous loads
45 ;; ---- Normal gather loads
46 ;; ---- Extending gather loads
47 ;; ---- First-faulting gather loads
48 ;; ---- First-faulting extending gather loads
50 ;; == Prefetches
51 ;; ---- Contiguous prefetches
52 ;; ---- Gather prefetches
54 ;; == Stores
55 ;; ---- Normal contiguous stores
56 ;; ---- Truncating contiguous stores
57 ;; ---- Non-temporal contiguous stores
58 ;; ---- Normal scatter stores
59 ;; ---- Truncating scatter stores
61 ;; == Vector creation
62 ;; ---- [INT,FP] Duplicate element
63 ;; ---- [INT,FP] Initialize from individual elements
64 ;; ---- [INT] Linear series
65 ;; ---- [PRED] Duplicate element
67 ;; == Vector decomposition
68 ;; ---- [INT,FP] Extract index
69 ;; ---- [INT,FP] Extract active element
70 ;; ---- [PRED] Extract index
72 ;; == Unary arithmetic
73 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
74 ;; ---- [INT] General unary arithmetic corresponding to unspecs
75 ;; ---- [INT] Sign and zero extension
76 ;; ---- [INT] Truncation
77 ;; ---- [INT] Logical inverse
78 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
79 ;; ---- [FP] General unary arithmetic corresponding to unspecs
80 ;; ---- [FP] Square root
81 ;; ---- [FP] Reciprocal square root
82 ;; ---- [PRED] Inverse
84 ;; == Binary arithmetic
85 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
86 ;; ---- [INT] Addition
87 ;; ---- [INT] Subtraction
88 ;; ---- [INT] Take address
89 ;; ---- [INT] Absolute difference
90 ;; ---- [INT] Saturating addition and subtraction
91 ;; ---- [INT] Highpart multiplication
92 ;; ---- [INT] Division
93 ;; ---- [INT] Binary logical operations
94 ;; ---- [INT] Binary logical operations (inverted second input)
95 ;; ---- [INT] Shifts (rounding towards -Inf)
96 ;; ---- [INT] Shifts (rounding towards 0)
97 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
98 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
99 ;; ---- [FP] General binary arithmetic corresponding to unspecs
100 ;; ---- [FP] Addition
101 ;; ---- [FP] Complex addition
102 ;; ---- [FP] Subtraction
103 ;; ---- [FP] Absolute difference
104 ;; ---- [FP] Multiplication
105 ;; ---- [FP] Division
106 ;; ---- [FP] Binary logical operations
107 ;; ---- [FP] Sign copying
108 ;; ---- [FP] Maximum and minimum
109 ;; ---- [PRED] Binary logical operations
110 ;; ---- [PRED] Binary logical operations (inverted second input)
111 ;; ---- [PRED] Binary logical operations (inverted result)
113 ;; == Ternary arithmetic
114 ;; ---- [INT] MLA and MAD
115 ;; ---- [INT] MLS and MSB
116 ;; ---- [INT] Dot product
117 ;; ---- [INT] Sum of absolute differences
118 ;; ---- [INT] Matrix multiply-accumulate
119 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
120 ;; ---- [FP] Complex multiply-add
121 ;; ---- [FP] Trigonometric multiply-add
122 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
123 ;; ---- [FP] Matrix multiply-accumulate
125 ;; == Comparisons and selects
126 ;; ---- [INT,FP] Select based on predicates
127 ;; ---- [INT,FP] Compare and select
128 ;; ---- [INT] Comparisons
129 ;; ---- [INT] While tests
130 ;; ---- [FP] Direct comparisons
131 ;; ---- [FP] Absolute comparisons
132 ;; ---- [PRED] Select
133 ;; ---- [PRED] Test bits
135 ;; == Reductions
136 ;; ---- [INT,FP] Conditional reductions
137 ;; ---- [INT] Tree reductions
138 ;; ---- [FP] Tree reductions
139 ;; ---- [FP] Left-to-right reductions
141 ;; == Permutes
142 ;; ---- [INT,FP] General permutes
143 ;; ---- [INT,FP] Special-purpose unary permutes
144 ;; ---- [INT,FP] Special-purpose binary permutes
145 ;; ---- [PRED] Special-purpose unary permutes
146 ;; ---- [PRED] Special-purpose binary permutes
148 ;; == Conversions
149 ;; ---- [INT<-INT] Packs
150 ;; ---- [INT<-INT] Unpacks
151 ;; ---- [INT<-FP] Conversions
152 ;; ---- [INT<-FP] Packs
153 ;; ---- [INT<-FP] Unpacks
154 ;; ---- [FP<-INT] Conversions
155 ;; ---- [FP<-INT] Packs
156 ;; ---- [FP<-INT] Unpacks
157 ;; ---- [FP<-FP] Packs
158 ;; ---- [FP<-FP] Packs (bfloat16)
159 ;; ---- [FP<-FP] Unpacks
160 ;; ---- [PRED<-PRED] Packs
161 ;; ---- [PRED<-PRED] Unpacks
163 ;; == Vector partitioning
164 ;; ---- [PRED] Unary partitioning
165 ;; ---- [PRED] Binary partitioning
166 ;; ---- [PRED] Scalarization
168 ;; == Counting elements
169 ;; ---- [INT] Count elements in a pattern (scalar)
170 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
171 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
172 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
173 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
174 ;; ---- [INT] Count elements in a predicate (scalar)
175 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
176 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
177 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
178 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
180 ;; =========================================================================
181 ;; == General notes
182 ;; =========================================================================
184 ;; -------------------------------------------------------------------------
185 ;; ---- Note on the handling of big-endian SVE
186 ;; -------------------------------------------------------------------------
188 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
189 ;; same way as movdi or movti would: the first byte of memory goes
190 ;; into the most significant byte of the register and the last byte
191 ;; of memory goes into the least significant byte of the register.
192 ;; This is the most natural ordering for Advanced SIMD and matches
193 ;; the ABI layout for 64-bit and 128-bit vector types.
195 ;; As a result, the order of bytes within the register is what GCC
196 ;; expects for a big-endian target, and subreg offsets therefore work
197 ;; as expected, with the first element in memory having subreg offset 0
198 ;; and the last element in memory having the subreg offset associated
199 ;; with a big-endian lowpart. However, this ordering also means that
200 ;; GCC's lane numbering does not match the architecture's numbering:
201 ;; GCC always treats the element at the lowest address in memory
202 ;; (subreg offset 0) as element 0, while the architecture treats
203 ;; the least significant end of the register as element 0.
205 ;; The situation for SVE is different. We want the layout of the
206 ;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
207 ;; logically, a mov<mode> load must be indistinguishable from a
208 ;; maskload<mode> whose mask is all true. We therefore need the
209 ;; register layout to match LD1 rather than LDR. The ABI layout of
210 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
212 ;; As a result, the architecture lane numbering matches GCC's lane
213 ;; numbering, with element 0 always being the first in memory.
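;; As a purely illustrative example (a sketch, not an excerpt from the
;; ABI or architecture documents): suppose memory holds the 32-bit
;; values { 1, 2, 3, 4 }, with 1 at the lowest address.  A big-endian
;; Advanced SIMD V4SI load puts 1 in the most significant 32 bits of
;; the register, so GCC element 0 is 1 but architecture lane 0 (the
;; least significant 32 bits) is 4.  An SVE LD1W of the same memory
;; puts 1 in architecture lane 0, so GCC element 0 and architecture
;; lane 0 both refer to the value 1.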
215 ;; However:
216 ;; - Applying a subreg offset to a register does not give the element
217 ;; that GCC expects: the first element in memory has the subreg offset
218 ;; associated with a big-endian lowpart while the last element in memory
219 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
221 ;; - We cannot use LDR and STR for spill slots that might be accessed
222 ;; via subregs, since although the elements have the order GCC expects,
223 ;; the order of the bytes within the elements is different. We instead
224 ;; access spill slots via LD1 and ST1, using secondary reloads to
225 ;; reserve a predicate register.
227 ;; -------------------------------------------------------------------------
228 ;; ---- Description of UNSPEC_PTEST
229 ;; -------------------------------------------------------------------------
231 ;; SVE provides a PTEST instruction for testing the active lanes of a
232 ;; predicate and setting the flags based on the result. The associated
233 ;; condition code tests are:
235 ;; - any (= ne): at least one active bit is set
236 ;; - none (= eq): all active bits are clear (*)
237 ;; - first (= mi): the first active bit is set
238 ;; - nfrst (= pl): the first active bit is clear (*)
239 ;; - last (= cc): the last active bit is set
240 ;; - nlast (= cs): the last active bit is clear (*)
242 ;; where the conditions marked (*) are also true when there are no active
243 ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
244 ;; of a PTEST use the condition code mode CC_NZC.
246 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
247 ;; This means that for other predicate modes, we need a governing predicate
248 ;; in which all bits are defined.
250 ;; For example, most predicated .H operations ignore the odd bits of the
251 ;; governing predicate, so that an active lane is represented by the
252 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
253 ;; any value. To test a .H predicate, we instead need "10" and "00"
254 ;; respectively, so that the condition only tests the even bits of the
255 ;; predicate.
257 ;; Several instructions set the flags as a side-effect, in the same way
258 ;; that a separate PTEST would. It's important for code quality that we
259 ;; use these flags results as often as possible, particularly in the case
260 ;; of WHILE* and RDFFR.
262 ;; Also, some of the instructions that set the flags are unpredicated
263 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
264 ;; they were predicated on a PTRUE of that size. For example, a .S
265 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
268 ;; We therefore need to represent PTEST operations in a way that
269 ;; makes it easy to combine them with both predicated and unpredicated
270 ;; operations, while using a VNx16BI governing predicate for all
271 ;; predicate modes. We do this using:
273 ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
277 ;; - GP is the real VNx16BI governing predicate
279 ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
280 ;; GP to CAST_GP are guaranteed to be clear in GP.
282 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
283 ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
284 ;; SVE_MAYBE_NOT_PTRUE otherwise.
286 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
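;; For example, a PTEST of a .H predicate OP governed by a .H PTRUE
;; might be represented along the following lines.  This is only an
;; illustrative sketch in the notation above (GP, CAST_GP and OP stand
;; for registers), not a pattern copied from later in the file:
;;
;;   (set (reg:CC_NZC CC_REGNUM)
;;	  (unspec:CC_NZC
;;	    [(reg:VNx16BI GP)		     ; .B view of the governing predicate
;;	     (reg:VNx8BI CAST_GP)	     ; the same value viewed as .H
;;	     (const_int SVE_KNOWN_PTRUE)     ; PTRUE_FLAG
;;	     (reg:VNx8BI OP)]		     ; the predicate being tested
;;	    UNSPEC_PTEST))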
288 ;; -------------------------------------------------------------------------
289 ;; ---- Description of UNSPEC_PRED_Z
290 ;; -------------------------------------------------------------------------
292 ;; SVE integer comparisons are predicated and return zero for inactive
293 ;; lanes. Sometimes we use them with predicates that are all-true and
294 ;; sometimes we use them with general predicates.
296 ;; The integer comparisons also set the flags and so build in the effect
297 ;; of a PTEST. We therefore want to be able to combine integer comparison
298 ;; patterns with PTESTs of the result. One difficulty with doing this is
299 ;; that (as noted above) the PTEST is always a .B operation and so can place
300 ;; stronger requirements on the governing predicate than the comparison does.
302 ;; For example, when applying a separate PTEST to the result of a full-vector
303 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
304 ;; .B PTRUE. In contrast, the comparison might be predicated on either
305 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
306 ;; bits don't matter for .H operations.
308 ;; We therefore can't rely on a full-vector comparison using the same
309 ;; predicate register as a following PTEST. We instead need to remember
310 ;; whether a comparison is known to be a full-vector comparison and use
311 ;; this information in addition to a check for equal predicate registers.
312 ;; At the same time, it's useful to have a common representation for all
313 ;; integer comparisons, so that they can be handled by a single set of
314 ;; patterns.
316 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
318 ;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
322 ;; - GP is the governing predicate, of mode <M:VPRED>
324 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
325 ;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
326 ;; otherwise.
328 ;; - CODE is the comparison code
330 ;; - OP0 and OP1 are the values being compared, of mode M
332 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
334 ;; -------------------------------------------------------------------------
335 ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
336 ;; -------------------------------------------------------------------------
338 ;; Many SVE integer operations are predicated. We can generate them
339 ;; from four sources:
341 ;; (1) Using normal unpredicated optabs. In this case we need to create
342 ;; an all-true predicate register to act as the governing predicate
343 ;; for the SVE instruction. There are no inactive lanes, and thus
344 ;; the values of inactive lanes don't matter.
346 ;; (2) Using _x ACLE functions. In this case the function provides a
347 ;; specific predicate and some lanes might be inactive. However,
348 ;; as for (1), the values of the inactive lanes don't matter.
349 ;; We can make extra lanes active without changing the behavior
350 ;; (although for code-quality reasons we should avoid doing so
351 ;; unnecessarily).
353 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
354 ;; These optabs have a predicate operand that specifies which lanes are
355 ;; active and another operand that provides the values of inactive lanes.
357 ;; (4) Using _m and _z ACLE functions. These functions map to the same
358 ;; patterns as (3), with the _z functions setting inactive lanes to zero
359 ;; and the _m functions setting the inactive lanes to one of the function
360 ;; arguments.
362 ;; For (1) and (2) we need a way of attaching the predicate to a normal
363 ;; unpredicated integer operation. We do this using:
365 ;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
367 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
368 ;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE;
369 ;; it always is for (1), but might not be for (2).
371 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
372 ;; all-true. It is always semantically valid to replace PRED with a PTRUE,
373 ;; but as noted above, we should only do so if there's a specific benefit.
375 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
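;; For example, an addition generated from source (1) might look
;; roughly like this, with PTRUE, A and B standing for registers
;; (an illustrative sketch of the representation above, not a pattern
;; copied from later in the file):
;;
;;   (set (reg:VNx4SI RES)
;;	  (unspec:VNx4SI
;;	    [(reg:VNx4BI PTRUE)		       ; PRED, here an all-true predicate
;;	     (plus:VNx4SI (reg:VNx4SI A)       ; the normal integer operation
;;			  (reg:VNx4SI B))]
;;	    UNSPEC_PRED_X))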
377 ;; For (3) and (4) we can simply use the SVE port's normal representation
378 ;; of a predicate-based select:
380 ;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
382 ;; where INACTIVE specifies the values of inactive lanes.
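;; For example, a cond_add whose inactive lanes are zero (the _z case)
;; might take roughly the following form; again this is only a sketch
;; of the representation above, with PRED, A and B standing for
;; registers:
;;
;;   (set (reg:VNx4SI RES)
;;	  (unspec:VNx4SI
;;	    [(reg:VNx4BI PRED)		       ; which lanes are active
;;	     (plus:VNx4SI (reg:VNx4SI A)       ; the integer operation
;;			  (reg:VNx4SI B))
;;	     (const_vector:VNx4SI [0 ...])]    ; INACTIVE, here zero
;;	    UNSPEC_SEL))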
384 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
385 ;; than inserting the integer operation directly. This is mostly useful
386 ;; if we want the combine pass to merge an integer operation with an explicit
387 ;; vcond_mask (in other words, with a following SEL instruction). However,
388 ;; it's generally better to merge such operations at the gimple level
389 ;; using (3).
391 ;; -------------------------------------------------------------------------
392 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
393 ;; -------------------------------------------------------------------------
395 ;; Most SVE floating-point operations are predicated. We can generate
396 ;; them from four sources:
398 ;; (1) Using normal unpredicated optabs. In this case we need to create
399 ;; an all-true predicate register to act as the governing predicate
400 ;; for the SVE instruction. There are no inactive lanes, and thus
401 ;; the values of inactive lanes don't matter.
403 ;; (2) Using _x ACLE functions. In this case the function provides a
404 ;; specific predicate and some lanes might be inactive. However,
405 ;; as for (1), the values of the inactive lanes don't matter.
407 ;; The instruction must have the same exception behavior as the
408 ;; function call unless things like command-line flags specifically
409 ;; allow otherwise. For example, with -ffast-math, it is OK to
410 ;; raise exceptions for inactive lanes, but normally it isn't.
412 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
413 ;; These optabs have a predicate operand that specifies which lanes are
414 ;; active and another operand that provides the values of inactive lanes.
416 ;; (4) Using _m and _z ACLE functions. These functions map to the same
417 ;; patterns as (3), with the _z functions setting inactive lanes to zero
418 ;; and the _m functions setting the inactive lanes to one of the function
419 ;; arguments.
421 ;; The predicate requirements differ between these sources:
423 ;; - In (1), the predicate is known to be all true and the pattern can use
424 ;; unpredicated operations where available.
426 ;; - In (2), the predicate might or might not be all true. The pattern can
427 ;; use unpredicated instructions if the predicate is all-true or if things
428 ;; like command-line flags allow exceptions for inactive lanes.
430 ;; - (3) and (4) represent a native SVE predicated operation. Some lanes
431 ;; might be inactive and inactive lanes of the result must have specific
432 ;; values. There is no scope for using unpredicated instructions (and no
433 ;; reason to want to), so the question about command-line flags doesn't
434 ;; arise.
436 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
437 ;; in combination with a separate predicate operand, e.g.
439 ;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
440 ;; (sqrt:SVE_FULL_F 2 "register_operand" "w")]
443 ;; because (sqrt ...) can raise an exception for any lane, including
444 ;; inactive ones. We therefore need to use an unspec instead.
446 ;; Also, (2) requires some way of distinguishing the case in which the
447 ;; predicate might have inactive lanes and cannot be changed from the
448 ;; case in which the predicate has no inactive lanes or can be changed.
449 ;; This information is also useful when matching combined FP patterns
450 ;; in which the predicates might not be equal.
452 ;; We therefore model FP operations as an unspec of the form:
454 ;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
458 ;; - PRED is the governing predicate.
460 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the
461 ;; value SVE_STRICT_GP if PRED might have inactive lanes and if those
462 ;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise.
464 ;; - OP0 OP1 ... are the normal input operands to the operation.
466 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
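;; For example, an _x single-precision addition whose predicate might
;; have inactive lanes (and which must not raise exceptions for them)
;; could be represented roughly as below.  This is an illustrative
;; sketch with PRED, A and B standing for registers, and it assumes
;; that the FADD mnemonic maps to UNSPEC_COND_FADD under the naming
;; scheme above:
;;
;;   (set (reg:VNx4SF RES)
;;	  (unspec:VNx4SF
;;	    [(reg:VNx4BI PRED)		       ; PRED
;;	     (const_int SVE_STRICT_GP)	       ; STRICTNESS
;;	     (reg:VNx4SF A)		       ; OP0
;;	     (reg:VNx4SF B)]		       ; OP1
;;	    UNSPEC_COND_FADD))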
468 ;; For (3) and (4), we combine these operations with an UNSPEC_SEL
469 ;; that selects between the result of the FP operation and the "else"
470 ;; value. (This else value is a merge input for _m ACLE functions
471 ;; and zero for _z ACLE functions.) The outer pattern then has the form:
473 ;; (unspec [pred fp_operation else_value] UNSPEC_SEL)
475 ;; This means that the patterns for (3) and (4) have two predicates:
476 ;; one for the FP operation itself and one for the UNSPEC_SEL.
477 ;; This pattern is equivalent to the result of combining an instance
478 ;; of (1) or (2) with a separate vcond instruction, so these patterns
479 ;; are useful as combine targets too.
481 ;; However, in the combine case, the instructions that we want to
482 ;; combine might use different predicates. Then:
484 ;; - Some of the active lanes of the FP operation might be discarded
485 ;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes,
486 ;; even for SVE_STRICT_GP, since the operations on those lanes are
487 ;; effectively dead code.
489 ;; - Some of the inactive lanes of the FP operation might be selected
490 ;; by the UNSPEC_SEL, giving unspecified values for those lanes.
491 ;; SVE_RELAXED_GP lets us extend the FP operation to cover these
492 ;; extra lanes, but SVE_STRICT_GP does not.
494 ;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
495 ;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
496 ;; This typically leads to patterns like:
498 ;; (unspec [(match_operand 1 "register_operand" "Upl")
499 ;; (unspec [(match_operand N)
500 ;; (const_int SVE_RELAXED_GP)
502 ;; UNSPEC_COND_<MNEMONIC>)
505 ;; where operand N is allowed to be anything. These instructions then
506 ;; have rewrite rules to replace operand N with operand 1, which gives the
507 ;; instructions a canonical form and means that the original operand N is
508 ;; not kept live unnecessarily.
510 ;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
511 ;; a subset of the FP operation predicate. This case isn't interesting
512 ;; for FP operations that have an all-true predicate, since such operations
513 ;; use SVE_RELAXED_GP instead. And it is not possible for instruction
514 ;; conditions to track the subset relationship for arbitrary registers.
515 ;; So in practice, the only useful case for SVE_STRICT_GP is the one
516 ;; in which the predicates match:
518 ;; (unspec [(match_operand 1 "register_operand" "Upl")
519 ;; (unspec [(match_dup 1)
520 ;; (const_int SVE_STRICT_GP)
522 ;; UNSPEC_COND_<MNEMONIC>)
525 ;; This pattern would also be correct for SVE_RELAXED_GP, but it would
526 ;; be redundant with the one above. However, if the combine pattern
527 ;; has multiple FP operations, using a match_operand allows combinations
528 ;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
529 ;; that the predicates are the same:
531 ;; (unspec [(match_operand 1 "register_operand" "Upl")
533 ;; (unspec [(match_dup 1)
534 ;; (match_operand:SI N "aarch64_sve_gp_strictness")
536 ;; UNSPEC_COND_<MNEMONIC1>)
537 ;; (unspec [(match_dup 1)
538 ;; (match_operand:SI M "aarch64_sve_gp_strictness")
540 ;; UNSPEC_COND_<MNEMONIC2>) ...)
543 ;; The fully-relaxed version of this pattern is:
545 ;; (unspec [(match_operand 1 "register_operand" "Upl")
547 ;; (unspec [(match_operand N)
548 ;; (const_int SVE_RELAXED_GP)
550 ;; UNSPEC_COND_<MNEMONIC1>)
551 ;; (unspec [(match_operand M)
552 ;; (const_int SVE_RELAXED_GP)
554 ;; UNSPEC_COND_<MNEMONIC2>) ...)
557 ;; -------------------------------------------------------------------------
558 ;; ---- Note on FFR handling
559 ;; -------------------------------------------------------------------------
561 ;; Logically we want to divide FFR-related instructions into regions
562 ;; that contain exactly one of:
564 ;; - a single write to the FFR
565 ;; - any number of reads from the FFR (but only one read is likely)
566 ;; - any number of LDFF1 and LDNF1 instructions
568 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
569 ;; normal loads as far as possible. This means that they should be
570 ;; schedulable within a region in the same way that LD1 would be,
571 ;; and they should be deleted as dead if the result is unused. The loads
572 ;; should therefore not write to the FFR, since that would both serialize
573 ;; the loads with respect to each other and keep the loads live for any
574 ;; later RDFFRs.
576 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
577 ;; the dependencies. Writing to the FFRT starts a new "FFRT region",
578 ;; while using the FFRT keeps the instruction within its region.
581 ;; - Writes start a new FFRT region as well as setting the FFR:
583 ;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
585 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
586 ;; loads stay within the same FFRT region:
588 ;; L1: load data while using the FFRT
590 ;; In addition, any FFRT region that includes a load also has at least one
591 ;; instance of:
593 ;; L2: FFR = update(FFR, FFRT) [type == no_insn]
595 ;; to make it clear that the region both reads from and writes to the FFR.
597 ;; - Reads do the following:
599 ;; R1: FFRT = FFR [type == no_insn]
600 ;; R2: read from the FFRT
601 ;; R3: FFRT = update(FFRT) [type == no_insn]
603 ;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and
604 ;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
605 ;; cannot move backwards across R3.
607 ;; This way, writes are only kept alive by later loads or reads,
608 ;; and write/read pairs fold normally. For two consecutive reads,
609 ;; the first R3 is made dead by the second R1, which in turn becomes
610 ;; redundant with the first R1. We then have:
612 ;; first R1: FFRT = FFR
613 ;; first read from the FFRT
614 ;; second read from the FFRT
615 ;; second R3: FFRT = update(FFRT)
617 ;; i.e. the two FFRT regions collapse into a single one with two
618 ;; independent reads.
620 ;; The model still prevents some valid optimizations though. For example,
621 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
622 ;; the L2 instructions.
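;; As a rough illustration, a single first-faulting load followed by a
;; read of the FFR maps onto the steps above as follows, in terms of the
;; insn patterns defined later in this file (a sketch of the intended
;; ordering rather than the exact RTL that is emitted):
;;
;;   W1: aarch64_wrffr		       ; set the FFR, start an FFRT region
;;   L1: @aarch64_ld<fn>f1<mode>       ; the LDFF1/LDNF1 itself, uses the FFRT
;;   L2: aarch64_update_ffr_for_load   ; FFR = update(FFR, FFRT)
;;   R1: aarch64_copy_ffr_to_ffrt      ; FFRT = FFR
;;   R2: aarch64_rdffr_z	       ; read the FFR via the FFRT
;;   R3: aarch64_update_ffrt	       ; FFRT = update(FFRT)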
624 ;; =========================================================================
625 ;; == Moves
626 ;; =========================================================================
628 ;; -------------------------------------------------------------------------
629 ;; ---- Moves of single vectors
630 ;; -------------------------------------------------------------------------
632 ;; - MOV (including aliases)
633 ;; - LD1B (contiguous form)
638 ;; - ST1B (contiguous form)
643 ;; -------------------------------------------------------------------------
645 (define_expand "mov<mode>"
646 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
647 (match_operand:SVE_ALL 1 "general_operand"))]
650 /* Use the predicated load and store patterns where possible.
651 This is required for big-endian targets (see the comment at the
652 head of the file) and increases the addressing choices for
653 little-endian.  */
654 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
655 && can_create_pseudo_p ())
657 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
661 if (CONSTANT_P (operands[1]))
663 aarch64_expand_mov_immediate (operands[0], operands[1]);
667 /* Optimize subregs on big-endian targets: we can use REV[BHW]
668 instead of going through memory. */
670 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
675 (define_expand "movmisalign<mode>"
676 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
677 (match_operand:SVE_ALL 1 "general_operand"))]
680 /* Equivalent to a normal move for our purposes. */
681 emit_move_insn (operands[0], operands[1]);
686 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
687 ;; little-endian ordering is acceptable. Only allow memory operations during
688 ;; and after RA; before RA we want the predicated load and store patterns to
689 ;; be used instead.
690 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
691 [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand")
692 (match_operand:SVE_FULL 1 "aarch64_sve_general_operand"))]
694 && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
695 && ((lra_in_progress || reload_completed)
696 || (register_operand (operands[0], <MODE>mode)
697 && nonmemory_operand (operands[1], <MODE>mode)))"
699 [ w , Utr ] ldr\t%0, %1
700 [ Utr , w ] str\t%1, %0
701 [ w , w ] mov\t%0.d, %1.d
702 [ w , Dn ] << aarch64_output_sve_mov_immediate (operands[1]);
706 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
707 ;; or vectors for which little-endian ordering isn't acceptable. Memory
708 ;; accesses require secondary reloads.
709 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
710 [(set (match_operand:SVE_ALL 0 "register_operand")
711 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand"))]
713 && <MODE>mode != VNx16QImode
715 || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
717 [ w , w ] mov\t%0.d, %1.d
718 [ w , Dn ] << aarch64_output_sve_mov_immediate (operands[1]);
722 ;; Handle memory reloads for modes that can't use LDR and STR. We use
723 ;; byte PTRUE for all modes to try to encourage reuse. This pattern
724 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
725 (define_expand "aarch64_sve_reload_mem"
727 [(set (match_operand 0)
729 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
732 /* Create a PTRUE. */
733 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
735 /* Refer to the PTRUE in the appropriate mode for this move. */
736 machine_mode mode = GET_MODE (operands[0]);
737 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
739 /* Emit a predicated load or store. */
740 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
745 ;; A predicated move in which the predicate is known to be all-true.
746 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
747 ;; so changes to this pattern will need changes there as well.
748 (define_insn_and_split "@aarch64_pred_mov<mode>"
749 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
751 [(match_operand:<VPRED> 1 "register_operand")
752 (match_operand:SVE_ALL 2 "nonimmediate_operand")]
755 && (register_operand (operands[0], <MODE>mode)
756 || register_operand (operands[2], <MODE>mode))"
757 {@ [ cons: =0 , 1 , 2 ]
759 [ w , Upl , m ] ld1<Vesize>\t%0.<Vctype>, %1/z, %2
760 [ m , Upl , w ] st1<Vesize>\t%2.<Vctype>, %1, %0
762 "&& register_operand (operands[0], <MODE>mode)
763 && register_operand (operands[2], <MODE>mode)"
764 [(set (match_dup 0) (match_dup 2))]
767 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
768 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
769 ;; for details. We use a special predicate for operand 2 to reduce
770 ;; the number of patterns.
771 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
772 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
774 [(match_operand:VNx16BI 1 "register_operand" "Upl")
775 (match_operand 2 "aarch64_any_register_operand" "w")]
777 "TARGET_SVE && BYTES_BIG_ENDIAN"
779 "&& reload_completed"
782 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
787 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
788 ;; This is equivalent to a subreg on little-endian targets but not for
789 ;; big-endian; see the comment at the head of the file for details.
790 (define_expand "@aarch64_sve_reinterpret<mode>"
791 [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand")
792 (unspec:SVE_ALL_STRUCT
793 [(match_operand 1 "aarch64_any_register_operand")]
794 UNSPEC_REINTERPRET))]
797 machine_mode src_mode = GET_MODE (operands[1]);
798 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
800 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
806 ;; A pattern for handling type punning on big-endian targets. We use a
807 ;; special predicate for operand 1 to reduce the number of patterns.
808 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
809 [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand" "=w")
810 (unspec:SVE_ALL_STRUCT
811 [(match_operand 1 "aarch64_any_register_operand" "w")]
812 UNSPEC_REINTERPRET))]
815 "&& reload_completed"
816 [(set (match_dup 0) (match_dup 1))]
818 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
822 ;; -------------------------------------------------------------------------
823 ;; ---- Moves of multiple vectors
824 ;; -------------------------------------------------------------------------
825 ;; All patterns in this section are synthetic and split to real
826 ;; instructions after reload.
827 ;; -------------------------------------------------------------------------
829 (define_expand "mov<mode>"
830 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
831 (match_operand:SVE_STRUCT 1 "general_operand"))]
834 /* Big-endian loads and stores need to be done via LD1 and ST1;
835 see the comment at the head of the file for details. */
836 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
839 gcc_assert (can_create_pseudo_p ());
840 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
844 if (CONSTANT_P (operands[1]))
846 aarch64_expand_mov_immediate (operands[0], operands[1]);
852 ;; Unpredicated structure moves (little-endian).
853 (define_insn "*aarch64_sve_mov<mode>_le"
854 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
855 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
856 "TARGET_SVE && !BYTES_BIG_ENDIAN"
858 [(set_attr "length" "<insn_length>")]
861 ;; Unpredicated structure moves (big-endian). Memory accesses require
862 ;; secondary reloads.
863 (define_insn "*aarch64_sve_mov<mode>_be"
864 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
865 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
866 "TARGET_SVE && BYTES_BIG_ENDIAN"
868 [(set_attr "length" "<insn_length>")]
871 ;; Split unpredicated structure moves into pieces. This is the same
872 ;; for both big-endian and little-endian code, although it only needs
873 ;; to handle memory operands for little-endian code.
875 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
876 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
877 "TARGET_SVE && reload_completed"
880 rtx dest = operands[0];
881 rtx src = operands[1];
882 if (REG_P (dest) && REG_P (src))
883 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
885 for (unsigned int i = 0; i < <vector_count>; ++i)
887 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
888 i * BYTES_PER_SVE_VECTOR);
889 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
890 i * BYTES_PER_SVE_VECTOR);
891 emit_insn (gen_rtx_SET (subdest, subsrc));
897 ;; Predicated structure moves. This works for both endiannesses but in
898 ;; practice is only useful for big-endian.
899 (define_insn_and_split "@aarch64_pred_mov<mode>"
900 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
902 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
903 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
906 && (register_operand (operands[0], <MODE>mode)
907 || register_operand (operands[2], <MODE>mode))"
909 "&& reload_completed"
912 for (unsigned int i = 0; i < <vector_count>; ++i)
914 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
916 i * BYTES_PER_SVE_VECTOR);
917 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
919 i * BYTES_PER_SVE_VECTOR);
920 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
924 [(set_attr "length" "<insn_length>")]
927 ;; -------------------------------------------------------------------------
928 ;; ---- Moves of predicates
929 ;; -------------------------------------------------------------------------
937 ;; -------------------------------------------------------------------------
939 (define_expand "mov<mode>"
940 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
941 (match_operand:PRED_ALL 1 "general_operand"))]
944 if (GET_CODE (operands[0]) == MEM)
945 operands[1] = force_reg (<MODE>mode, operands[1]);
947 if (CONSTANT_P (operands[1]))
949 aarch64_expand_mov_immediate (operands[0], operands[1]);
955 (define_insn "*aarch64_sve_mov<mode>"
956 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
957 (match_operand:PRED_ALL 1 "aarch64_mov_operand"))]
959 && (register_operand (operands[0], <MODE>mode)
960 || register_operand (operands[1], <MODE>mode))"
962 [ Upa , Upa ] mov\t%0.b, %1.b
963 [ m , Upa ] str\t%1, %0
964 [ Upa , m ] ldr\t%0, %1
965 [ Upa , Dn ] << aarch64_output_sve_mov_immediate (operands[1]);
969 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
970 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
971 [(set (reg:CC_NZC CC_REGNUM)
975 (const_int SVE_KNOWN_PTRUE)
976 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
978 [(match_operand:SI 4 "const_int_operand")
979 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
982 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
986 return aarch64_output_sve_ptrues (operands[1]);
988 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
990 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
994 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
995 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
996 [(set (reg:CC_NZC CC_REGNUM)
1000 (const_int SVE_KNOWN_PTRUE)
1002 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1004 [(match_operand:SI 4 "const_int_operand")
1005 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1008 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1012 return aarch64_output_sve_ptrues (operands[1]);
1014 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1016 operands[2] = CONSTM1_RTX (VNx16BImode);
1017 operands[3] = CONSTM1_RTX (<MODE>mode);
1021 ;; Match PTRUES Pn.B when only the flags result is useful (which is
1022 ;; a way of testing VL).
1023 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
1024 [(set (reg:CC_NZC CC_REGNUM)
1028 (const_int SVE_KNOWN_PTRUE)
1029 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1031 [(match_operand:SI 4 "const_int_operand")
1032 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
1035 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1038 return aarch64_output_sve_ptrues (operands[1]);
1040 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1042 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
1046 ;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
1047 ;; a way of testing VL).
1048 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
1049 [(set (reg:CC_NZC CC_REGNUM)
1053 (const_int SVE_KNOWN_PTRUE)
1055 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1057 [(match_operand:SI 4 "const_int_operand")
1058 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1061 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1064 return aarch64_output_sve_ptrues (operands[1]);
1066 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1068 operands[2] = CONSTM1_RTX (VNx16BImode);
1069 operands[3] = CONSTM1_RTX (<MODE>mode);
1073 ;; -------------------------------------------------------------------------
1074 ;; ---- Moves of multiple predicates
1075 ;; -------------------------------------------------------------------------
1077 (define_insn_and_split "movvnx32bi"
1078 [(set (match_operand:VNx32BI 0 "nonimmediate_operand")
1079 (match_operand:VNx32BI 1 "aarch64_mov_operand"))]
1086 "&& reload_completed"
1089 aarch64_split_double_move (operands[0], operands[1], VNx16BImode);
1094 ;; -------------------------------------------------------------------------
1095 ;; ---- Moves relating to the FFR
1096 ;; -------------------------------------------------------------------------
1101 ;; -------------------------------------------------------------------------
1103 ;; [W1 in the block comment above about FFR handling]
1105 ;; Write to the FFR and start a new FFRT scheduling region.
1106 (define_insn "aarch64_wrffr"
1107 [(set (reg:VNx16BI FFR_REGNUM)
1108 (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one"))
1109 (set (reg:VNx16BI FFRT_REGNUM)
1110 (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
1111 "TARGET_SVE && TARGET_NON_STREAMING"
1118 ;; [L2 in the block comment above about FFR handling]
1120 ;; Introduce a read from and write to the FFR in the current FFRT region,
1121 ;; so that the FFR value is live on entry to the region and so that the FFR
1122 ;; value visibly changes within the region. This is used (possibly multiple
1123 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
1124 (define_insn "aarch64_update_ffr_for_load"
1125 [(set (reg:VNx16BI FFR_REGNUM)
1126 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1127 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1130 [(set_attr "type" "no_insn")]
1133 ;; [R1 in the block comment above about FFR handling]
1135 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1136 ;; can be read from there by the RDFFR instructions below. This acts
1137 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1138 ;; creates a natural dependency with earlier writes.
1139 (define_insn "aarch64_copy_ffr_to_ffrt"
1140 [(set (reg:VNx16BI FFRT_REGNUM)
1141 (reg:VNx16BI FFR_REGNUM))]
1144 [(set_attr "type" "no_insn")]
1147 ;; [R2 in the block comment above about FFR handling]
1149 ;; Read the FFR via the FFRT.
1150 (define_insn "aarch64_rdffr"
1151 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1152 (reg:VNx16BI FFRT_REGNUM))]
1153 "TARGET_SVE && TARGET_NON_STREAMING"
1157 ;; Likewise with zero predication.
1158 (define_insn "aarch64_rdffr_z"
1159 [(set (match_operand:VNx16BI 0 "register_operand")
1161 (reg:VNx16BI FFRT_REGNUM)
1162 (match_operand:VNx16BI 1 "register_operand")))]
1163 "TARGET_SVE && TARGET_NON_STREAMING"
1164 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1165 [ &Upa , Upa ; yes ] rdffr\t%0.b, %1/z
1166 [ ?Upa , 0Upa; yes ] ^
1167 [ Upa , Upa ; no ] ^
1171 ;; Read the FFR to test for a fault, without using the predicate result.
1172 (define_insn "*aarch64_rdffr_z_ptest"
1173 [(set (reg:CC_NZC CC_REGNUM)
1175 [(match_operand:VNx16BI 1 "register_operand")
1177 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1179 (reg:VNx16BI FFRT_REGNUM)
1182 (clobber (match_scratch:VNx16BI 0))]
1183 "TARGET_SVE && TARGET_NON_STREAMING"
1184 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1185 [ &Upa , Upa ; yes ] rdffrs\t%0.b, %1/z
1186 [ ?Upa , 0Upa; yes ] ^
1187 [ Upa , Upa ; no ] ^
1191 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1192 (define_insn "*aarch64_rdffr_ptest"
1193 [(set (reg:CC_NZC CC_REGNUM)
1195 [(match_operand:VNx16BI 1 "register_operand")
1197 (const_int SVE_KNOWN_PTRUE)
1198 (reg:VNx16BI FFRT_REGNUM)]
1200 (clobber (match_scratch:VNx16BI 0))]
1201 "TARGET_SVE && TARGET_NON_STREAMING"
1202 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1203 [ &Upa , Upa ; yes ] rdffrs\t%0.b, %1/z
1204 [ ?Upa , 0Upa; yes ] ^
1205 [ Upa , Upa ; no ] ^
1209 ;; Read the FFR with zero predication and test the result.
1210 (define_insn "*aarch64_rdffr_z_cc"
1211 [(set (reg:CC_NZC CC_REGNUM)
1213 [(match_operand:VNx16BI 1 "register_operand")
1215 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1217 (reg:VNx16BI FFRT_REGNUM)
1220 (set (match_operand:VNx16BI 0 "register_operand")
1222 (reg:VNx16BI FFRT_REGNUM)
1224 "TARGET_SVE && TARGET_NON_STREAMING"
1225 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1226 [ &Upa , Upa ; yes ] rdffrs\t%0.b, %1/z
1227 [ ?Upa , 0Upa; yes ] ^
1228 [ Upa , Upa ; no ] ^
1232 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1233 (define_insn "*aarch64_rdffr_cc"
1234 [(set (reg:CC_NZC CC_REGNUM)
1236 [(match_operand:VNx16BI 1 "register_operand")
1238 (const_int SVE_KNOWN_PTRUE)
1239 (reg:VNx16BI FFRT_REGNUM)]
1241 (set (match_operand:VNx16BI 0 "register_operand")
1242 (reg:VNx16BI FFRT_REGNUM))]
1243 "TARGET_SVE && TARGET_NON_STREAMING"
1244 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1245 [ &Upa , Upa ; yes ] rdffrs\t%0.b, %1/z
1246 [ ?Upa , 0Upa; yes ] ^
1247 [ Upa , Upa ; no ] ^
1251 ;; [R3 in the block comment above about FFR handling]
1253 ;; Arbitrarily update the FFRT after a read from the FFR. This acts as
1254 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
1255 (define_insn "aarch64_update_ffrt"
1256 [(set (reg:VNx16BI FFRT_REGNUM)
1257 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1260 [(set_attr "type" "no_insn")]
1263 ;; =========================================================================
1264 ;; == Loads
1265 ;; =========================================================================
1267 ;; -------------------------------------------------------------------------
1268 ;; ---- Normal contiguous loads
1269 ;; -------------------------------------------------------------------------
1270 ;; Includes contiguous forms of:
1287 ;; -------------------------------------------------------------------------
1289 ;; Predicated LD1 (single).
1290 (define_insn "maskload<mode><vpred>"
1291 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1293 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1294 (match_operand:SVE_ALL 1 "memory_operand" "m")]
1297 "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1300 ;; Unpredicated LD[234].
1301 (define_expand "vec_load_lanes<mode><vsingle>"
1302 [(set (match_operand:SVE_STRUCT 0 "register_operand")
1305 (match_operand:SVE_STRUCT 1 "memory_operand")]
1309 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1313 ;; Predicated LD[234].
1314 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1315 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1317 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1318 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1321 "ld<vector_count><Vesize>\t%0, %2/z, %1"
1324 ;; -------------------------------------------------------------------------
1325 ;; ---- Extending contiguous loads
1326 ;; -------------------------------------------------------------------------
1327 ;; Includes contiguous forms of:
1334 ;; -------------------------------------------------------------------------
1336 ;; Predicated load and extend, with 8 elements per 128-bit block.
1337 (define_insn_and_rewrite "@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1338 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1340 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1341 (ANY_EXTEND:SVE_HSDI
1342 (unspec:SVE_PARTIAL_I
1343 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1344 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1347 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1348 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1349 "&& !CONSTANT_P (operands[3])"
1351 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1355 ;; -------------------------------------------------------------------------
1356 ;; ---- First-faulting contiguous loads
1357 ;; -------------------------------------------------------------------------
1358 ;; Includes contiguous forms of:
1367 ;; -------------------------------------------------------------------------
1369 ;; Contiguous non-extending first-faulting or non-faulting loads.
1370 (define_insn "@aarch64_ld<fn>f1<mode>"
1371 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1373 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1374 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1375 (reg:VNx16BI FFRT_REGNUM)]
1377 "TARGET_SVE && TARGET_NON_STREAMING"
1378 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1381 ;; -------------------------------------------------------------------------
1382 ;; ---- First-faulting extending contiguous loads
1383 ;; -------------------------------------------------------------------------
1384 ;; Includes contiguous forms of:
1397 ;; -------------------------------------------------------------------------
1399 ;; Predicated first-faulting or non-faulting load and extend.
1400 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1401 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1403 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1404 (ANY_EXTEND:SVE_HSDI
1405 (unspec:SVE_PARTIAL_I
1406 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1407 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1408 (reg:VNx16BI FFRT_REGNUM)]
1412 && TARGET_NON_STREAMING
1413 && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1414 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1415 "&& !CONSTANT_P (operands[3])"
1417 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1421 ;; -------------------------------------------------------------------------
1422 ;; ---- Non-temporal contiguous loads
1423 ;; -------------------------------------------------------------------------
1429 ;; -------------------------------------------------------------------------
1431 ;; Predicated contiguous non-temporal load (single).
1432 (define_insn "@aarch64_ldnt1<mode>"
1433 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1435 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1436 (match_operand:SVE_FULL 1 "memory_operand" "m")]
1439 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1442 ;; -------------------------------------------------------------------------
1443 ;; ---- Normal gather loads
1444 ;; -------------------------------------------------------------------------
1445 ;; Includes gather forms of:
1448 ;; -------------------------------------------------------------------------
1450 ;; Unpredicated gather loads.
1451 (define_expand "gather_load<mode><v_int_container>"
1452 [(set (match_operand:SVE_24 0 "register_operand")
1455 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1456 (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1457 (match_operand:DI 3 "const_int_operand")
1458 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1459 (mem:BLK (scratch))]
1460 UNSPEC_LD1_GATHER))]
1461 "TARGET_SVE && TARGET_NON_STREAMING"
1463 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1467 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
1468 ;; unsigned extension and false for signed extension.
1469 (define_insn "mask_gather_load<mode><v_int_container>"
1470 [(set (match_operand:SVE_4 0 "register_operand")
1472 [(match_operand:VNx4BI 5 "register_operand")
1473 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1474 (match_operand:VNx4SI 2 "register_operand")
1475 (match_operand:DI 3 "const_int_operand")
1476 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1477 (mem:BLK (scratch))]
1478 UNSPEC_LD1_GATHER))]
1479 "TARGET_SVE && TARGET_NON_STREAMING"
1480 {@ [cons: =0, 1, 2, 3, 4, 5 ]
1481 [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1482 [?w, Z, 0, Ui1, Ui1, Upl] ^
1483 [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1484 [?w, vgw, 0, Ui1, Ui1, Upl] ^
1485 [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1486 [?w, rk, 0, Z, Ui1, Upl] ^
1487 [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1488 [?w, rk, 0, Ui1, Ui1, Upl] ^
1489 [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1490 [?w, rk, 0, Z, i, Upl] ^
1491 [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1492 [?w, rk, 0, Ui1, i, Upl] ^
1496 ;; Predicated gather loads for 64-bit elements. The value of operand 3
1497 ;; doesn't matter in this case.
1498 (define_insn "mask_gather_load<mode><v_int_container>"
1499 [(set (match_operand:SVE_2 0 "register_operand")
1501 [(match_operand:VNx2BI 5 "register_operand")
1502 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1503 (match_operand:VNx2DI 2 "register_operand")
1504 (match_operand:DI 3 "const_int_operand")
1505 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1506 (mem:BLK (scratch))]
1507 UNSPEC_LD1_GATHER))]
1508 "TARGET_SVE && TARGET_NON_STREAMING"
1509 {@ [cons: =0, 1, 2, 3, 4, 5]
1510 [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1511 [?w, Z, 0, i, Ui1, Upl] ^
1512 [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1513 [?w, vgd, 0, i, Ui1, Upl] ^
1514 [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1515 [?w, rk, 0, i, Ui1, Upl] ^
1516 [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1517 [?w, rk, 0, i, i, Upl] ^
1521 ;; Likewise, but with the offset being extended from 32 bits.
1522 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1523 [(set (match_operand:SVE_2 0 "register_operand")
1525 [(match_operand:VNx2BI 5 "register_operand")
1526 (match_operand:DI 1 "register_operand")
1530 (match_operand:VNx2SI 2 "register_operand"))]
1532 (match_operand:DI 3 "const_int_operand")
1533 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1534 (mem:BLK (scratch))]
1535 UNSPEC_LD1_GATHER))]
1536 "TARGET_SVE && TARGET_NON_STREAMING"
1537 {@ [cons: =0, 1, 2, 3, 4, 5]
1538 [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1539 [?w, rk, 0, i, Ui1, Upl ] ^
1540 [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]
1541 [?w, rk, 0, i, i, Upl ] ^
1543 "&& !CONSTANT_P (operands[6])"
1545 operands[6] = CONSTM1_RTX (VNx2BImode);
1549 ;; Likewise, but with the offset being truncated to 32 bits and then
1550 ;; sign-extended.
1551 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1552 [(set (match_operand:SVE_2 0 "register_operand")
1554 [(match_operand:VNx2BI 5 "register_operand")
1555 (match_operand:DI 1 "register_operand")
1560 (match_operand:VNx2DI 2 "register_operand")))]
1562 (match_operand:DI 3 "const_int_operand")
1563 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1564 (mem:BLK (scratch))]
1565 UNSPEC_LD1_GATHER))]
1566 "TARGET_SVE && TARGET_NON_STREAMING"
1567 {@ [cons: =0, 1, 2, 3, 4, 5]
1568 [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1569 [?w, rk, 0, i, Ui1, Upl ] ^
1570 [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1571 [?w, rk, 0, i, i, Upl ] ^
1573 "&& !CONSTANT_P (operands[6])"
1575 operands[6] = CONSTM1_RTX (VNx2BImode);
1579 ;; Likewise, but with the offset being truncated to 32 bits and then
1580 ;; zero-extended.
1581 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1582 [(set (match_operand:SVE_2 0 "register_operand")
1584 [(match_operand:VNx2BI 5 "register_operand")
1585 (match_operand:DI 1 "register_operand")
1587 (match_operand:VNx2DI 2 "register_operand")
1588 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1589 (match_operand:DI 3 "const_int_operand")
1590 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1591 (mem:BLK (scratch))]
1592 UNSPEC_LD1_GATHER))]
1593 "TARGET_SVE && TARGET_NON_STREAMING"
1594 {@ [cons: =0, 1, 2, 3, 4, 5]
1595 [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1596 [?w, rk, 0, i, Ui1, Upl ] ^
1597 [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1598 [?w, rk, 0, i, i, Upl ] ^
1602 ;; -------------------------------------------------------------------------
1603 ;; ---- Extending gather loads
1604 ;; -------------------------------------------------------------------------
1605 ;; Includes gather forms of:
1612 ;; -------------------------------------------------------------------------
1614 ;; Predicated extending gather loads for 32-bit elements. Operand 3 is
1615 ;; true for unsigned extension and false for signed extension.
1616 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1617 [(set (match_operand:SVE_4HSI 0 "register_operand")
1619 [(match_operand:VNx4BI 6 "general_operand")
1620 (ANY_EXTEND:SVE_4HSI
1622 [(match_operand:VNx4BI 5 "register_operand")
1623 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>")
1624 (match_operand:VNx4SI 2 "register_operand")
1625 (match_operand:DI 3 "const_int_operand")
1626 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>")
1627 (mem:BLK (scratch))]
1628 UNSPEC_LD1_GATHER))]
1631 && TARGET_NON_STREAMING
1632 && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1633 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1634 [&w, Z, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1635 [?w, Z, 0, Ui1, Ui1, Upl, UplDnm] ^
1636 [&w, vg<SVE_4BHI:Vesize>, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1637 [?w, vg<SVE_4BHI:Vesize>, 0, Ui1, Ui1, Upl, UplDnm] ^
1638 [&w, rk, w, Z, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1639 [?w, rk, 0, Z, Ui1, Upl, UplDnm] ^
1640 [&w, rk, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1641 [?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^
1642 [&w, rk, w, Z, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1643 [?w, rk, 0, Z, i, Upl, UplDnm] ^
1644 [&w, rk, w, Ui1, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1645 [?w, rk, 0, Ui1, i, Upl, UplDnm] ^
1647 "&& !CONSTANT_P (operands[6])"
1649 operands[6] = CONSTM1_RTX (VNx4BImode);
1653 ;; Predicated extending gather loads for 64-bit elements. The value of
1654 ;; operand 3 doesn't matter in this case.
1655 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1656 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1658 [(match_operand:VNx2BI 6 "general_operand")
1659 (ANY_EXTEND:SVE_2HSDI
1661 [(match_operand:VNx2BI 5 "register_operand")
1662 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>")
1663 (match_operand:VNx2DI 2 "register_operand")
1664 (match_operand:DI 3 "const_int_operand")
1665 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1666 (mem:BLK (scratch))]
1667 UNSPEC_LD1_GATHER))]
1670 && TARGET_NON_STREAMING
1671 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1672 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1673 [&w, Z, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1674 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
1675 [&w, vg<SVE_2BHSI:Vesize>, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1676 [?w, vg<SVE_2BHSI:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1677 [&w, rk, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1678 [?w, rk, 0, i, Ui1, Upl, UplDnm] ^
1679 [&w, rk, w, i, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1680 [?w, rk, 0, i, i, Upl, UplDnm] ^
1682 "&& !CONSTANT_P (operands[6])"
1684 operands[6] = CONSTM1_RTX (VNx2BImode);
1688 ;; Likewise, but with the offset being extended from 32 bits.
1689 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1690 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1693 (ANY_EXTEND:SVE_2HSDI
1695 [(match_operand:VNx2BI 5 "register_operand")
1696 (match_operand:DI 1 "aarch64_reg_or_zero")
1700 (match_operand:VNx2SI 2 "register_operand"))]
1702 (match_operand:DI 3 "const_int_operand")
1703 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1704 (mem:BLK (scratch))]
1705 UNSPEC_LD1_GATHER))]
1708 && TARGET_NON_STREAMING
1709 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1710 {@ [cons: =0, 1, 2, 3, 4, 5]
1711 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1712 [?w, rk, 0, i, Ui1, Upl ] ^
1713 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]
1714 [?w, rk, 0, i, i, Upl ] ^
1716 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1718 operands[6] = CONSTM1_RTX (VNx2BImode);
1719 operands[7] = CONSTM1_RTX (VNx2BImode);
1723 ;; Likewise, but with the offset being truncated to 32 bits and then
1724 ;; sign-extended.
1725 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1726 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1729 (ANY_EXTEND:SVE_2HSDI
1731 [(match_operand:VNx2BI 5 "register_operand")
1732 (match_operand:DI 1 "aarch64_reg_or_zero")
1737 (match_operand:VNx2DI 2 "register_operand")))]
1739 (match_operand:DI 3 "const_int_operand")
1740 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1741 (mem:BLK (scratch))]
1742 UNSPEC_LD1_GATHER))]
1745 && TARGET_NON_STREAMING
1746 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1747 {@ [cons: =0, 1, 2, 3, 4, 5]
1748 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1749 [?w, rk, 0, i, Ui1, Upl ] ^
1750 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1751 [?w, rk, 0, i, i, Upl ] ^
1753 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1755 operands[6] = CONSTM1_RTX (VNx2BImode);
1756 operands[7] = CONSTM1_RTX (VNx2BImode);
1760 ;; Likewise, but with the offset being truncated to 32 bits and then
1761 ;; zero-extended.
1762 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1763 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1766 (ANY_EXTEND:SVE_2HSDI
1768 [(match_operand:VNx2BI 5 "register_operand")
1769 (match_operand:DI 1 "aarch64_reg_or_zero")
1771 (match_operand:VNx2DI 2 "register_operand")
1772 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1773 (match_operand:DI 3 "const_int_operand")
1774 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1775 (mem:BLK (scratch))]
1776 UNSPEC_LD1_GATHER))]
1779 && TARGET_NON_STREAMING
1780 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1781 {@ [cons: =0, 1, 2, 3, 4, 5]
1782 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1783 [?w, rk, 0, i, Ui1, Upl ] ^
1784 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1785 [?w, rk, 0, i, i, Upl ] ^
1787 "&& !CONSTANT_P (operands[7])"
1789 operands[7] = CONSTM1_RTX (VNx2BImode);
1793 ;; -------------------------------------------------------------------------
1794 ;; ---- First-faulting gather loads
1795 ;; -------------------------------------------------------------------------
1796 ;; Includes gather forms of:
1799 ;; -------------------------------------------------------------------------
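;; Added illustration (not from the GCC sources): the first-faulting gathers
;; below follow the usual FFR protocol.  A sketch in terms of the SVE ACLE,
;; assuming the intrinsic spellings shown here (check arm_sve.h for the
;; exact names):
;;
;;   #include <arm_sve.h>
;;
;;   svfloat64_t
;;   speculative_gather (const double *base, svuint64_t idx, svbool_t pg,
;;                       svbool_t *valid)
;;   {
;;     svsetffr ();                                /* set all FFR bits      */
;;     svfloat64_t data
;;       = svldff1_gather_u64index_f64 (pg, base, idx);
;;     *valid = svrdffr_z (pg);                    /* lanes actually loaded */
;;     return data;
;;   }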
1801 ;; Predicated first-faulting gather loads for 32-bit elements. Operand
1802 ;; 3 is true for unsigned extension and false for signed extension.
1803 (define_insn "@aarch64_ldff1_gather<mode>"
1804 [(set (match_operand:SVE_FULL_S 0 "register_operand")
1806 [(match_operand:VNx4BI 5 "register_operand")
1807 (match_operand:DI 1 "aarch64_sve_gather_offset_w")
1808 (match_operand:VNx4SI 2 "register_operand")
1809 (match_operand:DI 3 "const_int_operand")
1810 (match_operand:DI 4 "aarch64_gather_scale_operand_w")
1812 (reg:VNx16BI FFRT_REGNUM)]
1813 UNSPEC_LDFF1_GATHER))]
1814 "TARGET_SVE && TARGET_NON_STREAMING"
1815 {@ [cons: =0, 1, 2, 3, 4, 5 ]
1816 [&w, Z, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s]
1817 [?w, Z, 0, i, Ui1, Upl] ^
1818 [&w, vgw, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s, #%1]
1819 [?w, vgw, 0, i, Ui1, Upl] ^
1820 [&w, rk, w, Z, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1821 [?w, rk, 0, Z, Ui1, Upl] ^
1822 [&w, rk, w, Ui1, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1823 [?w, rk, 0, Ui1, Ui1, Upl] ^
1824 [&w, rk, w, Z, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1825 [?w, rk, 0, Z, i, Upl] ^
1826 [&w, rk, w, Ui1, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1827 [?w, rk, 0, Ui1, i, Upl] ^
1831 ;; Predicated first-faulting gather loads for 64-bit elements. The value
1832 ;; of operand 3 doesn't matter in this case.
1833 (define_insn "@aarch64_ldff1_gather<mode>"
1834 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1836 [(match_operand:VNx2BI 5 "register_operand")
1837 (match_operand:DI 1 "aarch64_sve_gather_offset_d")
1838 (match_operand:VNx2DI 2 "register_operand")
1839 (match_operand:DI 3 "const_int_operand")
1840 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1842 (reg:VNx16BI FFRT_REGNUM)]
1843 UNSPEC_LDFF1_GATHER))]
1844 "TARGET_SVE && TARGET_NON_STREAMING"
1845 {@ [cons: =0, 1, 2, 3, 4, 5 ]
1846 [&w, Z, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d]
1847 [?w, Z, 0, i, Ui1, Upl ] ^
1848 [&w, vgd, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d, #%1]
1849 [?w, vgd, 0, i, Ui1, Upl ] ^
1850 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d]
1851 [?w, rk, 0, i, Ui1, Upl ] ^
1852 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1853 [?w, rk, 0, i, i, Upl ] ^
1857 ;; Likewise, but with the offset being sign-extended from 32 bits.
1858 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1859 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1861 [(match_operand:VNx2BI 5 "register_operand")
1862 (match_operand:DI 1 "register_operand")
1867 (match_operand:VNx2DI 2 "register_operand")))]
1869 (match_operand:DI 3 "const_int_operand")
1870 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1872 (reg:VNx16BI FFRT_REGNUM)]
1873 UNSPEC_LDFF1_GATHER))]
1874 "TARGET_SVE && TARGET_NON_STREAMING"
1875 {@ [cons: =0, 1, 2, 3, 4, 5]
1876 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1877 [?w, rk, 0, i, Ui1, Upl ] ^
1878 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1879 [?w, rk, 0, i, i, Upl ] ^
1881 "&& !CONSTANT_P (operands[6])"
1883 operands[6] = CONSTM1_RTX (VNx2BImode);
1887 ;; Likewise, but with the offset being zero-extended from 32 bits.
1888 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1889 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1891 [(match_operand:VNx2BI 5 "register_operand")
1892 (match_operand:DI 1 "register_operand")
1894 (match_operand:VNx2DI 2 "register_operand")
1895 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1896 (match_operand:DI 3 "const_int_operand")
1897 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1899 (reg:VNx16BI FFRT_REGNUM)]
1900 UNSPEC_LDFF1_GATHER))]
1901 "TARGET_SVE && TARGET_NON_STREAMING"
1902 {@ [cons: =0, 1, 2, 3, 4, 5]
1903 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1904 [?w, rk, 0, i, Ui1, Upl ] ^
1905 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1906 [?w, rk, 0, i, i, Upl ] ^
1910 ;; -------------------------------------------------------------------------
1911 ;; ---- First-faulting extending gather loads
1912 ;; -------------------------------------------------------------------------
1913 ;; Includes gather forms of:
1920 ;; -------------------------------------------------------------------------
1922 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1923 ;; Operand 3 is true for unsigned extension and false for signed extension.
1924 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1925 [(set (match_operand:VNx4_WIDE 0 "register_operand")
1927 [(match_operand:VNx4BI 6 "general_operand")
1928 (ANY_EXTEND:VNx4_WIDE
1930 [(match_operand:VNx4BI 5 "register_operand")
1931 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>")
1932 (match_operand:VNx4_WIDE 2 "register_operand")
1933 (match_operand:DI 3 "const_int_operand")
1934 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>")
1936 (reg:VNx16BI FFRT_REGNUM)]
1937 UNSPEC_LDFF1_GATHER))]
1939 "TARGET_SVE && TARGET_NON_STREAMING"
1940 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1941 [&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1942 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
1943 [&w, vg<VNx4_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1944 [?w, vg<VNx4_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1945 [&w, rk, w, Z, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1946 [?w, rk, 0, Z, Ui1, Upl, UplDnm] ^
1947 [&w, rk, w, Ui1, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1948 [?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^
1949 [&w, rk, w, Z, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1950 [?w, rk, 0, Z, i, Upl, UplDnm] ^
1951 [&w, rk, w, Ui1, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1952 [?w, rk, 0, Ui1, i, Upl, UplDnm] ^
1954 "&& !CONSTANT_P (operands[6])"
1956 operands[6] = CONSTM1_RTX (VNx4BImode);
1960 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1961 ;; The value of operand 3 doesn't matter in this case.
1962 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1963 [(set (match_operand:VNx2_WIDE 0 "register_operand")
1965 [(match_operand:VNx2BI 6 "general_operand")
1966 (ANY_EXTEND:VNx2_WIDE
1968 [(match_operand:VNx2BI 5 "register_operand")
1969 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>")
1970 (match_operand:VNx2_WIDE 2 "register_operand")
1971 (match_operand:DI 3 "const_int_operand")
1972 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
1974 (reg:VNx16BI FFRT_REGNUM)]
1975 UNSPEC_LDFF1_GATHER))]
1977 "TARGET_SVE && TARGET_NON_STREAMING"
1978 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1979 [&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1980 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
1981 [&w, vg<VNx2_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1982 [?w, vg<VNx2_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1983 [&w, rk, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1984 [?w, rk, 0, i, Ui1, Upl, UplDnm] ^
1985 [&w, rk, w, i, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1986 [?w, rk, 0, i, i, Upl, UplDnm] ^
1988 "&& !CONSTANT_P (operands[6])"
1990 operands[6] = CONSTM1_RTX (VNx2BImode);
1994 ;; Likewise, but with the offset being sign-extended from 32 bits.
1995 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1996 [(set (match_operand:VNx2_WIDE 0 "register_operand")
1999 (ANY_EXTEND:VNx2_WIDE
2001 [(match_operand:VNx2BI 5 "register_operand")
2002 (match_operand:DI 1 "aarch64_reg_or_zero")
2007 (match_operand:VNx2DI 2 "register_operand")))]
2009 (match_operand:DI 3 "const_int_operand")
2010 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2012 (reg:VNx16BI FFRT_REGNUM)]
2013 UNSPEC_LDFF1_GATHER))]
2015 "TARGET_SVE && TARGET_NON_STREAMING"
2016 {@ [cons: =0, 1, 2, 3, 4, 5]
2017 [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
2018 [?w, rk, 0, i, Ui1, Upl ] ^
2019 [&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
2020 [?w, rk, 0, i, i, Upl ] ^
2022 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
2024 operands[6] = CONSTM1_RTX (VNx2BImode);
2025 operands[7] = CONSTM1_RTX (VNx2BImode);
2029 ;; Likewise, but with the offset being zero-extended from 32 bits.
2030 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
2031 [(set (match_operand:VNx2_WIDE 0 "register_operand")
2034 (ANY_EXTEND:VNx2_WIDE
2036 [(match_operand:VNx2BI 5 "register_operand")
2037 (match_operand:DI 1 "aarch64_reg_or_zero")
2039 (match_operand:VNx2DI 2 "register_operand")
2040 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2041 (match_operand:DI 3 "const_int_operand")
2042 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2044 (reg:VNx16BI FFRT_REGNUM)]
2045 UNSPEC_LDFF1_GATHER))]
2047 "TARGET_SVE && TARGET_NON_STREAMING"
2048 {@ [cons: =0, 1, 2, 3, 4, 5]
2049 [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
2050 [?w, rk, 0, i, Ui1, Upl ] ^
2051 [&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
2052 [?w, rk, 0, i, i, Upl ] ^
2054 "&& !CONSTANT_P (operands[7])"
2056 operands[7] = CONSTM1_RTX (VNx2BImode);
2060 ;; =========================================================================
2061 ;; == Prefetches
2062 ;; =========================================================================
2064 ;; -------------------------------------------------------------------------
2065 ;; ---- Contiguous prefetches
2066 ;; -------------------------------------------------------------------------
2067 ;; Includes contiguous forms of:
2072 ;; -------------------------------------------------------------------------
2074 ;; Contiguous predicated prefetches. Operand 2 gives the real prefetch
2075 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
2076 ;; information about the rw and locality operands of the prefetch rtx.
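;; Added illustration (not from the GCC sources): these prefetches normally
;; originate from the SVE ACLE svprf* intrinsics.  The generic GCC builtin
;; below shows the source-level meaning of the rw and locality values that
;; end up in operands 3 and 4:
;;
;;   void
;;   prefetch_ahead (const double *p, long i)
;;   {
;;     __builtin_prefetch (&p[i + 64], 0, 3);   /* rw = 0 (read), locality = 3 */
;;   }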
2077 (define_insn "@aarch64_sve_prefetch<mode>"
2078 [(prefetch (unspec:DI
2079 [(match_operand:<VPRED> 0 "register_operand" "Upl")
2080 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
2081 (match_operand:DI 2 "const_int_operand")]
2082 UNSPEC_SVE_PREFETCH)
2083 (match_operand:DI 3 "const_int_operand")
2084 (match_operand:DI 4 "const_int_operand"))]
2087 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
2088 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
2092 ;; -------------------------------------------------------------------------
2093 ;; ---- Gather prefetches
2094 ;; -------------------------------------------------------------------------
2095 ;; Includes gather forms of:
2100 ;; -------------------------------------------------------------------------
2102 ;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
2103 ;; are as follows (a short usage sketch follows the list):
2104 ;; 0: the governing predicate
2105 ;; 1: the scalar component of the address
2106 ;; 2: the vector component of the address
2107 ;; 3: 1 for zero extension, 0 for sign extension
2108 ;; 4: the scale multiplier
2109 ;; 5: a vector zero that identifies the mode of data being accessed
2110 ;; 6: the prefetch operator (an svprfop)
2111 ;; 7: the normal RTL prefetch rw flag
2112 ;; 8: the normal RTL prefetch locality value
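;; Added illustration (not from the GCC sources): these patterns are normally
;; reached through the SVE ACLE gather prefetch intrinsics.  A scalar
;; equivalent of what such a prefetch requests is simply a prefetch of each
;; gathered address:
;;
;;   void
;;   prefetch_gather (const float *base, const unsigned int *idx, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       __builtin_prefetch (&base[idx[i]], 0, 3);   /* read, high locality */
;;   }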
2113 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
2114 [(prefetch (unspec:DI
2115 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2116 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
2117 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
2118 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
2119 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2120 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2121 (match_operand:DI 6 "const_int_operand")]
2122 UNSPEC_SVE_PREFETCH_GATHER)
2123 (match_operand:DI 7 "const_int_operand")
2124 (match_operand:DI 8 "const_int_operand"))]
2125 "TARGET_SVE && TARGET_NON_STREAMING"
2127 static const char *const insns[][2] = {
2128 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
2129 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
2130 "prfb", "%0, [%1, %2.s, sxtw]",
2131 "prfb", "%0, [%1, %2.s, uxtw]",
2132 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
2133 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
2135 const char *const *parts = insns[which_alternative];
2136 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2140 ;; Predicated gather prefetches for 64-bit elements. The value of operand 3
2141 ;; doesn't matter in this case.
2142 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
2143 [(prefetch (unspec:DI
2144 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
2145 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
2146 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
2147 (match_operand:DI 3 "const_int_operand")
2148 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
2149 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2150 (match_operand:DI 6 "const_int_operand")]
2151 UNSPEC_SVE_PREFETCH_GATHER)
2152 (match_operand:DI 7 "const_int_operand")
2153 (match_operand:DI 8 "const_int_operand"))]
2154 "TARGET_SVE && TARGET_NON_STREAMING"
2156 static const char *const insns[][2] = {
2157 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
2158 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
2159 "prfb", "%0, [%1, %2.d]",
2160 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
2162 const char *const *parts = insns[which_alternative];
2163 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2167 ;; Likewise, but with the offset being sign-extended from 32 bits.
2168 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
2169 [(prefetch (unspec:DI
2170 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2171 (match_operand:DI 1 "register_operand" "rk, rk")
2176 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
2178 (match_operand:DI 3 "const_int_operand")
2179 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2180 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2181 (match_operand:DI 6 "const_int_operand")]
2182 UNSPEC_SVE_PREFETCH_GATHER)
2183 (match_operand:DI 7 "const_int_operand")
2184 (match_operand:DI 8 "const_int_operand"))]
2185 "TARGET_SVE && TARGET_NON_STREAMING"
2187 static const char *const insns[][2] = {
2188 "prfb", "%0, [%1, %2.d, sxtw]",
2189 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
2191 const char *const *parts = insns[which_alternative];
2192 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2194 "&& !rtx_equal_p (operands[0], operands[9])"
2196 operands[9] = copy_rtx (operands[0]);
2200 ;; Likewise, but with the offset being zero-extended from 32 bits.
2201 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
2202 [(prefetch (unspec:DI
2203 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2204 (match_operand:DI 1 "register_operand" "rk, rk")
2206 (match_operand:VNx2DI 2 "register_operand" "w, w")
2207 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
2208 (match_operand:DI 3 "const_int_operand")
2209 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2210 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2211 (match_operand:DI 6 "const_int_operand")]
2212 UNSPEC_SVE_PREFETCH_GATHER)
2213 (match_operand:DI 7 "const_int_operand")
2214 (match_operand:DI 8 "const_int_operand"))]
2215 "TARGET_SVE && TARGET_NON_STREAMING"
2217 static const char *const insns[][2] = {
2218 "prfb", "%0, [%1, %2.d, uxtw]",
2219 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
2221 const char *const *parts = insns[which_alternative];
2222 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2226 ;; =========================================================================
2227 ;; == Stores
2228 ;; =========================================================================
2230 ;; -------------------------------------------------------------------------
2231 ;; ---- Normal contiguous stores
2232 ;; -------------------------------------------------------------------------
2233 ;; Includes contiguous forms of:
2250 ;; -------------------------------------------------------------------------
2252 ;; Predicated ST1 (single).
2253 (define_insn "maskstore<mode><vpred>"
2254 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2256 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2257 (match_operand:SVE_ALL 1 "register_operand" "w")
2261 "st1<Vesize>\t%1.<Vctype>, %2, %0"
2264 ;; Unpredicated ST[234]. This is always a full update, so the dependence
2265 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2266 ;; There doesn't seem to be any obvious benefit to treating the all-true
2267 ;; case differently though. In particular, it's very unlikely that we'll
2268 ;; only find out during RTL that a store_lanes is dead.
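;; Added illustration (not from the GCC sources): an interleaving store loop
;; such as the one below is the kind of input that may be vectorized to ST2
;; through vec_store_lanes (and similarly ST3/ST4 for three or four streams):
;;
;;   void
;;   interleave2 (float *restrict out, const float *restrict a,
;;                const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       {
;;         out[2 * i] = a[i];
;;         out[2 * i + 1] = b[i];
;;       }
;;   }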
2269 (define_expand "vec_store_lanes<mode><vsingle>"
2270 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2273 (match_operand:SVE_STRUCT 1 "register_operand")
2278 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2282 ;; Predicated ST[234].
2283 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2284 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2286 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2287 (match_operand:SVE_STRUCT 1 "register_operand" "w")
2291 "st<vector_count><Vesize>\t%1, %2, %0"
2294 ;; -------------------------------------------------------------------------
2295 ;; ---- Truncating contiguous stores
2296 ;; -------------------------------------------------------------------------
2301 ;; -------------------------------------------------------------------------
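;; Added illustration (not from the GCC sources): a store that narrows each
;; element, such as the one below, may map to the truncating ST1 forms in
;; this section:
;;
;;   void
;;   narrow_store (unsigned char *restrict dst,
;;                 const unsigned int *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = (unsigned char) src[i];
;;   }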
2303 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2304 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2305 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2307 [(match_operand:VNx8BI 2 "register_operand" "Upl")
2308 (truncate:VNx8_NARROW
2309 (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2313 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2316 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2317 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2318 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2320 [(match_operand:VNx4BI 2 "register_operand" "Upl")
2321 (truncate:VNx4_NARROW
2322 (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2326 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2329 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2330 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2331 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2333 [(match_operand:VNx2BI 2 "register_operand" "Upl")
2334 (truncate:VNx2_NARROW
2335 (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2339 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2342 ;; -------------------------------------------------------------------------
2343 ;; ---- Non-temporal contiguous stores
2344 ;; -------------------------------------------------------------------------
2350 ;; -------------------------------------------------------------------------
2352 (define_insn "@aarch64_stnt1<mode>"
2353 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2355 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2356 (match_operand:SVE_FULL 1 "register_operand" "w")
2360 "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2363 ;; -------------------------------------------------------------------------
2364 ;; ---- Normal scatter stores
2365 ;; -------------------------------------------------------------------------
2366 ;; Includes scatter forms of:
2369 ;; -------------------------------------------------------------------------
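;; Added illustration (not from the GCC sources): a loop of the following
;; shape is the kind of input that may be vectorized to the ST1 scatter
;; stores below:
;;
;;   void
;;   scatter (double *restrict base, const unsigned long *restrict idx,
;;            const double *restrict val, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       base[idx[i]] = val[i];
;;   }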
2371 ;; Unpredicated scatter stores.
2372 (define_expand "scatter_store<mode><v_int_container>"
2373 [(set (mem:BLK (scratch))
2376 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2377 (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2378 (match_operand:DI 2 "const_int_operand")
2379 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2380 (match_operand:SVE_24 4 "register_operand")]
2381 UNSPEC_ST1_SCATTER))]
2382 "TARGET_SVE && TARGET_NON_STREAMING"
2384 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2388 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
2389 ;; unsigned extension and false for signed extension.
2390 (define_insn "mask_scatter_store<mode><v_int_container>"
2391 [(set (mem:BLK (scratch))
2393 [(match_operand:VNx4BI 5 "register_operand")
2394 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2395 (match_operand:VNx4SI 1 "register_operand")
2396 (match_operand:DI 2 "const_int_operand")
2397 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2398 (match_operand:SVE_4 4 "register_operand")]
2399 UNSPEC_ST1_SCATTER))]
2400 "TARGET_SVE && TARGET_NON_STREAMING"
2401 {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2402 [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2403 [ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2404 [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2405 [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2406 [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2407 [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2411 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
2412 ;; doesn't matter in this case.
2413 (define_insn "mask_scatter_store<mode><v_int_container>"
2414 [(set (mem:BLK (scratch))
2416 [(match_operand:VNx2BI 5 "register_operand")
2417 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2418 (match_operand:VNx2DI 1 "register_operand")
2419 (match_operand:DI 2 "const_int_operand")
2420 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2421 (match_operand:SVE_2 4 "register_operand")]
2422 UNSPEC_ST1_SCATTER))]
2423 "TARGET_SVE && TARGET_NON_STREAMING"
2424 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2425 [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2426 [ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2427 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2428 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2432 ;; Likewise, but with the offset being extended from 32 bits.
2433 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2434 [(set (mem:BLK (scratch))
2436 [(match_operand:VNx2BI 5 "register_operand")
2437 (match_operand:DI 0 "register_operand")
2441 (match_operand:VNx2SI 1 "register_operand"))]
2443 (match_operand:DI 2 "const_int_operand")
2444 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2445 (match_operand:SVE_2 4 "register_operand")]
2446 UNSPEC_ST1_SCATTER))]
2447 "TARGET_SVE && TARGET_NON_STREAMING"
2448 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2449 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2450 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]
2452 "&& !CONSTANT_P (operands[6])"
2454 operands[6] = CONSTM1_RTX (<VPRED>mode);
2458 ;; Likewise, but with the offset being truncated to 32 bits and then
2460 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2461 [(set (mem:BLK (scratch))
2463 [(match_operand:VNx2BI 5 "register_operand")
2464 (match_operand:DI 0 "register_operand")
2469 (match_operand:VNx2DI 1 "register_operand")))]
2471 (match_operand:DI 2 "const_int_operand")
2472 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2473 (match_operand:SVE_2 4 "register_operand")]
2474 UNSPEC_ST1_SCATTER))]
2475 "TARGET_SVE && TARGET_NON_STREAMING"
2476 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2477 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2478 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]
2480 "&& !CONSTANT_P (operands[6])"
2482 operands[6] = CONSTM1_RTX (<VPRED>mode);
2486 ;; Likewise, but with the offset being truncated to 32 bits and then
2488 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2489 [(set (mem:BLK (scratch))
2491 [(match_operand:VNx2BI 5 "register_operand")
2492 (match_operand:DI 0 "aarch64_reg_or_zero")
2494 (match_operand:VNx2DI 1 "register_operand")
2495 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2496 (match_operand:DI 2 "const_int_operand")
2497 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2498 (match_operand:SVE_2 4 "register_operand")]
2499 UNSPEC_ST1_SCATTER))]
2500 "TARGET_SVE && TARGET_NON_STREAMING"
2501 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2502 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2503 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]
2507 ;; -------------------------------------------------------------------------
2508 ;; ---- Truncating scatter stores
2509 ;; -------------------------------------------------------------------------
2510 ;; Includes scatter forms of:
2514 ;; -------------------------------------------------------------------------
2516 ;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
2517 ;; true for unsigned extension and false for signed extension.
2518 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2519 [(set (mem:BLK (scratch))
2521 [(match_operand:VNx4BI 5 "register_operand")
2522 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2523 (match_operand:VNx4SI 1 "register_operand")
2524 (match_operand:DI 2 "const_int_operand")
2525 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2526 (truncate:VNx4_NARROW
2527 (match_operand:VNx4_WIDE 4 "register_operand"))]
2528 UNSPEC_ST1_SCATTER))]
2529 "TARGET_SVE && TARGET_NON_STREAMING"
2530 {@ [ cons: 1 , 2 , 4 , 5 ]
2531 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2532 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2533 [ w , Z , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2534 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2535 [ w , Z , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2536 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2540 ;; Predicated truncating scatter stores for 64-bit elements. The value of
2541 ;; operand 2 doesn't matter in this case.
2542 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2543 [(set (mem:BLK (scratch))
2545 [(match_operand:VNx2BI 5 "register_operand")
2546 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2547 (match_operand:VNx2DI 1 "register_operand")
2548 (match_operand:DI 2 "const_int_operand")
2549 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2550 (truncate:VNx2_NARROW
2551 (match_operand:VNx2_WIDE 4 "register_operand"))]
2552 UNSPEC_ST1_SCATTER))]
2553 "TARGET_SVE && TARGET_NON_STREAMING"
2554 {@ [ cons: 1 , 4 , 5 ]
2555 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2556 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2557 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2558 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2562 ;; Likewise, but with the offset being sign-extended from 32 bits.
2563 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2564 [(set (mem:BLK (scratch))
2566 [(match_operand:VNx2BI 5 "register_operand")
2567 (match_operand:DI 0 "register_operand")
2572 (match_operand:VNx2DI 1 "register_operand")))]
2574 (match_operand:DI 2 "const_int_operand")
2575 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2576 (truncate:VNx2_NARROW
2577 (match_operand:VNx2_WIDE 4 "register_operand"))]
2578 UNSPEC_ST1_SCATTER))]
2579 "TARGET_SVE && TARGET_NON_STREAMING"
2580 {@ [ cons: 0 , 1 , 4 , 5 ]
2581 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2582 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]
2584 "&& !rtx_equal_p (operands[5], operands[6])"
2586 operands[6] = copy_rtx (operands[5]);
2590 ;; Likewise, but with the offset being zero-extended from 32 bits.
2591 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2592 [(set (mem:BLK (scratch))
2594 [(match_operand:VNx2BI 5 "register_operand")
2595 (match_operand:DI 0 "aarch64_reg_or_zero")
2597 (match_operand:VNx2DI 1 "register_operand")
2598 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2599 (match_operand:DI 2 "const_int_operand")
2600 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2601 (truncate:VNx2_NARROW
2602 (match_operand:VNx2_WIDE 4 "register_operand"))]
2603 UNSPEC_ST1_SCATTER))]
2604 "TARGET_SVE && TARGET_NON_STREAMING"
2605 {@ [ cons: 0 , 1 , 4 , 5 ]
2606 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2607 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]
2611 ;; =========================================================================
2612 ;; == Vector creation
2613 ;; =========================================================================
2615 ;; -------------------------------------------------------------------------
2616 ;; ---- [INT,FP] Duplicate element
2617 ;; -------------------------------------------------------------------------
2633 ;; -------------------------------------------------------------------------
2635 (define_expand "vec_duplicate<mode>"
2637 [(set (match_operand:SVE_ALL 0 "register_operand")
2638 (vec_duplicate:SVE_ALL
2639 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2640 (clobber (scratch:VNx16BI))])]
2643 if (MEM_P (operands[1]))
2645 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2646 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2647 CONST0_RTX (<MODE>mode)));
2653 ;; Accept memory operands for the benefit of combine, and also in case
2654 ;; the scalar input gets spilled to memory during RA. We want to split
2655 ;; the load at the first opportunity in order to allow the PTRUE to be
2656 ;; optimized with surrounding code.
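;; Added illustration (not from the GCC sources): broadcasting a value that
;; lives in memory, as in the loop below, is the case the following
;; define_insn_and_split handles; accepting the memory operand lets the
;; broadcast become a single LD1RW under a PTRUE rather than a scalar load
;; followed by a DUP:
;;
;;   void
;;   broadcast (float *restrict dst, const float *restrict src, int n)
;;   {
;;     float c = *src;
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = c;
;;   }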
2657 (define_insn_and_split "*vec_duplicate<mode>_reg"
2658 [(set (match_operand:SVE_ALL 0 "register_operand")
2659 (vec_duplicate:SVE_ALL
2660 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2661 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2663 {@ [ cons: =0 , 1 ; attrs: length ]
2664 [ w , r ; 4 ] mov\t%0.<Vetype>, %<vwcore>1
2665 [ w , w ; 4 ] mov\t%0.<Vetype>, %<Vetype>1
2668 "&& MEM_P (operands[1])"
2671 if (GET_CODE (operands[2]) == SCRATCH)
2672 operands[2] = gen_reg_rtx (VNx16BImode);
2673 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2674 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2675 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2676 CONST0_RTX (<MODE>mode)));
2681 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
2683 ;; The addressing mode range of LD1RQ does not match the addressing mode
2684 ;; range of LDR Qn. If the predicate enforced the LD1RQ range, we would
2685 ;; not be able to combine LDR Qns outside that range. The predicate
2686 ;; therefore accepts all memory operands, with only the constraints
2687 ;; enforcing the actual restrictions. If the instruction is split
2688 ;; before RA, we need to load invalid addresses into a temporary.
2690 (define_insn_and_split "@aarch64_vec_duplicate_vq<mode>_le"
2691 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w")
2692 (vec_duplicate:SVE_FULL
2693 (match_operand:<V128> 1 "nonimmediate_operand" "w, UtQ")))
2694 (clobber (match_scratch:VNx16BI 2 "=X, Upl"))]
2695 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2697 switch (which_alternative)
2700 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2701 return "dup\t%0.q, %1.q[0]";
2708 "&& MEM_P (operands[1])"
2711 if (can_create_pseudo_p ()
2712 && !aarch64_sve_ld1rq_operand (operands[1], <V128>mode))
2713 operands[1] = force_reload_address (operands[1]);
2714 if (GET_CODE (operands[2]) == SCRATCH)
2715 operands[2] = gen_reg_rtx (VNx16BImode);
2716 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2717 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2718 emit_insn (gen_aarch64_sve_ld1rq<mode> (operands[0], operands[1], gp));
2723 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2724 ;; The SVE register layout puts memory lane N into (architectural)
2725 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2726 ;; lsb into the register lsb. We therefore have to describe this in rtl
2727 ;; terms as a reverse of the V128 vector followed by a duplicate.
2728 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2729 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2730 (vec_duplicate:SVE_FULL
2732 (match_operand:<V128> 1 "register_operand" "w")
2733 (match_operand 2 "descending_int_parallel"))))]
2736 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2737 GET_MODE_NUNITS (<V128>mode) - 1)"
2739 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2740 return "dup\t%0.q, %1.q[0]";
2744 ;; This is used for vec_duplicate<mode>s from memory, but can also
2745 ;; be used by combine to optimize selects of a vec_duplicate<mode>
2746 ;; with zero.
2747 (define_insn "sve_ld1r<mode>"
2748 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2750 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2751 (vec_duplicate:SVE_ALL
2752 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2753 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2756 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2759 ;; Load 128 bits from memory under predicate control and duplicate to
2760 ;; all vector elements.
2761 (define_insn "@aarch64_sve_ld1rq<mode>"
2762 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2764 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2765 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2769 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2770 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2774 (define_insn "@aarch64_sve_ld1ro<mode>"
2775 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2777 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2778 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2781 "TARGET_SVE_F64MM && TARGET_NON_STREAMING"
2783 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2784 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2788 ;; -------------------------------------------------------------------------
2789 ;; ---- [INT,FP] Initialize from individual elements
2790 ;; -------------------------------------------------------------------------
2793 ;; -------------------------------------------------------------------------
2795 (define_expand "vec_init<mode><Vel>"
2796 [(match_operand:SVE_FULL 0 "register_operand")
2797 (match_operand 1 "")]
2800 aarch64_sve_expand_vector_init (operands[0], operands[1]);
2805 ;; Shift an SVE vector left and insert a scalar into element 0.
2806 (define_insn "vec_shl_insert_<mode>"
2807 [(set (match_operand:SVE_FULL 0 "register_operand")
2809 [(match_operand:SVE_FULL 1 "register_operand")
2810 (match_operand:<VEL> 2 "aarch64_reg_or_zero")]
2813 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
2814 [ ?w , 0 , rZ ; * ] insr\t%0.<Vetype>, %<vwcore>2
2815 [ w , 0 , w ; * ] insr\t%0.<Vetype>, %<Vetype>2
2816 [ ??&w , w , rZ ; yes ] movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2817 [ ?&w , w , w ; yes ] movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2
2821 ;; -------------------------------------------------------------------------
2822 ;; ---- [INT] Linear series
2823 ;; -------------------------------------------------------------------------
2826 ;; -------------------------------------------------------------------------
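;; Added illustration (not from the GCC sources): a loop that computes a
;; linear series, such as the one below, can be vectorized using the INDEX
;; patterns that follow:
;;
;;   void
;;   iota (int *restrict dst, int base, int step, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = base + i * step;
;;   }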
2828 (define_insn "vec_series<mode>"
2829 [(set (match_operand:SVE_I 0 "register_operand")
2831 (match_operand:<VEL> 1 "aarch64_sve_index_operand")
2832 (match_operand:<VEL> 2 "aarch64_sve_index_operand")))]
2834 {@ [ cons: =0 , 1 , 2 ]
2835 [ w , Usi , r ] index\t%0.<Vctype>, #%1, %<vccore>2
2836 [ w , r , Usi ] index\t%0.<Vctype>, %<vccore>1, #%2
2837 [ w , r , r ] index\t%0.<Vctype>, %<vccore>1, %<vccore>2
2841 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2842 ;; of an INDEX instruction.
2843 (define_insn "*vec_series<mode>_plus"
2844 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2846 (vec_duplicate:SVE_I
2847 (match_operand:<VEL> 1 "register_operand" "r"))
2848 (match_operand:SVE_I 2 "immediate_operand")))]
2849 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2851 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2852 return "index\t%0.<Vctype>, %<vccore>1, #%2";
2856 ;; -------------------------------------------------------------------------
2857 ;; ---- [PRED] Duplicate element
2858 ;; -------------------------------------------------------------------------
2859 ;; The patterns in this section are synthetic.
2860 ;; -------------------------------------------------------------------------
2862 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2863 ;; input into the top bit and using a WHILELO. An alternative would be to
2864 ;; duplicate the input and do a compare with zero.
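;; Added commentary (not from the GCC sources), as a worked example of the
;; trick: if the scalar input is 1, the shift produces 0x8000000000000000 and
;; WHILELO (0, 0x8000000000000000) is true for every element, giving an
;; all-true predicate; if the input is 0, the shift produces 0 and
;; WHILELO (0, 0) is false everywhere, giving an all-false predicate.  Only
;; bit 0 of the input matters, which matches the bool semantics.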
2865 (define_expand "vec_duplicate<mode>"
2866 [(set (match_operand:PRED_ALL 0 "register_operand")
2867 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2870 rtx tmp = gen_reg_rtx (DImode);
2871 rtx op1 = gen_lowpart (DImode, operands[1]);
2872 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2873 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2878 ;; =========================================================================
2879 ;; == Vector decomposition
2880 ;; =========================================================================
2882 ;; -------------------------------------------------------------------------
2883 ;; ---- [INT,FP] Extract index
2884 ;; -------------------------------------------------------------------------
2886 ;; - DUP (Advanced SIMD)
2889 ;; - ST1 (Advanced SIMD)
2890 ;; - UMOV (Advanced SIMD)
2891 ;; -------------------------------------------------------------------------
2893 (define_expand "vec_extract<mode><Vel>"
2894 [(set (match_operand:<VEL> 0 "register_operand")
2896 (match_operand:SVE_FULL 1 "register_operand")
2897 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2901 if (poly_int_rtx_p (operands[2], &val)
2902 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2904 /* The last element can be extracted with a LASTB and a false
2905    predicate.  */
2906 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2907 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2910 if (!CONST_INT_P (operands[2]))
2912 /* Create an index with operand[2] as the base and -1 as the step.
2913 It will then be zero for the element we care about. */
2914 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2915 index = force_reg (<VEL_INT>mode, index);
2916 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2917 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2919 /* Get a predicate that is true for only that element. */
2920 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2921 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2922 rtx sel = gen_reg_rtx (<VPRED>mode);
2923 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2925 /* Select the element using LASTB. */
2926 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
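    /* Worked example (added commentary, not from the sources): to extract
       element 2 of an 8-element vector, the series above is
       {2, 1, 0, -1, -2, ...}, the compare with zero gives a predicate that
       is true only in lane 2, and LASTB then returns that lane.  */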
2932 ;; Extract element zero. This is a special case because we want to force
2933 ;; the registers to be the same for the second alternative, and then
2934 ;; split the instruction into nothing after RA.
2935 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2936 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2938 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2939 (parallel [(const_int 0)])))]
2942 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2943 switch (which_alternative)
2946 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2950 return "st1\\t{%1.<Vetype>}[0], %0";
2955 "&& reload_completed
2956 && REG_P (operands[0])
2957 && REGNO (operands[0]) == REGNO (operands[1])"
2960 emit_note (NOTE_INSN_DELETED);
2963 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2966 ;; Extract an element from the Advanced SIMD portion of the register.
2967 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2968 ;; want any change in lane number on big-endian targets.
2969 (define_insn "*vec_extract<mode><Vel>_v128"
2970 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2972 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2973 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2975 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2977 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2978 switch (which_alternative)
2981 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2983 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2985 return "st1\\t{%1.<Vetype>}[%2], %0";
2990 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2993 ;; Extract an element in the range of DUP. This pattern allows the
2994 ;; source and destination to be different.
2995 (define_insn "*vec_extract<mode><Vel>_dup"
2996 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2998 (match_operand:SVE_FULL 1 "register_operand" "w")
2999 (parallel [(match_operand:SI 2 "const_int_operand")])))]
3001 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
3003 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
3004 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
3008 ;; Extract an element outside the range of DUP. This pattern requires the
3009 ;; source and destination to be the same.
3010 (define_insn "*vec_extract<mode><Vel>_ext"
3011 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
3013 (match_operand:SVE_FULL 1 "register_operand" "0, w")
3014 (parallel [(match_operand:SI 2 "const_int_operand")])))]
3015 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
3017 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
3018 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
3019 return (which_alternative == 0
3020 ? "ext\t%0.b, %0.b, %0.b, #%2"
3021 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
3023 [(set_attr "movprfx" "*,yes")]
3026 ;; -------------------------------------------------------------------------
3027 ;; ---- [INT,FP] Extract active element
3028 ;; -------------------------------------------------------------------------
3032 ;; -------------------------------------------------------------------------
3034 ;; Extract the last active element of operand 1 into operand 0.
3035 ;; If no elements are active, extract the last inactive element instead.
3036 (define_insn "@extract_<last_op>_<mode>"
3037 [(set (match_operand:<VEL> 0 "register_operand")
3039 [(match_operand:<VPRED> 1 "register_operand")
3040 (match_operand:SVE_FULL 2 "register_operand")]
3043 {@ [ cons: =0 , 1 , 2 ]
3044 [ ?r , Upl , w ] last<ab>\t%<vwcore>0, %1, %2.<Vetype>
3045 [ w , Upl , w ] last<ab>\t%<Vetype>0, %1, %2.<Vetype>
3049 ;; -------------------------------------------------------------------------
3050 ;; ---- [PRED] Extract index
3051 ;; -------------------------------------------------------------------------
3052 ;; The patterns in this section are synthetic.
3053 ;; -------------------------------------------------------------------------
3055 ;; Handle extractions from a predicate by converting to an integer vector
3056 ;; and extracting from there.
3057 (define_expand "vec_extract<vpred><Vel>"
3058 [(match_operand:<VEL> 0 "register_operand")
3059 (match_operand:<VPRED> 1 "register_operand")
3060 (match_operand:SI 2 "nonmemory_operand")
3061 ;; Dummy operand to which we can attach the iterator.
3062 (reg:SVE_FULL_I V0_REGNUM)]
3065 rtx tmp = gen_reg_rtx (<MODE>mode);
3066 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
3067 CONST1_RTX (<MODE>mode),
3068 CONST0_RTX (<MODE>mode)));
3069 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
3074 ;; =========================================================================
3075 ;; == Unary arithmetic
3076 ;; =========================================================================
3078 ;; -------------------------------------------------------------------------
3079 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
3080 ;; -------------------------------------------------------------------------
3085 ;; - CNT (= popcount)
3086 ;; - RBIT (= bitreverse)
3089 ;; -------------------------------------------------------------------------
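;; Added illustration (not from the GCC sources): element-wise unary
;; operations such as the ones below can be vectorized to the predicated
;; forms in this section (NEG, NOT and CNT respectively):
;;
;;   void
;;   unary_ops (int *restrict a, int *restrict b, int *restrict c,
;;              const int *restrict x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       {
;;         a[i] = -x[i];
;;         b[i] = ~x[i];
;;         c[i] = __builtin_popcount (x[i]);
;;       }
;;   }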
3091 ;; Unpredicated integer unary arithmetic.
3092 (define_expand "<optab><mode>2"
3093 [(set (match_operand:SVE_I 0 "register_operand")
3096 (SVE_INT_UNARY:SVE_I
3097 (match_operand:SVE_I 1 "register_operand"))]
3101 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3105 ;; Integer unary arithmetic predicated with a PTRUE.
3106 (define_insn "@aarch64_pred_<optab><mode>"
3107 [(set (match_operand:SVE_VDQ_I 0 "register_operand")
3109 [(match_operand:<VPRED> 1 "register_operand")
3110 (SVE_INT_UNARY:SVE_VDQ_I
3111 (match_operand:SVE_VDQ_I 2 "register_operand"))]
3114 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3115 [ w , Upl , 0 ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
3116 [ ?&w , Upl , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
3120 ;; Predicated integer unary arithmetic with merging.
3121 (define_expand "@cond_<optab><mode>"
3122 [(set (match_operand:SVE_I 0 "register_operand")
3124 [(match_operand:<VPRED> 1 "register_operand")
3125 (SVE_INT_UNARY:SVE_I
3126 (match_operand:SVE_I 2 "register_operand"))
3127 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
3132 ;; Predicated integer unary arithmetic, merging with the first input.
3133 (define_insn "*cond_<optab><mode>_2"
3134 [(set (match_operand:SVE_I 0 "register_operand")
3136 [(match_operand:<VPRED> 1 "register_operand")
3137 (SVE_INT_UNARY:SVE_I
3138 (match_operand:SVE_I 2 "register_operand"))
3142 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3143 [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3144 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3148 ;; Predicated integer unary arithmetic, merging with an independent value.
3150 ;; The earlyclobber isn't needed for the first alternative, but omitting
3151 ;; it would only help the case in which operands 2 and 3 are the same,
3152 ;; which is handled above rather than here. Marking all the alternatives
3153 ;; as earlyclobber helps to make the instruction more regular to the
3154 ;; register allocator.
3155 (define_insn "*cond_<optab><mode>_any"
3156 [(set (match_operand:SVE_I 0 "register_operand")
3158 [(match_operand:<VPRED> 1 "register_operand")
3159 (SVE_INT_UNARY:SVE_I
3160 (match_operand:SVE_I 2 "register_operand"))
3161 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
3163 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3164 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3165 [ &w , Upl , w , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3166 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3167 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3172 ;; -------------------------------------------------------------------------
3173 ;; ---- [INT] General unary arithmetic corresponding to unspecs
3174 ;; -------------------------------------------------------------------------
3179 ;; -------------------------------------------------------------------------
3181 ;; Predicated integer unary operations.
3182 (define_insn "@aarch64_pred_<optab><mode>"
3183 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3185 [(match_operand:<VPRED> 1 "register_operand")
3187 [(match_operand:SVE_FULL_I 2 "register_operand")]
3190 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3191 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3192 [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3193 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3197 ;; Another way of expressing the REVB, REVH and REVW patterns, with this
3198 ;; form being easier for permutes. The predicate mode determines the number
3199 ;; of lanes and the data mode decides the granularity of the reversal within
3200 ;; each lane.
3201 (define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
3202 [(set (match_operand:SVE_ALL 0 "register_operand")
3204 [(match_operand:PRED_HSD 1 "register_operand")
3206 [(match_operand:SVE_ALL 2 "register_operand")]
3209 "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
3210 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3211 [ w , Upl , 0 ; * ] rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3212 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3216 ;; Predicated integer unary operations with merging.
3217 (define_insn "@cond_<optab><mode>"
3218 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3220 [(match_operand:<VPRED> 1 "register_operand")
3222 [(match_operand:SVE_FULL_I 2 "register_operand")]
3224 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3226 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3227 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3228 [ w , Upl , w , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3229 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3230 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3234 ;; -------------------------------------------------------------------------
3235 ;; ---- [INT] Sign and zero extension
3236 ;; -------------------------------------------------------------------------
3244 ;; -------------------------------------------------------------------------
3246 ;; Unpredicated sign and zero extension from a narrower mode.
3247 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3248 [(set (match_operand:SVE_HSDI 0 "register_operand")
3251 (ANY_EXTEND:SVE_HSDI
3252 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
3254 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3256 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
3260 ;; Predicated sign and zero extension from a narrower mode.
3261 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3262 [(set (match_operand:SVE_HSDI 0 "register_operand")
3264 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand")
3265 (ANY_EXTEND:SVE_HSDI
3266 (match_operand:SVE_PARTIAL_I 2 "register_operand"))]
3268 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3269 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3270 [ w , Upl , 0 ; * ] <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3271 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3275 ;; Predicated truncate-and-sign-extend operations.
3276 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3277 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
3278 (unspec:SVE_FULL_HSDI
3279 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
3280 (sign_extend:SVE_FULL_HSDI
3281 (truncate:SVE_PARTIAL_I
3282 (match_operand:SVE_FULL_HSDI 2 "register_operand")))]
3285 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3286 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3287 [ w , Upl , 0 ; * ] sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3288 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3292 ;; Predicated truncate-and-sign-extend operations with merging.
3293 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3294 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
3295 (unspec:SVE_FULL_HSDI
3296 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
3297 (sign_extend:SVE_FULL_HSDI
3298 (truncate:SVE_PARTIAL_I
3299 (match_operand:SVE_FULL_HSDI 2 "register_operand")))
3300 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
3303 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3304 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3305 [ w , Upl , w , 0 ; * ] sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3306 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3307 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3311 ;; Predicated truncate-and-zero-extend operations, merging with the first input.
3314 ;; The canonical form of this operation is an AND of a constant rather
3315 ;; than (zero_extend (truncate ...)).
3316 (define_insn "*cond_uxt<mode>_2"
3317 [(set (match_operand:SVE_I 0 "register_operand")
3319 [(match_operand:<VPRED> 1 "register_operand")
3321 (match_operand:SVE_I 2 "register_operand")
3322 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3326 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3327 [ w , Upl , 0 ; * ] uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3328 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
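;; For example (illustrative source), keeping only the low byte of each
;; selected 32-bit element:
;;
;;   #include <stdint.h>
;;   void f (uint32_t *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       if (x[i] & 1)
;;         x[i] &= 0xff;
;;   }
;;
;; can use the merging form above, since (x & 0xff) is the canonical way of
;; writing the zero-extension and UXTB here merges with the first input.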
3332 ;; Predicated truncate-and-zero-extend operations, merging with an
3333 ;; independent value.
3335 ;; The earlyclobber isn't needed for the first alternative, but omitting
3336 ;; it would only help the case in which operands 2 and 4 are the same,
3337 ;; which is handled above rather than here. Marking all the alternatives
3338 ;; as earlyclobber helps to make the instruction more regular to the
3339 ;; register allocator.
3340 (define_insn "*cond_uxt<mode>_any"
3341 [(set (match_operand:SVE_I 0 "register_operand")
3343 [(match_operand:<VPRED> 1 "register_operand")
3345 (match_operand:SVE_I 2 "register_operand")
3346 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3347 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3349 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3350 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
3351 [ &w , Upl , w , 0 ; * ] uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3352 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3353 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3357 ;; -------------------------------------------------------------------------
3358 ;; ---- [INT] Truncation
3359 ;; -------------------------------------------------------------------------
3360 ;; The patterns in this section are synthetic.
3361 ;; -------------------------------------------------------------------------
3363 ;; Truncate to a partial SVE vector from either a full vector or a
3364 ;; wider partial vector. This is a no-op, because we can just ignore
3365 ;; the unused upper bits of the source.
3366 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3367 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3368 (truncate:SVE_PARTIAL_I
3369 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3370 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3372 "&& reload_completed"
3373 [(set (match_dup 0) (match_dup 1))]
3375 operands[1] = aarch64_replace_reg_mode (operands[1],
3376 <SVE_PARTIAL_I:MODE>mode);
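;; For example (illustrative source), narrowing 64-bit elements to 32-bit
;; elements:
;;
;;   #include <stdint.h>
;;   void f (int32_t *dst, const int64_t *src, int n) {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = (int32_t) src[i];
;;   }
;;
;; If the vectoriser picks a partial VNx2SI result here, only the low
;; 32 bits of each 64-bit container are needed, so after reload the pattern
;; above splits into a plain move of the same register viewed in the
;; narrower mode.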
3380 ;; -------------------------------------------------------------------------
3381 ;; ---- [INT] Logical inverse
3382 ;; -------------------------------------------------------------------------
3385 ;; -------------------------------------------------------------------------
3387 ;; Logical inverse, predicated with a ptrue.
3388 (define_expand "@aarch64_ptrue_cnot<mode>"
3389 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3392 [(match_operand:<VPRED> 1 "register_operand")
3393 (const_int SVE_KNOWN_PTRUE)
3395 (match_operand:SVE_FULL_I 2 "register_operand")
3403 operands[3] = CONST0_RTX (<MODE>mode);
3404 operands[4] = CONST1_RTX (<MODE>mode);
3408 (define_insn "*cnot<mode>"
3409 [(set (match_operand:SVE_I 0 "register_operand")
3412 [(match_operand:<VPRED> 1 "register_operand")
3413 (const_int SVE_KNOWN_PTRUE)
3415 (match_operand:SVE_I 2 "register_operand")
3416 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3418 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3422 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3423 [ w , Upl , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3424 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
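;; For example, a logical-NOT loop (illustrative source):
;;
;;   void f (int *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = !x[i];
;;   }
;;
;; selects 1 where the element is zero and 0 elsewhere, which is the
;; vcond_mask form above and typically assembles to
;; "cnot z0.s, p0/m, z1.s".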
3428 ;; Predicated logical inverse with merging.
3429 (define_expand "@cond_cnot<mode>"
3430 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3432 [(match_operand:<VPRED> 1 "register_operand")
3436 (const_int SVE_KNOWN_PTRUE)
3438 (match_operand:SVE_FULL_I 2 "register_operand")
3444 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3448 operands[4] = CONSTM1_RTX (<VPRED>mode);
3449 operands[5] = CONST0_RTX (<MODE>mode);
3450 operands[6] = CONST1_RTX (<MODE>mode);
3454 ;; Predicated logical inverse, merging with the first input.
3455 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3456 [(set (match_operand:SVE_I 0 "register_operand")
3458 [(match_operand:<VPRED> 1 "register_operand")
3459 ;; Logical inverse of operand 2 (as above).
3463 (const_int SVE_KNOWN_PTRUE)
3465 (match_operand:SVE_I 2 "register_operand")
3466 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3468 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3474 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3475 [ w , Upl , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3476 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3478 "&& !CONSTANT_P (operands[5])"
3480 operands[5] = CONSTM1_RTX (<VPRED>mode);
3484 ;; Predicated logical inverse, merging with an independent value.
3486 ;; The earlyclobber isn't needed for the first alternative, but omitting
3487 ;; it would only help the case in which operands 2 and 6 are the same,
3488 ;; which is handled above rather than here. Marking all the alternatives
3489 ;; as earlyclobber helps to make the instruction more regular to the
3490 ;; register allocator.
3491 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3492 [(set (match_operand:SVE_I 0 "register_operand")
3494 [(match_operand:<VPRED> 1 "register_operand")
3495 ;; Logical inverse of operand 2 (as above).
3499 (const_int SVE_KNOWN_PTRUE)
3501 (match_operand:SVE_I 2 "register_operand")
3502 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3504 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3507 (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero")]
3509 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3510 {@ [ cons: =0 , 1 , 2 , 6 ; attrs: movprfx ]
3511 [ &w , Upl , w , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3512 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3513 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3515 "&& !CONSTANT_P (operands[5])"
3517 operands[5] = CONSTM1_RTX (<VPRED>mode);
3521 ;; -------------------------------------------------------------------------
3522 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3523 ;; -------------------------------------------------------------------------
3526 ;; -------------------------------------------------------------------------
3528 ;; Unpredicated unary operations that take an integer and return a float.
3529 (define_insn "@aarch64_sve_<optab><mode>"
3530 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3532 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3535 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3538 ;; -------------------------------------------------------------------------
3539 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3540 ;; -------------------------------------------------------------------------
3555 ;; -------------------------------------------------------------------------
3557 ;; Unpredicated floating-point unary operations.
3558 (define_insn "@aarch64_sve_<optab><mode>"
3559 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3561 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3564 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3567 ;; Unpredicated floating-point unary operations.
3568 (define_expand "<optab><mode>2"
3569 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3572 (const_int SVE_RELAXED_GP)
3573 (match_operand:SVE_FULL_F 1 "register_operand")]
3574 SVE_COND_FP_UNARY_OPTAB))]
3577 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
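;; For example (illustrative source):
;;
;;   void f (float *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = __builtin_fabsf (x[i]);
;;   }
;;
;; goes through this expander with an all-true predicate and typically
;; assembles to "fabs z0.s, p0/m, z0.s" (FABS being one of the operations
;; covered by these unspecs).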
3581 ;; Predicated floating-point unary operations.
3582 (define_insn "@aarch64_pred_<optab><mode>"
3583 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3585 [(match_operand:<VPRED> 1 "register_operand")
3586 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3587 (match_operand:SVE_FULL_F 2 "register_operand")]
3588 SVE_COND_FP_UNARY))]
3590 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3591 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3592 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3596 ;; Predicated floating-point unary arithmetic with merging.
3597 (define_expand "@cond_<optab><mode>"
3598 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3600 [(match_operand:<VPRED> 1 "register_operand")
3603 (const_int SVE_STRICT_GP)
3604 (match_operand:SVE_FULL_F 2 "register_operand")]
3606 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3611 ;; Predicated floating-point unary arithmetic, merging with the first input.
3612 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
3613 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3615 [(match_operand:<VPRED> 1 "register_operand")
3618 (const_int SVE_RELAXED_GP)
3619 (match_operand:SVE_FULL_F 2 "register_operand")]
3624 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3625 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3626 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3628 "&& !rtx_equal_p (operands[1], operands[3])"
3630 operands[3] = copy_rtx (operands[1]);
3634 (define_insn "*cond_<optab><mode>_2_strict"
3635 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3637 [(match_operand:<VPRED> 1 "register_operand")
3640 (const_int SVE_STRICT_GP)
3641 (match_operand:SVE_FULL_F 2 "register_operand")]
3646 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3647 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3648 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3652 ;; Predicated floating-point unary arithmetic, merging with an independent value.
3655 ;; The earlyclobber isn't needed for the first alternative, but omitting
3656 ;; it would only help the case in which operands 2 and 3 are the same,
3657 ;; which is handled above rather than here. Marking all the alternatives
3658 ;; as earlyclobber helps to make the instruction more regular to the
3659 ;; register allocator.
3660 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
3661 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3663 [(match_operand:<VPRED> 1 "register_operand")
3666 (const_int SVE_RELAXED_GP)
3667 (match_operand:SVE_FULL_F 2 "register_operand")]
3669 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3671 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3672 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3673 [ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3674 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3675 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3677 "&& !rtx_equal_p (operands[1], operands[4])"
3679 operands[4] = copy_rtx (operands[1]);
3683 (define_insn "*cond_<optab><mode>_any_strict"
3684 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3686 [(match_operand:<VPRED> 1 "register_operand")
3689 (const_int SVE_STRICT_GP)
3690 (match_operand:SVE_FULL_F 2 "register_operand")]
3692 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3694 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3695 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3696 [ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3697 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3698 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3702 ;; -------------------------------------------------------------------------
3703 ;; ---- [FP] Square root
3704 ;; -------------------------------------------------------------------------
3706 (define_expand "sqrt<mode>2"
3707 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3710 (const_int SVE_RELAXED_GP)
3711 (match_operand:SVE_FULL_F 1 "register_operand")]
3712 UNSPEC_COND_FSQRT))]
3715 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3717 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
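;; For example (illustrative source):
;;
;;   void f (float *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = __builtin_sqrtf (x[i]);
;;   }
;;
;; normally emits the predicated FSQRT through this expander; with options
;; that permit low-precision square roots (e.g. -ffast-math together with
;; -mlow-precision-sqrt) aarch64_emit_approx_sqrt may instead expand an
;; FRSQRTE/FRSQRTS Newton-Raphson sequence.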
3720 ;; -------------------------------------------------------------------------
3721 ;; ---- [FP] Reciprocal square root
3722 ;; -------------------------------------------------------------------------
3724 (define_expand "rsqrt<mode>2"
3725 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3726 (unspec:SVE_FULL_SDF
3727 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3731 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
3735 (define_expand "@aarch64_rsqrte<mode>"
3736 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3737 (unspec:SVE_FULL_SDF
3738 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3743 (define_expand "@aarch64_rsqrts<mode>"
3744 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3745 (unspec:SVE_FULL_SDF
3746 [(match_operand:SVE_FULL_SDF 1 "register_operand")
3747 (match_operand:SVE_FULL_SDF 2 "register_operand")]
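;; Roughly, the approximation built from these patterns works as follows:
;; FRSQRTE gives an initial estimate x0 of 1/sqrt(d), and FRSQRTS computes
;; (3 - a*b)/2, so one refinement step is x1 = x0 * (3 - d*x0*x0)/2, the
;; standard Newton-Raphson iteration for the reciprocal square root.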
3752 ;; -------------------------------------------------------------------------
3753 ;; ---- [PRED] Inverse
3754 ;; -------------------------------------------------------------------------
3757 ;; -------------------------------------------------------------------------
3759 ;; Unpredicated predicate inverse.
3760 (define_expand "one_cmpl<mode>2"
3761 [(set (match_operand:PRED_ALL 0 "register_operand")
3763 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3767 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3771 ;; Predicated predicate inverse.
3772 (define_insn "*one_cmpl<mode>3"
3773 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3775 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3776 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3778 "not\t%0.b, %1/z, %2.b"
3781 ;; =========================================================================
3782 ;; == Binary arithmetic
3783 ;; =========================================================================
3785 ;; -------------------------------------------------------------------------
3786 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3787 ;; -------------------------------------------------------------------------
3789 ;; - ADD (merging form only)
3790 ;; - AND (merging form only)
3791 ;; - ASR (merging form only)
3792 ;; - EOR (merging form only)
3793 ;; - LSL (merging form only)
3794 ;; - LSR (merging form only)
3796 ;; - ORR (merging form only)
3799 ;; - SQADD (SVE2 merging form only)
3800 ;; - SQSUB (SVE2 merging form only)
3801 ;; - SUB (merging form only)
3804 ;; - UQADD (SVE2 merging form only)
3805 ;; - UQSUB (SVE2 merging form only)
3806 ;; -------------------------------------------------------------------------
3808 ;; Unpredicated integer binary operations that have an immediate form.
3809 (define_expand "<optab><mode>3"
3810 [(set (match_operand:SVE_I 0 "register_operand")
3813 (SVE_INT_BINARY_MULTI:SVE_I
3814 (match_operand:SVE_I 1 "register_operand")
3815 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3819 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3823 ;; Unpredicated integer binary operations that have an immediate form.
3824 ;; Advanced SIMD does not support vector DImode MUL, but SVE does.
3825 ;; Make use of the overlap between Z and V registers to implement the V2DI
3826 ;; optab for TARGET_SVE. The mulvnx2di3 expander can
3827 ;; handle the TARGET_SVE2 case transparently.
3828 (define_expand "mul<mode>3"
3829 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
3830 (unspec:SVE_I_SIMD_DI
3833 (match_operand:SVE_I_SIMD_DI 1 "register_operand")
3834 (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand"))]
3838 /* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple
3839 pattern for it here rather than splitting off the MULT expander separately.  */
3843 emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
3844 operands[1], operands[2]));
3847 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
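;; For example, a fixed-length 64-bit multiply (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (uint64_t *x, const uint64_t *y) {
;;     for (int i = 0; i < 2; ++i)
;;       x[i] *= y[i];
;;   }
;;
;; has no Advanced SIMD multiply instruction, but when SVE is enabled the
;; V2DI value can live in the low 128 bits of a Z register, using the
;; predicated SVE MUL on TARGET_SVE and the unpredicated form on
;; TARGET_SVE2.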
3851 ;; Integer binary operations that have an immediate form, predicated
3852 ;; with a PTRUE. We don't actually need the predicate for the first
3853 ;; and third alternatives, but using Upa or X isn't likely to gain much
3854 ;; and would make the instruction seem less uniform to the register allocator.
3856 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3857 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
3858 (unspec:SVE_I_SIMD_DI
3859 [(match_operand:<VPRED> 1 "register_operand")
3860 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
3861 (match_operand:SVE_I_SIMD_DI 2 "register_operand")
3862 (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
3865 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3866 [ w , Upl , %0 , <sve_imm_con> ; * ] #
3867 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
3868 [ ?&w , Upl , w , <sve_imm_con> ; yes ] #
3869 [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
3871 ; Split the unpredicated form after reload, so that we don't have
3872 ; the unnecessary PTRUE.
3873 "&& reload_completed
3874 && !register_operand (operands[3], <MODE>mode)"
3876 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI (match_dup 2) (match_dup 3)))]
3880 ;; Unpredicated binary operations with a constant (post-RA only).
3881 ;; These are generated by splitting a predicated instruction whose
3882 ;; predicate is unused.
3883 (define_insn "*post_ra_<optab><mode>3"
3884 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand" "=w, ?&w")
3885 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
3886 (match_operand:SVE_I_SIMD_DI 1 "register_operand" "0, w")
3887 (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3888 "TARGET_SVE && reload_completed"
3890 <sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2
3891 movprfx\t%Z0, %Z1\;<sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2"
3892 [(set_attr "movprfx" "*,yes")]
3895 ;; Predicated integer operations with merging.
3896 (define_expand "@cond_<optab><mode>"
3897 [(set (match_operand:SVE_I 0 "register_operand")
3899 [(match_operand:<VPRED> 1 "register_operand")
3900 (SVE_INT_BINARY:SVE_I
3901 (match_operand:SVE_I 2 "register_operand")
3902 (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
3903 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3908 ;; Predicated integer operations, merging with the first input.
3909 (define_insn "*cond_<optab><mode>_2"
3910 [(set (match_operand:SVE_I 0 "register_operand")
3912 [(match_operand:<VPRED> 1 "register_operand")
3913 (SVE_INT_BINARY:SVE_I
3914 (match_operand:SVE_I 2 "register_operand")
3915 (match_operand:SVE_I 3 "register_operand"))
3919 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3920 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3921 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3925 ;; Predicated integer operations, merging with the second input.
3926 (define_insn "*cond_<optab><mode>_3"
3927 [(set (match_operand:SVE_I 0 "register_operand")
3929 [(match_operand:<VPRED> 1 "register_operand")
3930 (SVE_INT_BINARY:SVE_I
3931 (match_operand:SVE_I 2 "register_operand")
3932 (match_operand:SVE_I 3 "register_operand"))
3936 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3937 [ w , Upl , w , 0 ; * ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3938 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3942 ;; Predicated integer operations, merging with an independent value.
3943 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3944 [(set (match_operand:SVE_I 0 "register_operand")
3946 [(match_operand:<VPRED> 1 "register_operand")
3947 (SVE_INT_BINARY:SVE_I
3948 (match_operand:SVE_I 2 "register_operand")
3949 (match_operand:SVE_I 3 "register_operand"))
3950 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3953 && !rtx_equal_p (operands[2], operands[4])
3954 && !rtx_equal_p (operands[3], operands[4])"
3955 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
3956 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3957 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3958 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3959 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3960 [ ?&w , Upl , w , w , w ] #
3962 "&& reload_completed
3963 && register_operand (operands[4], <MODE>mode)
3964 && !rtx_equal_p (operands[0], operands[4])"
3966 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3967 operands[4], operands[1]));
3968 operands[4] = operands[2] = operands[0];
3970 [(set_attr "movprfx" "yes")]
3973 ;; -------------------------------------------------------------------------
3974 ;; ---- [INT] Addition
3975 ;; -------------------------------------------------------------------------
3987 ;; -------------------------------------------------------------------------
3989 (define_insn "add<mode>3"
3990 [(set (match_operand:SVE_I 0 "register_operand")
3992 (match_operand:SVE_I 1 "register_operand")
3993 (match_operand:SVE_I 2 "aarch64_sve_add_operand")))]
3995 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3996 [ w , %0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
3997 [ w , 0 , vsn ; * ] sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3998 [ w , 0 , vsi ; * ] << aarch64_output_sve_vector_inc_dec ("%0.<Vetype>", operands[2]);
3999 [ ?w , w , vsa ; yes ] movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
4000 [ ?w , w , vsn ; yes ] movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
4001 [ w , w , w ; * ] add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
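;; As a rough illustration (names are only illustrative):
;;
;;   #include <arm_sve.h>
;;   svuint32_t f (svuint32_t x) {
;;     return svadd_n_u32_x (svptrue_b32 (), x, 7);
;;   }
;;
;; can be emitted through the vsa alternative as "add z0.s, z0.s, #7";
;; a negative constant in range instead uses the vsn alternative
;; ("sub ... #%N2"), and VL-dependent constants go through
;; aarch64_output_sve_vector_inc_dec and use the vector INC/DEC forms.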
4005 ;; Merging forms are handled through SVE_INT_BINARY.
4007 ;; -------------------------------------------------------------------------
4008 ;; ---- [INT] Subtraction
4009 ;; -------------------------------------------------------------------------
4013 ;; -------------------------------------------------------------------------
4015 (define_insn "sub<mode>3"
4016 [(set (match_operand:SVE_I 0 "register_operand")
4018 (match_operand:SVE_I 1 "aarch64_sve_arith_operand")
4019 (match_operand:SVE_I 2 "register_operand")))]
4021 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4022 [ w , w , w ; * ] sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4023 [ w , vsa , 0 ; * ] subr\t%0.<Vetype>, %0.<Vetype>, #%D1
4024 [ ?&w , vsa , w ; yes ] movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1
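;; For example, reversed subtraction from a constant (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (uint32_t *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = 100 - x[i];
;;   }
;;
;; matches the vsa alternative and can be emitted as
;; "subr z0.s, z0.s, #100".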
4028 ;; Merging forms are handled through SVE_INT_BINARY.
4030 ;; -------------------------------------------------------------------------
4031 ;; ---- [INT] Take address
4032 ;; -------------------------------------------------------------------------
4035 ;; -------------------------------------------------------------------------
4037 ;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD,
4038 ;; but the svadrb intrinsics should preserve the user's choice.
4039 (define_insn "@aarch64_adr<mode>"
4040 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
4041 (unspec:SVE_FULL_SDI
4042 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
4043 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
4045 "TARGET_SVE && TARGET_NON_STREAMING"
4046 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
4049 ;; Same, but with the offset being sign-extended from the low 32 bits.
4050 (define_insn_and_rewrite "*aarch64_adr_sxtw"
4051 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4053 [(match_operand:VNx2DI 1 "register_operand" "w")
4058 (match_operand:VNx2DI 2 "register_operand" "w")))]
4061 "TARGET_SVE && TARGET_NON_STREAMING"
4062 "adr\t%0.d, [%1.d, %2.d, sxtw]"
4063 "&& !CONSTANT_P (operands[3])"
4065 operands[3] = CONSTM1_RTX (VNx2BImode);
4069 ;; Same, but with the offset being zero-extended from the low 32 bits.
4070 (define_insn "*aarch64_adr_uxtw_unspec"
4071 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4073 [(match_operand:VNx2DI 1 "register_operand" "w")
4075 (match_operand:VNx2DI 2 "register_operand" "w")
4076 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
4078 "TARGET_SVE && TARGET_NON_STREAMING"
4079 "adr\t%0.d, [%1.d, %2.d, uxtw]"
4082 ;; Same, matching as a PLUS rather than unspec.
4083 (define_insn "*aarch64_adr_uxtw_and"
4084 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4087 (match_operand:VNx2DI 2 "register_operand" "w")
4088 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
4089 (match_operand:VNx2DI 1 "register_operand" "w")))]
4090 "TARGET_SVE && TARGET_NON_STREAMING"
4091 "adr\t%0.d, [%1.d, %2.d, uxtw]"
4094 ;; ADR with a nonzero shift.
4095 (define_expand "@aarch64_adr<mode>_shift"
4096 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4098 (unspec:SVE_FULL_SDI
4100 (ashift:SVE_FULL_SDI
4101 (match_operand:SVE_FULL_SDI 2 "register_operand")
4102 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
4104 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
4105 "TARGET_SVE && TARGET_NON_STREAMING"
4107 operands[4] = CONSTM1_RTX (<VPRED>mode);
4111 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
4112 [(set (match_operand:SVE_24I 0 "register_operand" "=w")
4117 (match_operand:SVE_24I 2 "register_operand" "w")
4118 (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
4120 (match_operand:SVE_24I 1 "register_operand" "w")))]
4121 "TARGET_SVE && TARGET_NON_STREAMING"
4122 "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
4123 "&& !CONSTANT_P (operands[4])"
4125 operands[4] = CONSTM1_RTX (<VPRED>mode);
4129 ;; Same, but with the index being sign-extended from the low 32 bits.
4130 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
4131 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4140 (match_operand:VNx2DI 2 "register_operand" "w")))]
4142 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
4144 (match_operand:VNx2DI 1 "register_operand" "w")))]
4145 "TARGET_SVE && TARGET_NON_STREAMING"
4146 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
4147 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4149 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
4153 ;; Same, but with the index being zero-extended from the low 32 bits.
4154 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
4155 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4161 (match_operand:VNx2DI 2 "register_operand" "w")
4162 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
4163 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
4165 (match_operand:VNx2DI 1 "register_operand" "w")))]
4166 "TARGET_SVE && TARGET_NON_STREAMING"
4167 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
4168 "&& !CONSTANT_P (operands[5])"
4170 operands[5] = CONSTM1_RTX (VNx2BImode);
4174 ;; -------------------------------------------------------------------------
4175 ;; ---- [INT] Absolute difference
4176 ;; -------------------------------------------------------------------------
4180 ;; -------------------------------------------------------------------------
4182 ;; Unpredicated integer absolute difference.
4183 (define_expand "<su>abd<mode>3"
4184 [(use (match_operand:SVE_I 0 "register_operand"))
4186 (match_operand:SVE_I 1 "register_operand")
4187 (match_operand:SVE_I 2 "register_operand"))]
4190 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
4191 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
4197 ;; Predicated integer absolute difference.
4198 (define_insn "@aarch64_pred_<su>abd<mode>"
4199 [(set (match_operand:SVE_I 0 "register_operand")
4202 [(match_operand:<VPRED> 1 "register_operand")
4204 (match_operand:SVE_I 2 "register_operand")
4205 (match_operand:SVE_I 3 "register_operand"))]
4214 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4215 [ w , Upl , %0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4216 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
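;; For example (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (uint8_t *d, const uint8_t *a, const uint8_t *b, int n) {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
;;   }
;;
;; is recognised as max(a,b) - min(a,b), which is the form above, and
;; typically assembles to "uabd z0.b, p0/m, z0.b, z1.b".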
4220 (define_expand "@aarch64_cond_<su>abd<mode>"
4221 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4223 [(match_operand:<VPRED> 1 "register_operand")
4228 (match_operand:SVE_FULL_I 2 "register_operand")
4229 (match_operand:SVE_FULL_I 3 "register_operand"))]
4233 (<max_opp>:SVE_FULL_I
4237 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4241 if (rtx_equal_p (operands[3], operands[4]))
4242 std::swap (operands[2], operands[3]);
4245 ;; Predicated integer absolute difference, merging with the first input.
4246 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
4247 [(set (match_operand:SVE_I 0 "register_operand")
4249 [(match_operand:<VPRED> 1 "register_operand")
4254 (match_operand:SVE_I 2 "register_operand")
4255 (match_operand:SVE_I 3 "register_operand"))]
4266 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4267 [ w , Upl , 0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4268 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4270 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4272 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4276 ;; Predicated integer absolute difference, merging with the second input.
4277 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3"
4278 [(set (match_operand:SVE_I 0 "register_operand")
4280 [(match_operand:<VPRED> 1 "register_operand")
4285 (match_operand:SVE_I 2 "register_operand")
4286 (match_operand:SVE_I 3 "register_operand"))]
4297 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4298 [ w , Upl , w , 0 ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4299 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4301 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4303 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4307 ;; Predicated integer absolute difference, merging with an independent value.
4308 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
4309 [(set (match_operand:SVE_I 0 "register_operand")
4311 [(match_operand:<VPRED> 1 "register_operand")
4316 (match_operand:SVE_I 2 "register_operand")
4317 (match_operand:SVE_I 3 "register_operand"))]
4325 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4328 && !rtx_equal_p (operands[2], operands[4])
4329 && !rtx_equal_p (operands[3], operands[4])"
4330 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4331 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4332 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4333 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4334 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4335 [ ?&w , Upl , w , w , w ] #
4339 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
4340 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
4341 else if (reload_completed
4342 && register_operand (operands[4], <MODE>mode)
4343 && !rtx_equal_p (operands[0], operands[4]))
4345 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4346 operands[4], operands[1]));
4347 operands[4] = operands[2] = operands[0];
4352 [(set_attr "movprfx" "yes")]
4355 ;; -------------------------------------------------------------------------
4356 ;; ---- [INT] Saturating addition and subtraction
4357 ;; -------------------------------------------------------------------------
4362 ;; -------------------------------------------------------------------------
4364 ;; Unpredicated saturating signed addition and subtraction.
4365 (define_insn "@aarch64_sve_<optab><mode>"
4366 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4367 (SBINQOPS:SVE_FULL_I
4368 (match_operand:SVE_FULL_I 1 "register_operand")
4369 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand")))]
4371 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4372 [ w , 0 , vsQ ; * ] <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4373 [ w , 0 , vsS ; * ] <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4374 [ ?&w , w , vsQ ; yes ] movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4375 [ ?&w , w , vsS ; yes ] movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4376 [ w , w , w ; * ] <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4380 ;; Unpredicated saturating unsigned addition and subtraction.
4381 (define_insn "@aarch64_sve_<optab><mode>"
4382 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4383 (UBINQOPS:SVE_FULL_I
4384 (match_operand:SVE_FULL_I 1 "register_operand")
4385 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand")))]
4387 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4388 [ w , 0 , vsa ; * ] <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4389 [ ?&w , w , vsa ; yes ] movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4390 [ w , w , w ; * ] <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
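;; For example, via ACLE (illustrative source):
;;
;;   #include <arm_sve.h>
;;   svint32_t f (svint32_t x) {
;;     return svqadd_n_s32 (x, 50);
;;   }
;;
;; can use the signed immediate alternative above
;; ("sqadd z0.s, z0.s, #50"); a negative constant is instead emitted
;; through the reversed operation ("sqsub ... #%N2").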
4394 ;; -------------------------------------------------------------------------
4395 ;; ---- [INT] Highpart multiplication
4396 ;; -------------------------------------------------------------------------
4400 ;; -------------------------------------------------------------------------
4402 ;; Unpredicated highpart multiplication.
4403 (define_expand "<su>mul<mode>3_highpart"
4404 [(set (match_operand:SVE_I 0 "register_operand")
4408 [(match_operand:SVE_I 1 "register_operand")
4409 (match_operand:SVE_I 2 "register_operand")]
4414 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4418 ;; Predicated highpart multiplication.
4419 (define_insn "@aarch64_pred_<optab><mode>"
4420 [(set (match_operand:SVE_I 0 "register_operand")
4422 [(match_operand:<VPRED> 1 "register_operand")
4424 [(match_operand:SVE_I 2 "register_operand")
4425 (match_operand:SVE_I 3 "register_operand")]
4429 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4430 [ w , Upl , %0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4431 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
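;; For example (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (int32_t *d, const int32_t *a, const int32_t *b, int n) {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = (int32_t) (((int64_t) a[i] * b[i]) >> 32);
;;   }
;;
;; is a signed highpart multiplication and typically assembles to
;; "smulh z0.s, p0/m, z0.s, z1.s".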
4435 ;; Predicated highpart multiplications with merging.
4436 (define_expand "@cond_<optab><mode>"
4437 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4439 [(match_operand:<VPRED> 1 "register_operand")
4441 [(match_operand:SVE_FULL_I 2 "register_operand")
4442 (match_operand:SVE_FULL_I 3 "register_operand")]
4444 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4448 /* Only target code is aware of these operations, so we don't need
4449 to handle the fully-general case. */
4450 gcc_assert (rtx_equal_p (operands[2], operands[4])
4451 || CONSTANT_P (operands[4]));
4454 ;; Predicated highpart multiplications, merging with the first input.
4455 (define_insn "*cond_<optab><mode>_2"
4456 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4458 [(match_operand:<VPRED> 1 "register_operand")
4460 [(match_operand:SVE_FULL_I 2 "register_operand")
4461 (match_operand:SVE_FULL_I 3 "register_operand")]
4466 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4467 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4468 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4472 ;; Predicated highpart multiplications, merging with zero.
4473 (define_insn "*cond_<optab><mode>_z"
4474 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4476 [(match_operand:<VPRED> 1 "register_operand")
4478 [(match_operand:SVE_FULL_I 2 "register_operand")
4479 (match_operand:SVE_FULL_I 3 "register_operand")]
4481 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4484 {@ [ cons: =0 , 1 , 2 , 3 ]
4485 [ &w , Upl , %0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4486 [ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4488 [(set_attr "movprfx" "yes")])
4490 ;; -------------------------------------------------------------------------
4491 ;; ---- [INT] Division
4492 ;; -------------------------------------------------------------------------
4498 ;; -------------------------------------------------------------------------
4500 ;; Unpredicated integer division.
4501 ;; SVE has vector integer divisions, unlike Advanced SIMD.
4502 ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
4503 ;; optabs to the midend.
4504 (define_expand "<optab><mode>3"
4505 [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
4506 (unspec:SVE_FULL_SDI_SIMD
4508 (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
4509 (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
4510 (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
4514 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4518 ;; Integer division predicated with a PTRUE.
4519 (define_insn "@aarch64_pred_<optab><mode>"
4520 [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
4521 (unspec:SVE_FULL_SDI_SIMD
4522 [(match_operand:<VPRED> 1 "register_operand")
4523 (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
4524 (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
4525 (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
4528 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4529 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
4530 [ w , Upl , w , 0 ; * ] <sve_int_op>r\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z2.<Vetype>
4531 [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
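;; For example (illustrative source):
;;
;;   void f (int *d, const int *a, const int *b, int n) {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] / b[i];
;;   }
;;
;; has no Advanced SIMD equivalent but can vectorise here to
;; "sdiv z0.s, p0/m, z0.s, z1.s"; when the destination is tied to the
;; second input, the reversed form "sdivr" avoids an extra move.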
4535 ;; Predicated integer division with merging.
4536 (define_expand "@cond_<optab><mode>"
4537 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4538 (unspec:SVE_FULL_SDI
4539 [(match_operand:<VPRED> 1 "register_operand")
4540 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4541 (match_operand:SVE_FULL_SDI 2 "register_operand")
4542 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4543 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4548 ;; Predicated integer division, merging with the first input.
4549 (define_insn "*cond_<optab><mode>_2"
4550 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4551 (unspec:SVE_FULL_SDI
4552 [(match_operand:<VPRED> 1 "register_operand")
4553 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4554 (match_operand:SVE_FULL_SDI 2 "register_operand")
4555 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4559 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4560 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4561 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4565 ;; Predicated integer division, merging with the second input.
4566 (define_insn "*cond_<optab><mode>_3"
4567 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4568 (unspec:SVE_FULL_SDI
4569 [(match_operand:<VPRED> 1 "register_operand")
4570 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4571 (match_operand:SVE_FULL_SDI 2 "register_operand")
4572 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4576 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4577 [ w , Upl , w , 0 ; * ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4578 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4582 ;; Predicated integer division, merging with an independent value.
4583 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4584 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4585 (unspec:SVE_FULL_SDI
4586 [(match_operand:<VPRED> 1 "register_operand")
4587 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4588 (match_operand:SVE_FULL_SDI 2 "register_operand")
4589 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4590 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4593 && !rtx_equal_p (operands[2], operands[4])
4594 && !rtx_equal_p (operands[3], operands[4])"
4595 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4596 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4597 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4598 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4599 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4600 [ ?&w , Upl , w , w , w ] #
4602 "&& reload_completed
4603 && register_operand (operands[4], <MODE>mode)
4604 && !rtx_equal_p (operands[0], operands[4])"
4606 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4607 operands[4], operands[1]));
4608 operands[4] = operands[2] = operands[0];
4610 [(set_attr "movprfx" "yes")]
4613 ;; -------------------------------------------------------------------------
4614 ;; ---- [INT] Binary logical operations
4615 ;; -------------------------------------------------------------------------
4620 ;; -------------------------------------------------------------------------
4622 ;; Unpredicated integer binary logical operations.
4623 (define_insn "<optab><mode>3"
4624 [(set (match_operand:SVE_I 0 "register_operand")
4626 (match_operand:SVE_I 1 "register_operand")
4627 (match_operand:SVE_I 2 "aarch64_sve_logical_operand")))]
4629 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4630 [ w , %0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4631 [ ?w , w , vsl ; yes ] movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4632 [ w , w , w ; * ] <logical>\t%0.d, %1.d, %2.d
4636 ;; Merging forms are handled through SVE_INT_BINARY.
4638 ;; -------------------------------------------------------------------------
4639 ;; ---- [INT] Binary logical operations (inverted second input)
4640 ;; -------------------------------------------------------------------------
4643 ;; -------------------------------------------------------------------------
4645 ;; Unpredicated BIC; andn named pattern.
4646 (define_expand "andn<mode>3"
4647 [(set (match_operand:SVE_I 0 "register_operand")
4651 (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4653 (match_operand:SVE_I 1 "register_operand")))]
4656 operands[3] = CONSTM1_RTX (<VPRED>mode);
4661 (define_insn_and_rewrite "*bic<mode>3"
4662 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4667 (match_operand:SVE_I 2 "register_operand" "w"))]
4669 (match_operand:SVE_I 1 "register_operand" "w")))]
4671 "bic\t%0.d, %1.d, %2.d"
4672 "&& !CONSTANT_P (operands[3])"
4674 operands[3] = CONSTM1_RTX (<VPRED>mode);
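;; For example (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (uint64_t *d, const uint64_t *a, const uint64_t *b, int n) {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] & ~b[i];
;;   }
;;
;; matches this pattern and typically assembles to "bic z0.d, z1.d, z2.d";
;; the all-true predicate exists only because the RTL wraps the NOT in a
;; predicated unspec, hence the rewrite above when it is not yet constant.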
4678 ;; Predicated BIC with merging.
4679 (define_expand "@cond_bic<mode>"
4680 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4682 [(match_operand:<VPRED> 1 "register_operand")
4684 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4685 (match_operand:SVE_FULL_I 2 "register_operand"))
4686 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4691 ;; Predicated integer BIC, merging with the first input.
4692 (define_insn "*cond_bic<mode>_2"
4693 [(set (match_operand:SVE_I 0 "register_operand")
4695 [(match_operand:<VPRED> 1 "register_operand")
4698 (match_operand:SVE_I 3 "register_operand"))
4699 (match_operand:SVE_I 2 "register_operand"))
4703 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4704 [ w , Upl , 0 , w ; * ] bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4705 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4709 ;; Predicated integer BIC, merging with an independent value.
4710 (define_insn_and_rewrite "*cond_bic<mode>_any"
4711 [(set (match_operand:SVE_I 0 "register_operand")
4713 [(match_operand:<VPRED> 1 "register_operand")
4716 (match_operand:SVE_I 3 "register_operand"))
4717 (match_operand:SVE_I 2 "register_operand"))
4718 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4720 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4721 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4722 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4723 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4724 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4725 [ ?&w , Upl , w , w , w ] #
4727 "&& reload_completed
4728 && register_operand (operands[4], <MODE>mode)
4729 && !rtx_equal_p (operands[0], operands[4])"
4731 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4732 operands[4], operands[1]));
4733 operands[4] = operands[2] = operands[0];
4735 [(set_attr "movprfx" "yes")]
4738 ;; -------------------------------------------------------------------------
4739 ;; ---- [INT] Shifts (rounding towards -Inf)
4740 ;; -------------------------------------------------------------------------
4748 ;; -------------------------------------------------------------------------
4750 ;; Unpredicated shift by a scalar, which expands into one of the vector shifts below.
4752 (define_expand "<ASHIFT:optab><mode>3"
4753 [(set (match_operand:SVE_I 0 "register_operand")
4755 (match_operand:SVE_I 1 "register_operand")
4756 (match_operand:<VEL> 2 "general_operand")))]
4760 if (CONST_INT_P (operands[2]))
4762 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4763 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4764 amount = force_reg (<MODE>mode, amount);
4768 amount = convert_to_mode (<VEL>mode, operands[2], 0);
4769 amount = expand_vector_broadcast (<MODE>mode, amount);
4771 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
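;; For example, a shift by a variable amount (illustrative source):
;;
;;   void f (unsigned *x, int n, unsigned amt) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] <<= amt;
;;   }
;;
;; broadcasts AMT into a vector and uses the vector-shift pattern below
;; (typically "lsl z0.s, p0/m, z0.s, z1.s"), while a constant amount stays
;; in immediate form.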
4776 ;; Unpredicated shift by a vector.
4777 (define_expand "v<optab><mode>3"
4778 [(set (match_operand:SVE_I 0 "register_operand")
4782 (match_operand:SVE_I 1 "register_operand")
4783 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
4787 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4791 ;; Shift by a vector, predicated with a PTRUE. We don't actually need
4792 ;; the predicate for the first alternative, but using Upa or X isn't
4793 ;; likely to gain much and would make the instruction seem less uniform
4794 ;; to the register allocator.
4795 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4796 [(set (match_operand:SVE_I 0 "register_operand")
4798 [(match_operand:<VPRED> 1 "register_operand")
4800 (match_operand:SVE_I 2 "register_operand")
4801 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand"))]
4804 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4805 [ w , Upl , w , D<lr> ; * ] #
4806 [ w , Upl , 0 , w ; * ] <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4807 [ w , Upl , w , 0 ; * ] <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4808 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4810 "&& reload_completed
4811 && !register_operand (operands[3], <MODE>mode)"
4812 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
4816 ;; Unpredicated shift operations by a constant (post-RA only).
4817 ;; These are generated by splitting a predicated instruction whose
4818 ;; predicate is unused.
4819 (define_insn "*post_ra_v_ashl<mode>3"
4820 [(set (match_operand:SVE_I 0 "register_operand")
4822 (match_operand:SVE_I 1 "register_operand")
4823 (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
4824 "TARGET_SVE && reload_completed"
4825 {@ [ cons: =0 , 1 , 2 ]
4826 [ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
4827 [ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
4831 (define_insn "*post_ra_v_<optab><mode>3"
4832 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4834 (match_operand:SVE_I 1 "register_operand" "w")
4835 (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
4836 "TARGET_SVE && reload_completed"
4837 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4840 ;; Predicated integer shift, merging with the first input.
4841 (define_insn "*cond_<optab><mode>_2_const"
4842 [(set (match_operand:SVE_I 0 "register_operand")
4844 [(match_operand:<VPRED> 1 "register_operand")
4846 (match_operand:SVE_I 2 "register_operand")
4847 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4851 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4852 [ w , Upl , 0 ; * ] <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4853 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4857 ;; Predicated integer shift, merging with an independent value.
4858 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4859 [(set (match_operand:SVE_I 0 "register_operand")
4861 [(match_operand:<VPRED> 1 "register_operand")
4863 (match_operand:SVE_I 2 "register_operand")
4864 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4865 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4867 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4868 {@ [ cons: =0 , 1 , 2 , 4 ]
4869 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4870 [ &w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4871 [ ?&w , Upl , w , w ] #
4873 "&& reload_completed
4874 && register_operand (operands[4], <MODE>mode)
4875 && !rtx_equal_p (operands[0], operands[4])"
4877 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4878 operands[4], operands[1]));
4879 operands[4] = operands[2] = operands[0];
4881 [(set_attr "movprfx" "yes")]
4884 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
4885 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4886 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4887 (unspec:SVE_FULL_BHSI
4888 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4889 (match_operand:VNx2DI 2 "register_operand" "w")]
4892 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4895 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4896 (define_expand "@cond_<sve_int_op><mode>"
4897 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4898 (unspec:SVE_FULL_BHSI
4899 [(match_operand:<VPRED> 1 "register_operand")
4900 (unspec:SVE_FULL_BHSI
4901 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4902 (match_operand:VNx2DI 3 "register_operand")]
4904 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4909 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with the first input.
4911 (define_insn "*cond_<sve_int_op><mode>_m"
4912 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4913 (unspec:SVE_FULL_BHSI
4914 [(match_operand:<VPRED> 1 "register_operand")
4915 (unspec:SVE_FULL_BHSI
4916 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4917 (match_operand:VNx2DI 3 "register_operand")]
4922 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4923 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4924 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4928 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
4929 (define_insn "*cond_<sve_int_op><mode>_z"
4930 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4931 (unspec:SVE_FULL_BHSI
4932 [(match_operand:<VPRED> 1 "register_operand")
4933 (unspec:SVE_FULL_BHSI
4934 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4935 (match_operand:VNx2DI 3 "register_operand")]
4937 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4940 {@ [ cons: =0 , 1 , 2 , 3 ]
4941 [ &w , Upl , 0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4942 [ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4944 [(set_attr "movprfx" "yes")])
4946 ;; -------------------------------------------------------------------------
4947 ;; ---- [INT] Shifts (rounding towards 0)
4948 ;; -------------------------------------------------------------------------
4954 ;; -------------------------------------------------------------------------
4956 ;; Unpredicated ASRD.
4957 (define_expand "sdiv_pow2<mode>3"
4958 [(set (match_operand:SVE_I 0 "register_operand")
4962 [(match_operand:SVE_I 1 "register_operand")
4963 (match_operand 2 "aarch64_simd_rshift_imm")]
4968 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4973 (define_insn "*sdiv_pow2<mode>3"
4974 [(set (match_operand:SVE_I 0 "register_operand")
4976 [(match_operand:<VPRED> 1 "register_operand")
4978 [(match_operand:SVE_I 2 "register_operand")
4979 (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
4983 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4984 [ w , Upl , 0 ; * ] asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4985 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
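;; For example, "x[i] / 4" with signed x must round towards zero: for
;; x = -7 a plain arithmetic shift gives -7 >> 2 = -2, whereas C requires
;; -1, so the vectoriser uses "asrd z0.s, p0/m, z0.s, #2", which applies
;; the rounding correction before shifting.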
4989 ;; Predicated shift with merging.
4990 (define_expand "@cond_<sve_int_op><mode>"
4991 [(set (match_operand:SVE_I 0 "register_operand")
4993 [(match_operand:<VPRED> 1 "register_operand")
4997 [(match_operand:SVE_I 2 "register_operand")
4998 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5001 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
5005 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
5009 ;; Predicated shift, merging with the first input.
5010 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
5011 [(set (match_operand:SVE_I 0 "register_operand")
5013 [(match_operand:<VPRED> 1 "register_operand")
5017 [(match_operand:SVE_I 2 "register_operand")
5018 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5024 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
5025 [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5026 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5028 "&& !CONSTANT_P (operands[4])"
5030 operands[4] = CONSTM1_RTX (<VPRED>mode);
5034 ;; Predicated shift, merging with an independent value.
5035 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
5036 [(set (match_operand:SVE_I 0 "register_operand")
5038 [(match_operand:<VPRED> 1 "register_operand")
5042 [(match_operand:SVE_I 2 "register_operand")
5043 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5046 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
5048 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5049 {@ [ cons: =0 , 1 , 2 , 4 ]
5050 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5051 [ &w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5052 [ ?&w , Upl , w , w ] #
5054 "&& reload_completed
5055 && register_operand (operands[4], <MODE>mode)
5056 && !rtx_equal_p (operands[0], operands[4])"
5058 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5059 operands[4], operands[1]));
5060 operands[4] = operands[2] = operands[0];
5062 [(set_attr "movprfx" "yes")]
5065 ;; -------------------------------------------------------------------------
5066 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
5067 ;; -------------------------------------------------------------------------
5072 ;; -------------------------------------------------------------------------
5074 ;; Unpredicated floating-point binary operations that take an integer as
5075 ;; their second operand.
5076 (define_insn "@aarch64_sve_<optab><mode>"
5077 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5079 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5080 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
5081 SVE_FP_BINARY_INT))]
5083 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5086 ;; Predicated floating-point binary operations that take an integer
5087 ;; as their second operand.
5088 (define_insn "@aarch64_pred_<optab><mode>"
5089 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5091 [(match_operand:<VPRED> 1 "register_operand")
5092 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5093 (match_operand:SVE_FULL_F 2 "register_operand")
5094 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5095 SVE_COND_FP_BINARY_INT))]
5097 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5098 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5099 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5103 ;; Predicated floating-point binary operations with merging, taking an
5104 ;; integer as their second operand.
5105 (define_expand "@cond_<optab><mode>"
5106 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5108 [(match_operand:<VPRED> 1 "register_operand")
5111 (const_int SVE_STRICT_GP)
5112 (match_operand:SVE_FULL_F 2 "register_operand")
5113 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5114 SVE_COND_FP_BINARY_INT)
5115 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5120 ;; Predicated floating-point binary operations that take an integer as their
5121 ;; second operand, with inactive lanes coming from the first operand.
5122 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5123 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5125 [(match_operand:<VPRED> 1 "register_operand")
5128 (const_int SVE_RELAXED_GP)
5129 (match_operand:SVE_FULL_F 2 "register_operand")
5130 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5131 SVE_COND_FP_BINARY_INT)
5135 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5136 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5137 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5139 "&& !rtx_equal_p (operands[1], operands[4])"
5141 operands[4] = copy_rtx (operands[1]);
5145 (define_insn "*cond_<optab><mode>_2_strict"
5146 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5148 [(match_operand:<VPRED> 1 "register_operand")
5151 (const_int SVE_STRICT_GP)
5152 (match_operand:SVE_FULL_F 2 "register_operand")
5153 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5154 SVE_COND_FP_BINARY_INT)
5158 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5159 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5160 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5164 ;; Predicated floating-point binary operations that take an integer as
5165 ;; their second operand, with the values of inactive lanes being distinct
5166 ;; from the other inputs.
5167 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5168 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5170 [(match_operand:<VPRED> 1 "register_operand")
5173 (const_int SVE_RELAXED_GP)
5174 (match_operand:SVE_FULL_F 2 "register_operand")
5175 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5176 SVE_COND_FP_BINARY_INT)
5177 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5179 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5180 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5181 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5182 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5183 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5184 [ ?&w , Upl , w , w , w ] #
5188 if (reload_completed
5189 && register_operand (operands[4], <MODE>mode)
5190 && !rtx_equal_p (operands[0], operands[4]))
5192 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5193 operands[4], operands[1]));
5194 operands[4] = operands[2] = operands[0];
5196 else if (!rtx_equal_p (operands[1], operands[5]))
5197 operands[5] = copy_rtx (operands[1]);
5201 [(set_attr "movprfx" "yes")]
5204 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5205 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5207 [(match_operand:<VPRED> 1 "register_operand")
5210 (const_int SVE_STRICT_GP)
5211 (match_operand:SVE_FULL_F 2 "register_operand")
5212 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5213 SVE_COND_FP_BINARY_INT)
5214 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5216 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5217 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5218 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5219 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5220 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5221 [ ?&w , Upl , w , w , w ] #
5223 "&& reload_completed
5224 && register_operand (operands[4], <MODE>mode)
5225 && !rtx_equal_p (operands[0], operands[4])"
5227 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5228 operands[4], operands[1]));
5229 operands[4] = operands[2] = operands[0];
5231 [(set_attr "movprfx" "yes")]
5234 ;; -------------------------------------------------------------------------
5235 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
5236 ;; -------------------------------------------------------------------------
5237 ;; Includes post-RA forms of:
;; - FADD
;; - FMUL
;; - FSUB
5241 ;; -------------------------------------------------------------------------
5243 ;; Unpredicated floating-point binary operations (post-RA only).
5244 ;; These are generated by splitting a predicated instruction whose
5245 ;; predicate is unused.
5246 (define_insn "*post_ra_<sve_fp_op><mode>3"
5247 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5248 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
5249 (match_operand:SVE_FULL_F 1 "register_operand" "w")
5250 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
5251 "TARGET_SVE && reload_completed"
5252 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
5254 ;; -------------------------------------------------------------------------
5255 ;; ---- [FP] General binary arithmetic corresponding to unspecs
5256 ;; -------------------------------------------------------------------------
5257 ;; Includes merging forms of:
5258 ;; - FADD (constant forms handled in the "Addition" section)
5262 ;; - FMAXNM (including #0.0 and #1.0)
5264 ;; - FMINNM (including #0.0 and #1.0)
5265 ;; - FMUL (including #0.5 and #2.0)
5269 ;; - FSUB (constant forms handled in the "Addition" section)
5270 ;; - FSUBR (constant forms handled in the "Subtraction" section)
5271 ;; -------------------------------------------------------------------------
5273 ;; Unpredicated floating-point binary operations.
5274 (define_insn "@aarch64_sve_<optab><mode>"
5275 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5277 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5278 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5281 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5284 ;; Unpredicated floating-point binary operations that need to be predicated for SVE.
5286 (define_expand "<optab><mode>3"
5287 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5290 (const_int SVE_RELAXED_GP)
5291 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
5292 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
5293 SVE_COND_FP_BINARY_OPTAB))]
5296 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5300 ;; Predicated floating-point binary operations that have no immediate forms.
5301 (define_insn "@aarch64_pred_<optab><mode>"
5302 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5304 [(match_operand:<VPRED> 1 "register_operand")
5305 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5306 (match_operand:SVE_FULL_F 2 "register_operand")
5307 (match_operand:SVE_FULL_F 3 "register_operand")]
5308 SVE_COND_FP_BINARY_REG))]
5310 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5311 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5312 [ w , Upl , w , 0 ; * ] <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5313 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5317 ;; Predicated floating-point operations with merging.
5318 (define_expand "@cond_<optab><mode>"
5319 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5321 [(match_operand:<VPRED> 1 "register_operand")
5324 (const_int SVE_STRICT_GP)
5325 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
5326 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
5328 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5333 ;; Predicated floating-point operations, merging with the first input.
5334 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5335 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5337 [(match_operand:<VPRED> 1 "register_operand")
5340 (const_int SVE_RELAXED_GP)
5341 (match_operand:SVE_FULL_F 2 "register_operand")
5342 (match_operand:SVE_FULL_F 3 "register_operand")]
5347 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5348 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5349 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5351 "&& !rtx_equal_p (operands[1], operands[4])"
5353 operands[4] = copy_rtx (operands[1]);
5357 (define_insn "*cond_<optab><mode>_2_strict"
5358 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5360 [(match_operand:<VPRED> 1 "register_operand")
5363 (const_int SVE_STRICT_GP)
5364 (match_operand:SVE_FULL_F 2 "register_operand")
5365 (match_operand:SVE_FULL_F 3 "register_operand")]
5370 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5371 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5372 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5376 ;; Same for operations that take a 1-bit constant.
5377 (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
5378 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5380 [(match_operand:<VPRED> 1 "register_operand")
5383 (const_int SVE_RELAXED_GP)
5384 (match_operand:SVE_FULL_F 2 "register_operand")
5385 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5386 SVE_COND_FP_BINARY_I1)
5390 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
5391 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5392 [ ?w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5394 "&& !rtx_equal_p (operands[1], operands[4])"
5396 operands[4] = copy_rtx (operands[1]);
5400 (define_insn "*cond_<optab><mode>_2_const_strict"
5401 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5403 [(match_operand:<VPRED> 1 "register_operand")
5406 (const_int SVE_STRICT_GP)
5407 (match_operand:SVE_FULL_F 2 "register_operand")
5408 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5409 SVE_COND_FP_BINARY_I1)
5413 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
5414 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5415 [ ?w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5419 ;; Predicated floating-point operations, merging with the second input.
5420 (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
5421 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5423 [(match_operand:<VPRED> 1 "register_operand")
5426 (const_int SVE_RELAXED_GP)
5427 (match_operand:SVE_FULL_F 2 "register_operand")
5428 (match_operand:SVE_FULL_F 3 "register_operand")]
5433 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5434 [ w , Upl , w , 0 ; * ] <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5435 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5437 "&& !rtx_equal_p (operands[1], operands[4])"
5439 operands[4] = copy_rtx (operands[1]);
5443 (define_insn "*cond_<optab><mode>_3_strict"
5444 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5446 [(match_operand:<VPRED> 1 "register_operand")
5449 (const_int SVE_STRICT_GP)
5450 (match_operand:SVE_FULL_F 2 "register_operand")
5451 (match_operand:SVE_FULL_F 3 "register_operand")]
5456 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5457 [ w , Upl , w , 0 ; * ] <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5458 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5462 ;; Predicated floating-point operations, merging with an independent value.
5463 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5464 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5466 [(match_operand:<VPRED> 1 "register_operand")
5469 (const_int SVE_RELAXED_GP)
5470 (match_operand:SVE_FULL_F 2 "register_operand")
5471 (match_operand:SVE_FULL_F 3 "register_operand")]
5473 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5476 && !rtx_equal_p (operands[2], operands[4])
5477 && !rtx_equal_p (operands[3], operands[4])"
5478 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5479 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5480 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5481 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5482 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5483 [ ?&w , Upl , w , w , w ] #
5487 if (reload_completed
5488 && register_operand (operands[4], <MODE>mode)
5489 && !rtx_equal_p (operands[0], operands[4]))
5491 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5492 operands[4], operands[1]));
5493 operands[4] = operands[2] = operands[0];
5495 else if (!rtx_equal_p (operands[1], operands[5]))
5496 operands[5] = copy_rtx (operands[1]);
5500 [(set_attr "movprfx" "yes")]
5503 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5504 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5506 [(match_operand:<VPRED> 1 "register_operand")
5509 (const_int SVE_STRICT_GP)
5510 (match_operand:SVE_FULL_F 2 "register_operand")
5511 (match_operand:SVE_FULL_F 3 "register_operand")]
5513 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5516 && !rtx_equal_p (operands[2], operands[4])
5517 && !rtx_equal_p (operands[3], operands[4])"
5518 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5519 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5520 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5521 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5522 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5523 [ ?&w , Upl , w , w , w ] #
5525 "&& reload_completed
5526 && register_operand (operands[4], <MODE>mode)
5527 && !rtx_equal_p (operands[0], operands[4])"
5529 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5530 operands[4], operands[1]));
5531 operands[4] = operands[2] = operands[0];
5533 [(set_attr "movprfx" "yes")]
5536 ;; Same for operations that take a 1-bit constant.
5537 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
5538 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5540 [(match_operand:<VPRED> 1 "register_operand")
5543 (const_int SVE_RELAXED_GP)
5544 (match_operand:SVE_FULL_F 2 "register_operand")
5545 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5546 SVE_COND_FP_BINARY_I1)
5547 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5549 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5550 {@ [ cons: =0 , 1 , 2 , 4 ]
5551 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5552 [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5553 [ ?w , Upl , w , w ] #
5557 if (reload_completed
5558 && register_operand (operands[4], <MODE>mode)
5559 && !rtx_equal_p (operands[0], operands[4]))
5561 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5562 operands[4], operands[1]));
5563 operands[4] = operands[2] = operands[0];
5565 else if (!rtx_equal_p (operands[1], operands[5]))
5566 operands[5] = copy_rtx (operands[1]);
5570 [(set_attr "movprfx" "yes")]
5573 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
5574 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5576 [(match_operand:<VPRED> 1 "register_operand")
5579 (const_int SVE_STRICT_GP)
5580 (match_operand:SVE_FULL_F 2 "register_operand")
5581 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5582 SVE_COND_FP_BINARY_I1)
5583 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5585 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5586 {@ [ cons: =0 , 1 , 2 , 4 ]
5587 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5588 [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5589 [ ?w , Upl , w , w ] #
5591 "&& reload_completed
5592 && register_operand (operands[4], <MODE>mode)
5593 && !rtx_equal_p (operands[0], operands[4])"
5595 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5596 operands[4], operands[1]));
5597 operands[4] = operands[2] = operands[0];
5599 [(set_attr "movprfx" "yes")]
5602 ;; -------------------------------------------------------------------------
5603 ;; ---- [FP] Addition
5604 ;; -------------------------------------------------------------------------
5608 ;; -------------------------------------------------------------------------
5610 ;; Predicated floating-point addition.
5611 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5612 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5614 [(match_operand:<VPRED> 1 "register_operand")
5615 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5616 (match_operand:SVE_FULL_F 2 "register_operand")
5617 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
5620 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
5621 [ w , Upl , %0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5622 [ w , Upl , 0 , vsN , i ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5623 [ w , Upl , w , w , Z ; * ] #
5624 [ w , Upl , 0 , w , Ui1 ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5625 [ ?&w , Upl , w , vsA , i ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5626 [ ?&w , Upl , w , vsN , i ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5627 [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5629 ; Split the unpredicated form after reload, so that we don't have
5630 ; the unnecessary PTRUE.
5631 "&& reload_completed
5632 && register_operand (operands[3], <MODE>mode)
5633 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5634 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
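;; As an illustrative aside (C, not RTL): the vsA/vsN alternatives above
;; cover the FADD/FSUB immediate forms (FADD's immediate is restricted to
;; #0.5 and #1.0).  A rough sketch, assuming GCC with SVE enabled:
;;
;;   #include <arm_sve.h>
;;
;;   svfloat32_t
;;   add_one (svbool_t pg, svfloat32_t x)
;;   {
;;     return svadd_n_f32_m (pg, x, 1.0f);
;;   }
;;
;; which is expected to assemble to
;;   fadd    z0.s, p0/m, z0.s, #1.0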
5638 ;; Predicated floating-point addition of a constant, merging with the first input.
5640 (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
5641 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5643 [(match_operand:<VPRED> 1 "register_operand")
5646 (const_int SVE_RELAXED_GP)
5647 (match_operand:SVE_FULL_F 2 "register_operand")
5648 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5653 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5654 [ w , Upl , 0 , vsA ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5655 [ w , Upl , 0 , vsN ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5656 [ ?w , Upl , w , vsA ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5657 [ ?w , Upl , w , vsN ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5659 "&& !rtx_equal_p (operands[1], operands[4])"
5661 operands[4] = copy_rtx (operands[1]);
5665 (define_insn "*cond_add<mode>_2_const_strict"
5666 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5668 [(match_operand:<VPRED> 1 "register_operand")
5671 (const_int SVE_STRICT_GP)
5672 (match_operand:SVE_FULL_F 2 "register_operand")
5673 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5678 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5679 [ w , Upl , 0 , vsA ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5680 [ w , Upl , 0 , vsN ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5681 [ ?w , Upl , w , vsA ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5682 [ ?w , Upl , w , vsN ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5686 ;; Predicated floating-point addition of a constant, merging with an
5687 ;; independent value.
5688 (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
5689 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5691 [(match_operand:<VPRED> 1 "register_operand")
5694 (const_int SVE_RELAXED_GP)
5695 (match_operand:SVE_FULL_F 2 "register_operand")
5696 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5698 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5700 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5701 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5702 [ w , Upl , w , vsA , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5703 [ w , Upl , w , vsN , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5704 [ w , Upl , w , vsA , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5705 [ w , Upl , w , vsN , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5706 [ ?w , Upl , w , vsA , w ] #
5707 [ ?w , Upl , w , vsN , w ] #
5711 if (reload_completed
5712 && register_operand (operands[4], <MODE>mode)
5713 && !rtx_equal_p (operands[0], operands[4]))
5715 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5716 operands[4], operands[1]));
5717 operands[4] = operands[2] = operands[0];
5719 else if (!rtx_equal_p (operands[1], operands[5]))
5720 operands[5] = copy_rtx (operands[1]);
5724 [(set_attr "movprfx" "yes")]
5727 (define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
5728 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5730 [(match_operand:<VPRED> 1 "register_operand")
5733 (const_int SVE_STRICT_GP)
5734 (match_operand:SVE_FULL_F 2 "register_operand")
5735 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5737 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5739 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5740 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5741 [ w , Upl , w , vsA , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5742 [ w , Upl , w , vsN , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5743 [ w , Upl , w , vsA , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5744 [ w , Upl , w , vsN , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5745 [ ?w , Upl , w , vsA , w ] #
5746 [ ?w , Upl , w , vsN , w ] #
5748 "&& reload_completed
5749 && register_operand (operands[4], <MODE>mode)
5750 && !rtx_equal_p (operands[0], operands[4])"
5752 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5753 operands[4], operands[1]));
5754 operands[4] = operands[2] = operands[0];
5756 [(set_attr "movprfx" "yes")]
5759 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5761 ;; -------------------------------------------------------------------------
5762 ;; ---- [FP] Complex addition
5763 ;; -------------------------------------------------------------------------
5766 ;; -------------------------------------------------------------------------
5768 ;; Predicated FCADD.
5769 (define_insn "@aarch64_pred_<optab><mode>"
5770 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5772 [(match_operand:<VPRED> 1 "register_operand")
5773 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5774 (match_operand:SVE_FULL_F 2 "register_operand")
5775 (match_operand:SVE_FULL_F 3 "register_operand")]
5778 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5779 [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5780 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5784 ;; Predicated FCADD with merging.
5785 (define_expand "@cond_<optab><mode>"
5786 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5788 [(match_operand:<VPRED> 1 "register_operand")
5791 (const_int SVE_STRICT_GP)
5792 (match_operand:SVE_FULL_F 2 "register_operand")
5793 (match_operand:SVE_FULL_F 3 "register_operand")]
5795 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5800 ;; Predicated FCADD with a ptrue predicate, providing the unpredicated optab used by the auto-vectorizer.
5801 (define_expand "@cadd<rot><mode>3"
5802 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5805 (const_int SVE_RELAXED_GP)
5806 (match_operand:SVE_FULL_F 1 "register_operand")
5807 (match_operand:SVE_FULL_F 2 "register_operand")]
5811 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
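;; As an illustrative aside (C, not RTL): FCADD is also directly
;; accessible from C through the svcadd intrinsics, which use the
;; predicated patterns in this section.  A rough sketch, assuming GCC with
;; SVE enabled:
;;
;;   #include <arm_sve.h>
;;
;;   svfloat32_t
;;   cadd90 (svbool_t pg, svfloat32_t x, svfloat32_t y)
;;   {
;;     /* Complex addition of Y rotated by 90 degrees to X.  */
;;     return svcadd_f32_x (pg, x, y, 90);
;;   }
;;
;; which is expected to map onto
;;   fcadd   z0.s, p0/m, z0.s, z1.s, #90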
5814 ;; Predicated FCADD, merging with the first input.
5815 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5816 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5818 [(match_operand:<VPRED> 1 "register_operand")
5821 (const_int SVE_RELAXED_GP)
5822 (match_operand:SVE_FULL_F 2 "register_operand")
5823 (match_operand:SVE_FULL_F 3 "register_operand")]
5828 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5829 [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5830 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5832 "&& !rtx_equal_p (operands[1], operands[4])"
5834 operands[4] = copy_rtx (operands[1]);
5838 (define_insn "*cond_<optab><mode>_2_strict"
5839 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5841 [(match_operand:<VPRED> 1 "register_operand")
5844 (const_int SVE_STRICT_GP)
5845 (match_operand:SVE_FULL_F 2 "register_operand")
5846 (match_operand:SVE_FULL_F 3 "register_operand")]
5851 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5852 [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5853 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5857 ;; Predicated FCADD, merging with an independent value.
5858 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5859 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5861 [(match_operand:<VPRED> 1 "register_operand")
5864 (const_int SVE_RELAXED_GP)
5865 (match_operand:SVE_FULL_F 2 "register_operand")
5866 (match_operand:SVE_FULL_F 3 "register_operand")]
5868 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5870 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5871 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5872 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5873 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5874 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5875 [ ?&w , Upl , w , w , w ] #
5879 if (reload_completed
5880 && register_operand (operands[4], <MODE>mode)
5881 && !rtx_equal_p (operands[0], operands[4]))
5883 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5884 operands[4], operands[1]));
5885 operands[4] = operands[2] = operands[0];
5887 else if (!rtx_equal_p (operands[1], operands[5]))
5888 operands[5] = copy_rtx (operands[1]);
5892 [(set_attr "movprfx" "yes")]
5895 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5896 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5898 [(match_operand:<VPRED> 1 "register_operand")
5901 (const_int SVE_STRICT_GP)
5902 (match_operand:SVE_FULL_F 2 "register_operand")
5903 (match_operand:SVE_FULL_F 3 "register_operand")]
5905 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5907 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5908 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5909 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5910 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5911 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5912 [ ?&w , Upl , w , w , w ] #
5914 "&& reload_completed
5915 && register_operand (operands[4], <MODE>mode)
5916 && !rtx_equal_p (operands[0], operands[4])"
5918 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5919 operands[4], operands[1]));
5920 operands[4] = operands[2] = operands[0];
5922 [(set_attr "movprfx" "yes")]
5925 ;; -------------------------------------------------------------------------
5926 ;; ---- [FP] Subtraction
5927 ;; -------------------------------------------------------------------------
5931 ;; -------------------------------------------------------------------------
5933 ;; Predicated floating-point subtraction.
5934 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5935 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5937 [(match_operand:<VPRED> 1 "register_operand")
5938 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5939 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
5940 (match_operand:SVE_FULL_F 3 "register_operand")]
5943 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
5944 [ w , Upl , vsA , 0 , i ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5945 [ w , Upl , w , w , Z ; * ] #
5946 [ w , Upl , 0 , w , Ui1 ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5947 [ w , Upl , w , 0 , Ui1 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5948 [ ?&w , Upl , vsA , w , i ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5949 [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5951 ; Split the unpredicated form after reload, so that we don't have
5952 ; the unnecessary PTRUE.
5953 "&& reload_completed
5954 && register_operand (operands[2], <MODE>mode)
5955 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5956 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5960 ;; Predicated floating-point subtraction from a constant, merging with the second input.
5962 (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
5963 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5965 [(match_operand:<VPRED> 1 "register_operand")
5968 (const_int SVE_RELAXED_GP)
5969 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5970 (match_operand:SVE_FULL_F 3 "register_operand")]
5975 {@ [ cons: =0 , 1 , 3 ; attrs: movprfx ]
5976 [ w , Upl , 0 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5977 [ ?w , Upl , w ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5979 "&& !rtx_equal_p (operands[1], operands[4])"
5981 operands[4] = copy_rtx (operands[1]);
5985 (define_insn "*cond_sub<mode>_3_const_strict"
5986 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5988 [(match_operand:<VPRED> 1 "register_operand")
5991 (const_int SVE_STRICT_GP)
5992 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5993 (match_operand:SVE_FULL_F 3 "register_operand")]
5998 {@ [ cons: =0 , 1 , 3 ; attrs: movprfx ]
5999 [ w , Upl , 0 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6000 [ ?w , Upl , w ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6004 ;; Predicated floating-point subtraction from a constant, merging with an
6005 ;; independent value.
6006 (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
6007 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6009 [(match_operand:<VPRED> 1 "register_operand")
6012 (const_int SVE_RELAXED_GP)
6013 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
6014 (match_operand:SVE_FULL_F 3 "register_operand")]
6016 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6018 "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
6019 {@ [ cons: =0 , 1 , 3 , 4 ]
6020 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6021 [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6022 [ ?w , Upl , w , w ] #
6026 if (reload_completed
6027 && register_operand (operands[4], <MODE>mode)
6028 && !rtx_equal_p (operands[0], operands[4]))
6030 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6031 operands[4], operands[1]));
6032 operands[4] = operands[3] = operands[0];
6034 else if (!rtx_equal_p (operands[1], operands[5]))
6035 operands[5] = copy_rtx (operands[1]);
6039 [(set_attr "movprfx" "yes")]
6042 (define_insn_and_rewrite "*cond_sub<mode>_const_strict"
6043 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6045 [(match_operand:<VPRED> 1 "register_operand")
6048 (const_int SVE_STRICT_GP)
6049 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
6050 (match_operand:SVE_FULL_F 3 "register_operand")]
6052 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6054 "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
6055 {@ [ cons: =0 , 1 , 3 , 4 ]
6056 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6057 [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6058 [ ?w , Upl , w , w ] #
6060 "&& reload_completed
6061 && register_operand (operands[4], <MODE>mode)
6062 && !rtx_equal_p (operands[0], operands[4])"
6064 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6065 operands[4], operands[1]));
6066 operands[4] = operands[3] = operands[0];
6068 [(set_attr "movprfx" "yes")]
6070 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
6072 ;; -------------------------------------------------------------------------
6073 ;; ---- [FP] Absolute difference
6074 ;; -------------------------------------------------------------------------
6077 ;; -------------------------------------------------------------------------
6079 ;; Predicated floating-point absolute difference.
6080 (define_expand "@aarch64_pred_abd<mode>"
6081 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6083 [(match_operand:<VPRED> 1 "register_operand")
6084 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6088 (match_operand:SVE_FULL_F 2 "register_operand")
6089 (match_operand:SVE_FULL_F 3 "register_operand")]
6095 ;; Predicated floating-point absolute difference.
6096 (define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
6097 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6099 [(match_operand:<VPRED> 1 "register_operand")
6100 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6103 (const_int SVE_RELAXED_GP)
6104 (match_operand:SVE_FULL_F 2 "register_operand")
6105 (match_operand:SVE_FULL_F 3 "register_operand")]
6109 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6110 [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6111 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6113 "&& !rtx_equal_p (operands[1], operands[5])"
6115 operands[5] = copy_rtx (operands[1]);
6119 (define_insn "*aarch64_pred_abd<mode>_strict"
6120 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6122 [(match_operand:<VPRED> 1 "register_operand")
6123 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6126 (const_int SVE_STRICT_GP)
6127 (match_operand:SVE_FULL_F 2 "register_operand")
6128 (match_operand:SVE_FULL_F 3 "register_operand")]
6132 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6133 [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6134 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6138 (define_expand "@aarch64_cond_abd<mode>"
6139 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6141 [(match_operand:<VPRED> 1 "register_operand")
6144 (const_int SVE_STRICT_GP)
6147 (const_int SVE_STRICT_GP)
6148 (match_operand:SVE_FULL_F 2 "register_operand")
6149 (match_operand:SVE_FULL_F 3 "register_operand")]
6152 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6156 if (rtx_equal_p (operands[3], operands[4]))
6157 std::swap (operands[2], operands[3]);
6160 ;; Predicated floating-point absolute difference, merging with the first input.
6162 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
6163 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6165 [(match_operand:<VPRED> 1 "register_operand")
6168 (const_int SVE_RELAXED_GP)
6171 (const_int SVE_RELAXED_GP)
6172 (match_operand:SVE_FULL_F 2 "register_operand")
6173 (match_operand:SVE_FULL_F 3 "register_operand")]
6179 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6180 [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6181 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6183 "&& (!rtx_equal_p (operands[1], operands[4])
6184 || !rtx_equal_p (operands[1], operands[5]))"
6186 operands[4] = copy_rtx (operands[1]);
6187 operands[5] = copy_rtx (operands[1]);
6191 (define_insn "*aarch64_cond_abd<mode>_2_strict"
6192 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6194 [(match_operand:<VPRED> 1 "register_operand")
6197 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6200 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6201 (match_operand:SVE_FULL_F 2 "register_operand")
6202 (match_operand:SVE_FULL_F 3 "register_operand")]
6208 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6209 [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6210 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6214 ;; Predicated floating-point absolute difference, merging with the second input.
6216 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
6217 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6219 [(match_operand:<VPRED> 1 "register_operand")
6222 (const_int SVE_RELAXED_GP)
6225 (const_int SVE_RELAXED_GP)
6226 (match_operand:SVE_FULL_F 2 "register_operand")
6227 (match_operand:SVE_FULL_F 3 "register_operand")]
6233 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6234 [ w , Upl , w , 0 ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6235 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6237 "&& (!rtx_equal_p (operands[1], operands[4])
6238 || !rtx_equal_p (operands[1], operands[5]))"
6240 operands[4] = copy_rtx (operands[1]);
6241 operands[5] = copy_rtx (operands[1]);
6245 (define_insn "*aarch64_cond_abd<mode>_3_strict"
6246 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6248 [(match_operand:<VPRED> 1 "register_operand")
6251 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6254 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6255 (match_operand:SVE_FULL_F 2 "register_operand")
6256 (match_operand:SVE_FULL_F 3 "register_operand")]
6262 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6263 [ w , Upl , w , 0 ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6264 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6268 ;; Predicated floating-point absolute difference, merging with an
6269 ;; independent value.
6270 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
6271 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6273 [(match_operand:<VPRED> 1 "register_operand")
6276 (const_int SVE_RELAXED_GP)
6279 (const_int SVE_RELAXED_GP)
6280 (match_operand:SVE_FULL_F 2 "register_operand")
6281 (match_operand:SVE_FULL_F 3 "register_operand")]
6284 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6287 && !rtx_equal_p (operands[2], operands[4])
6288 && !rtx_equal_p (operands[3], operands[4])"
6289 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
6290 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6291 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6292 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6293 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6294 [ ?&w , Upl , w , w , w ] #
6298 if (reload_completed
6299 && register_operand (operands[4], <MODE>mode)
6300 && !rtx_equal_p (operands[0], operands[4]))
6302 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6303 operands[4], operands[1]));
6304 operands[4] = operands[3] = operands[0];
6306 else if (!rtx_equal_p (operands[1], operands[5])
6307 || !rtx_equal_p (operands[1], operands[6]))
6309 operands[5] = copy_rtx (operands[1]);
6310 operands[6] = copy_rtx (operands[1]);
6315 [(set_attr "movprfx" "yes")]
6318 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
6319 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6321 [(match_operand:<VPRED> 1 "register_operand")
6324 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6327 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6328 (match_operand:SVE_FULL_F 2 "register_operand")
6329 (match_operand:SVE_FULL_F 3 "register_operand")]
6332 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6335 && !rtx_equal_p (operands[2], operands[4])
6336 && !rtx_equal_p (operands[3], operands[4])"
6337 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
6338 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6339 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6340 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6341 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6342 [ ?&w , Upl , w , w , w ] #
6344 "&& reload_completed
6345 && register_operand (operands[4], <MODE>mode)
6346 && !rtx_equal_p (operands[0], operands[4])"
6348 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6349 operands[4], operands[1]));
6350 operands[4] = operands[3] = operands[0];
6352 [(set_attr "movprfx" "yes")]
6355 ;; -------------------------------------------------------------------------
6356 ;; ---- [FP] Multiplication
6357 ;; -------------------------------------------------------------------------
6360 ;; -------------------------------------------------------------------------
6362 ;; Predicated floating-point multiplication.
6363 (define_insn_and_split "@aarch64_pred_<optab><mode>"
6364 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6366 [(match_operand:<VPRED> 1 "register_operand")
6367 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6368 (match_operand:SVE_FULL_F 2 "register_operand")
6369 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
6372 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
6373 [ w , Upl , %0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6374 [ w , Upl , w , w , Z ; * ] #
6375 [ w , Upl , 0 , w , Ui1 ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6376 [ ?&w , Upl , w , vsM , i ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6377 [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6379 ; Split the unpredicated form after reload, so that we don't have
6380 ; the unnecessary PTRUE.
6381 "&& reload_completed
6382 && register_operand (operands[3], <MODE>mode)
6383 && INTVAL (operands[4]) == SVE_RELAXED_GP"
6384 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
6388 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6389 ;; SVE_COND_FP_BINARY_I1.
6391 ;; Unpredicated multiplication by selected lanes.
6392 (define_insn "@aarch64_mul_lane_<mode>"
6393 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6396 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
6397 (match_operand:SI 3 "const_int_operand")]
6398 UNSPEC_SVE_LANE_SELECT)
6399 (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
6401 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6404 ;; -------------------------------------------------------------------------
6405 ;; ---- [FP] Division
6406 ;; -------------------------------------------------------------------------
6407 ;; The patterns in this section are synthetic.
6408 ;; -------------------------------------------------------------------------
6410 (define_expand "div<mode>3"
6411 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6414 (const_int SVE_RELAXED_GP)
6415 (match_operand:SVE_FULL_F 1 "nonmemory_operand")
6416 (match_operand:SVE_FULL_F 2 "register_operand")]
6420 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
6423 operands[1] = force_reg (<MODE>mode, operands[1]);
6424 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
6428 (define_expand "@aarch64_frecpe<mode>"
6429 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6431 [(match_operand:SVE_FULL_F 1 "register_operand")]
6436 (define_expand "@aarch64_frecps<mode>"
6437 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6439 [(match_operand:SVE_FULL_F 1 "register_operand")
6440 (match_operand:SVE_FULL_F 2 "register_operand")]
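;; As an illustrative aside (C, not RTL): the frecpe/frecps expanders
;; above provide building blocks that the approximate-division path
;; (aarch64_emit_approx_div) can use.  A rough sketch of one
;; Newton-Raphson refinement step for 1/b, assuming GCC with SVE enabled;
;; the real sequence and number of steps depend on the mode and on the
;; precision/fast-math options in force:
;;
;;   #include <arm_sve.h>
;;
;;   svfloat32_t
;;   recip_step (svfloat32_t b)
;;   {
;;     svbool_t pg = svptrue_b32 ();
;;     svfloat32_t est = svrecpe_f32 (b);   /* FRECPE estimate.  */
;;     /* FRECPS computes (2 - b * est); multiplying by it refines EST.  */
;;     return svmul_f32_x (pg, est, svrecps_f32 (b, est));
;;   }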
6445 ;; -------------------------------------------------------------------------
6446 ;; ---- [FP] Binary logical operations
6447 ;; -------------------------------------------------------------------------
6452 ;; -------------------------------------------------------------------------
6454 ;; Binary logical operations on floating-point modes. We avoid subregs
6455 ;; by providing this, but we need to use UNSPECs since rtx logical ops
6456 ;; aren't defined for floating-point modes.
6457 (define_insn "*<optab><mode>3"
6458 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6460 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
6461 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
6464 "<logicalf_op>\t%0.d, %1.d, %2.d"
6467 ;; -------------------------------------------------------------------------
6468 ;; ---- [FP] Sign copying
6469 ;; -------------------------------------------------------------------------
6470 ;; The patterns in this section are synthetic.
6471 ;; -------------------------------------------------------------------------
6473 (define_expand "copysign<mode>3"
6474 [(match_operand:SVE_FULL_F 0 "register_operand")
6475 (match_operand:SVE_FULL_F 1 "register_operand")
6476 (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
6479 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6480 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6481 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6482 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6484 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6485 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6488 = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6489 HOST_WIDE_INT_M1U << bits);
6491 /* copysign (x, -1) should instead be expanded as orr with the sign bit.  */
6493 if (!REG_P (operands[2]))
6495 rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
6496 if (GET_CODE (op2_elt) == CONST_DOUBLE
6497 && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
6499 emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
6500 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6505 operands[2] = force_reg (<MODE>mode, operands[2]);
6506 emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
6507 emit_insn (gen_and<v_int_equiv>3
6509 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6512 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
6513 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
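;; As an illustrative aside (C, not RTL): the expander above implements
;; copysign on the integer view of the vectors, ANDing each operand with a
;; sign or mantissa mask and ORRing the results.  A rough sketch, assuming
;; -O3 and an SVE target:
;;
;;   #include <math.h>
;;
;;   void
;;   copy_signs (float *restrict x, const float *restrict y, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = copysignf (x[i], y[i]);
;;   }
;;
;; The vectorized loop is expected to use the bitwise sequence built
;; above rather than a dedicated copysign instruction.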
6518 (define_expand "cond_copysign<mode>"
6519 [(match_operand:SVE_FULL_F 0 "register_operand")
6520 (match_operand:<VPRED> 1 "register_operand")
6521 (match_operand:SVE_FULL_F 2 "register_operand")
6522 (match_operand:SVE_FULL_F 3 "nonmemory_operand")
6523 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6526 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6527 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6528 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6529 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6531 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6532 rtx arg3 = lowpart_subreg (<V_INT_EQUIV>mode, operands[3], <MODE>mode);
6533 rtx arg4 = lowpart_subreg (<V_INT_EQUIV>mode, operands[4], <MODE>mode);
6536 = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6537 HOST_WIDE_INT_M1U << bits);
6539 /* copysign (x, -1) should instead be expanded as orr with the sign bit.  */
6541 if (!REG_P (operands[3]))
6543 rtx op2_elt = unwrap_const_vec_duplicate (operands[3]);
6544 if (GET_CODE (op2_elt) == CONST_DOUBLE
6545 && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
6547 arg3 = force_reg (<V_INT_EQUIV>mode, v_sign_bitmask);
6548 emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], arg2,
6550 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6555 operands[2] = force_reg (<MODE>mode, operands[3]);
6556 emit_insn (gen_and<v_int_equiv>3 (sign, arg3, v_sign_bitmask));
6557 emit_insn (gen_and<v_int_equiv>3
6559 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6562 emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], sign, mant,
6564 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6569 (define_expand "xorsign<mode>3"
6570 [(match_operand:SVE_FULL_F 0 "register_operand")
6571 (match_operand:SVE_FULL_F 1 "register_operand")
6572 (match_operand:SVE_FULL_F 2 "register_operand")]
6575 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6576 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6577 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6579 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6580 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6582 emit_insn (gen_and<v_int_equiv>3
6584 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6587 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
6588 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
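;; As an illustrative aside (C, not RTL): xorsign is the form the middle
;; end uses for expressions such as a * copysign (1.0, b).  A rough
;; sketch, assuming -O3 and an SVE target:
;;
;;   #include <math.h>
;;
;;   void
;;   apply_signs (float *restrict x, const float *restrict y, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = x[i] * copysignf (1.0f, y[i]);
;;   }
;;
;; which is expected to expand as above: AND out the sign bits of y and
;; EOR them into x, avoiding a full copysign followed by a multiply.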
6593 ;; -------------------------------------------------------------------------
6594 ;; ---- [FP] Maximum and minimum
6595 ;; -------------------------------------------------------------------------
6601 ;; -------------------------------------------------------------------------
6603 ;; Predicated floating-point maximum/minimum.
6604 (define_insn "@aarch64_pred_<optab><mode>"
6605 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6607 [(match_operand:<VPRED> 1 "register_operand")
6608 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6609 (match_operand:SVE_FULL_F 2 "register_operand")
6610 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
6611 SVE_COND_FP_MAXMIN))]
6613 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6614 [ w , Upl , %0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6615 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6616 [ ?&w , Upl , w , vsB ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6617 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6621 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6622 ;; SVE_COND_FP_BINARY_I1.
6624 ;; -------------------------------------------------------------------------
6625 ;; ---- [PRED] Binary logical operations
6626 ;; -------------------------------------------------------------------------
6634 ;; -------------------------------------------------------------------------
6636 ;; Predicate AND. We can reuse one of the inputs as the GP.
6637 ;; Doubling the second operand is the preferred implementation
6638 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
6639 (define_insn "and<mode>3"
6640 [(set (match_operand:PRED_ALL 0 "register_operand")
6641 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
6642 (match_operand:PRED_ALL 2 "register_operand")))]
6644 {@ [ cons: =0, 1 , 2 ; attrs: pred_clobber ]
6645 [ &Upa , Upa , Upa ; yes ] and\t%0.b, %1/z, %2.b, %2.b
6646 [ ?Upa , 0Upa, 0Upa; yes ] ^
6647 [ Upa , Upa , Upa ; no ] ^
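;; As an illustrative aside (C, not RTL): the predicate logical patterns
;; are also reachable through the ACLE predicate intrinsics.  A rough
;; sketch, assuming GCC with SVE enabled:
;;
;;   #include <arm_sve.h>
;;
;;   svbool_t
;;   pred_and (svbool_t pg, svbool_t a, svbool_t b)
;;   {
;;     /* Zero the lanes where PG is false, AND the remaining lanes.  */
;;     return svand_z (pg, a, b);
;;   }
;;
;; which is expected to map onto something like
;;   and     p0.b, p0/z, p1.b, p2.b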
6651 ;; Unpredicated predicate EOR and ORR.
6652 (define_expand "<optab><mode>3"
6653 [(set (match_operand:PRED_ALL 0 "register_operand")
6655 (LOGICAL_OR:PRED_ALL
6656 (match_operand:PRED_ALL 1 "register_operand")
6657 (match_operand:PRED_ALL 2 "register_operand"))
6661 operands[3] = aarch64_ptrue_reg (<MODE>mode);
6665 ;; Predicated predicate AND, EOR and ORR.
6666 (define_insn "@aarch64_pred_<optab><mode>_z"
6667 [(set (match_operand:PRED_ALL 0 "register_operand")
6670 (match_operand:PRED_ALL 2 "register_operand")
6671 (match_operand:PRED_ALL 3 "register_operand"))
6672 (match_operand:PRED_ALL 1 "register_operand")))]
6674 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6675 [ &Upa , Upa , Upa , Upa ; yes ] <logical>\t%0.b, %1/z, %2.b, %3.b
6676 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6677 [ Upa , Upa , Upa , Upa ; no ] ^
6681 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
6682 ;; the GP. Store the result in operand 0 and set the flags in the same
6683 ;; way as for PTEST.
6684 (define_insn "*<optab><mode>3_cc"
6685 [(set (reg:CC_NZC CC_REGNUM)
6687 [(match_operand:VNx16BI 1 "register_operand")
6689 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6692 (match_operand:PRED_ALL 2 "register_operand")
6693 (match_operand:PRED_ALL 3 "register_operand"))
6696 (set (match_operand:PRED_ALL 0 "register_operand")
6697 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
6700 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6701 [ &Upa , Upa , Upa , Upa ; yes ] <logical>s\t%0.b, %1/z, %2.b, %3.b
6702 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6703 [ Upa , Upa , Upa , Upa ; no ] ^
6707 ;; Same with just the flags result.
6708 (define_insn "*<optab><mode>3_ptest"
6709 [(set (reg:CC_NZC CC_REGNUM)
6711 [(match_operand:VNx16BI 1 "register_operand")
6713 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6716 (match_operand:PRED_ALL 2 "register_operand")
6717 (match_operand:PRED_ALL 3 "register_operand"))
6720 (clobber (match_scratch:VNx16BI 0))]
6722 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6723 [ &Upa , Upa , Upa , Upa ; yes ] <logical>s\t%0.b, %1/z, %2.b, %3.b
6724 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6725 [ Upa , Upa , Upa , Upa ; no ] ^
6729 ;; -------------------------------------------------------------------------
6730 ;; ---- [PRED] Binary logical operations (inverted second input)
6731 ;; -------------------------------------------------------------------------
6735 ;; -------------------------------------------------------------------------
6737 ;; Predicated predicate BIC and ORN.
6738 (define_insn "aarch64_pred_<nlogical><mode>_z"
6739 [(set (match_operand:PRED_ALL 0 "register_operand")
6742 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand"))
6743 (match_operand:PRED_ALL 2 "register_operand"))
6744 (match_operand:PRED_ALL 1 "register_operand")))]
6746 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6747 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>\t%0.b, %1/z, %2.b, %3.b
6748 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6749 [ Upa , Upa , Upa , Upa ; no ] ^
6753 ;; Same, but set the flags as a side-effect.
6754 (define_insn "*<nlogical><mode>3_cc"
6755 [(set (reg:CC_NZC CC_REGNUM)
6757 [(match_operand:VNx16BI 1 "register_operand")
6759 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6763 (match_operand:PRED_ALL 3 "register_operand"))
6764 (match_operand:PRED_ALL 2 "register_operand"))
6767 (set (match_operand:PRED_ALL 0 "register_operand")
6768 (and:PRED_ALL (NLOGICAL:PRED_ALL
6769 (not:PRED_ALL (match_dup 3))
6773 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6774 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>s\t%0.b, %1/z, %2.b, %3.b
6775 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6776 [ Upa , Upa , Upa , Upa ; no ] ^
6780 ;; Same with just the flags result.
6781 (define_insn "*<nlogical><mode>3_ptest"
6782 [(set (reg:CC_NZC CC_REGNUM)
6784 [(match_operand:VNx16BI 1 "register_operand")
6786 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6790 (match_operand:PRED_ALL 3 "register_operand"))
6791 (match_operand:PRED_ALL 2 "register_operand"))
6794 (clobber (match_scratch:VNx16BI 0))]
6796 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6797 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>s\t%0.b, %1/z, %2.b, %3.b
6798 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6799 [ Upa , Upa , Upa , Upa ; no ] ^
6803 ;; -------------------------------------------------------------------------
6804 ;; ---- [PRED] Binary logical operations (inverted result)
6805 ;; -------------------------------------------------------------------------
6809 ;; -------------------------------------------------------------------------
6811 ;; Predicated predicate NAND and NOR.
6812 (define_insn "aarch64_pred_<logical_nn><mode>_z"
6813 [(set (match_operand:PRED_ALL 0 "register_operand")
6816 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand"))
6817 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand")))
6818 (match_operand:PRED_ALL 1 "register_operand")))]
6820 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6821 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>\t%0.b, %1/z, %2.b, %3.b
6822 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6823 [ Upa , Upa , Upa , Upa ; no ] ^
6827 ;; Same, but set the flags as a side-effect.
6828 (define_insn "*<logical_nn><mode>3_cc"
6829 [(set (reg:CC_NZC CC_REGNUM)
6831 [(match_operand:VNx16BI 1 "register_operand")
6833 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6837 (match_operand:PRED_ALL 2 "register_operand"))
6839 (match_operand:PRED_ALL 3 "register_operand")))
6842 (set (match_operand:PRED_ALL 0 "register_operand")
6843 (and:PRED_ALL (NLOGICAL:PRED_ALL
6844 (not:PRED_ALL (match_dup 2))
6845 (not:PRED_ALL (match_dup 3)))
6848 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6849 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>s\t%0.b, %1/z, %2.b, %3.b
6850 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6851 [ Upa , Upa , Upa , Upa ; no ] ^
6855 ;; Same with just the flags result.
6856 (define_insn "*<logical_nn><mode>3_ptest"
6857 [(set (reg:CC_NZC CC_REGNUM)
6859 [(match_operand:VNx16BI 1 "register_operand")
6861 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6865 (match_operand:PRED_ALL 2 "register_operand"))
6867 (match_operand:PRED_ALL 3 "register_operand")))
6870 (clobber (match_scratch:VNx16BI 0))]
6872 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6873 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>s\t%0.b, %1/z, %2.b, %3.b
6874 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6875 [ Upa , Upa , Upa , Upa ; no ] ^
6879 ;; =========================================================================
6880 ;; == Ternary arithmetic
6881 ;; =========================================================================
6883 ;; -------------------------------------------------------------------------
6884 ;; ---- [INT] MLA and MAD
6885 ;; -------------------------------------------------------------------------
6889 ;; -------------------------------------------------------------------------
6891 ;; Unpredicated integer addition of product.
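;; That is, each element of the result is (operand 1 * operand 2) + operand 3.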
6892 (define_expand "fma<mode>4"
6893 [(set (match_operand:SVE_I 0 "register_operand")
6898 (match_operand:SVE_I 1 "register_operand")
6899 (match_operand:SVE_I 2 "nonmemory_operand"))]
6901 (match_operand:SVE_I 3 "register_operand")))]
6904 if (aarch64_prepare_sve_int_fma (operands, PLUS))
6906 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6910 ;; Predicated integer addition of product.
6911 (define_insn "@aarch64_pred_fma<mode>"
6912 [(set (match_operand:SVE_I 0 "register_operand")
6915 [(match_operand:<VPRED> 1 "register_operand")
6917 (match_operand:SVE_I 2 "register_operand")
6918 (match_operand:SVE_I 3 "register_operand"))]
6920 (match_operand:SVE_I 4 "register_operand")))]
6922 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
6923 [ w , Upl , %0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6924 [ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6925 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6929 ;; Predicated integer addition of product with merging.
6930 (define_expand "cond_fma<mode>"
6931 [(set (match_operand:SVE_I 0 "register_operand")
6933 [(match_operand:<VPRED> 1 "register_operand")
6936 (match_operand:SVE_I 2 "register_operand")
6937 (match_operand:SVE_I 3 "general_operand"))
6938 (match_operand:SVE_I 4 "register_operand"))
6939 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
6943 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
6945 /* Swap the multiplication operands if the fallback value is the
6946 second of the two. */
6947 if (rtx_equal_p (operands[3], operands[5]))
6948 std::swap (operands[2], operands[3]);
6952 ;; Predicated integer addition of product, merging with the first input.
6953 (define_insn "*cond_fma<mode>_2"
6954 [(set (match_operand:SVE_I 0 "register_operand")
6956 [(match_operand:<VPRED> 1 "register_operand")
6959 (match_operand:SVE_I 2 "register_operand")
6960 (match_operand:SVE_I 3 "register_operand"))
6961 (match_operand:SVE_I 4 "register_operand"))
6965 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
6966 [ w , Upl , 0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6967 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6971 ;; Predicated integer addition of product, merging with the third input.
6972 (define_insn "*cond_fma<mode>_4"
6973 [(set (match_operand:SVE_I 0 "register_operand")
6975 [(match_operand:<VPRED> 1 "register_operand")
6978 (match_operand:SVE_I 2 "register_operand")
6979 (match_operand:SVE_I 3 "register_operand"))
6980 (match_operand:SVE_I 4 "register_operand"))
6984 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
6985 [ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6986 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6990 ;; Predicated integer addition of product, merging with an independent value.
6991 (define_insn_and_rewrite "*cond_fma<mode>_any"
6992 [(set (match_operand:SVE_I 0 "register_operand")
6994 [(match_operand:<VPRED> 1 "register_operand")
6997 (match_operand:SVE_I 2 "register_operand")
6998 (match_operand:SVE_I 3 "register_operand"))
6999 (match_operand:SVE_I 4 "register_operand"))
7000 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7003 && !rtx_equal_p (operands[2], operands[5])
7004 && !rtx_equal_p (operands[3], operands[5])
7005 && !rtx_equal_p (operands[4], operands[5])"
7006 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7007 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7008 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7009 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7010 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7011 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7012 [ ?&w , Upl , w , w , w , w ] #
7014 "&& reload_completed
7015 && register_operand (operands[5], <MODE>mode)
7016 && !rtx_equal_p (operands[0], operands[5])"
7018 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7019 operands[5], operands[1]));
7020 operands[5] = operands[4] = operands[0];
7022 [(set_attr "movprfx" "yes")]
7025 ;; -------------------------------------------------------------------------
7026 ;; ---- [INT] MLS and MSB
7027 ;; -------------------------------------------------------------------------
7031 ;; -------------------------------------------------------------------------
7033 ;; Unpredicated integer subtraction of product.
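;; That is, each element of the result is operand 3 - (operand 1 * operand 2),
;; which maps onto the MLS/MSB forms below.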
7034 (define_expand "fnma<mode>4"
7035 [(set (match_operand:SVE_I 0 "register_operand")
7037 (match_operand:SVE_I 3 "register_operand")
7041 (match_operand:SVE_I 1 "register_operand")
7042 (match_operand:SVE_I 2 "general_operand"))]
7046 if (aarch64_prepare_sve_int_fma (operands, MINUS))
7048 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7052 ;; Predicated integer subtraction of product.
7053 (define_insn "@aarch64_pred_fnma<mode>"
7054 [(set (match_operand:SVE_I 0 "register_operand")
7056 (match_operand:SVE_I 4 "register_operand")
7058 [(match_operand:<VPRED> 1 "register_operand")
7060 (match_operand:SVE_I 2 "register_operand")
7061 (match_operand:SVE_I 3 "register_operand"))]
7064 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7065 [ w , Upl , %0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7066 [ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7067 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7071 ;; Predicated integer subtraction of product with merging.
7072 (define_expand "cond_fnma<mode>"
7073 [(set (match_operand:SVE_I 0 "register_operand")
7075 [(match_operand:<VPRED> 1 "register_operand")
7077 (match_operand:SVE_I 4 "register_operand")
7079 (match_operand:SVE_I 2 "register_operand")
7080 (match_operand:SVE_I 3 "general_operand")))
7081 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7085 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
7087 /* Swap the multiplication operands if the fallback value is the
7088 second of the two. */
7089 if (rtx_equal_p (operands[3], operands[5]))
7090 std::swap (operands[2], operands[3]);
7094 ;; Predicated integer subtraction of product, merging with the first input.
7095 (define_insn "*cond_fnma<mode>_2"
7096 [(set (match_operand:SVE_I 0 "register_operand")
7098 [(match_operand:<VPRED> 1 "register_operand")
7100 (match_operand:SVE_I 4 "register_operand")
7102 (match_operand:SVE_I 2 "register_operand")
7103 (match_operand:SVE_I 3 "register_operand")))
7107 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7108 [ w , Upl , 0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7109 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7113 ;; Predicated integer subtraction of product, merging with the third input.
7114 (define_insn "*cond_fnma<mode>_4"
7115 [(set (match_operand:SVE_I 0 "register_operand")
7117 [(match_operand:<VPRED> 1 "register_operand")
7119 (match_operand:SVE_I 4 "register_operand")
7121 (match_operand:SVE_I 2 "register_operand")
7122 (match_operand:SVE_I 3 "register_operand")))
7126 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7127 [ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7128 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7132 ;; Predicated integer subtraction of product, merging with an
7133 ;; independent value.
7134 (define_insn_and_rewrite "*cond_fnma<mode>_any"
7135 [(set (match_operand:SVE_I 0 "register_operand")
7137 [(match_operand:<VPRED> 1 "register_operand")
7139 (match_operand:SVE_I 4 "register_operand")
7141 (match_operand:SVE_I 2 "register_operand")
7142 (match_operand:SVE_I 3 "register_operand")))
7143 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7146 && !rtx_equal_p (operands[2], operands[5])
7147 && !rtx_equal_p (operands[3], operands[5])
7148 && !rtx_equal_p (operands[4], operands[5])"
7149 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7150 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7151 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7152 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7153 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7154 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7155 [ ?&w , Upl , w , w , w , w ] #
7157 "&& reload_completed
7158 && register_operand (operands[5], <MODE>mode)
7159 && !rtx_equal_p (operands[0], operands[5])"
7161 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7162 operands[5], operands[1]));
7163 operands[5] = operands[4] = operands[0];
7165 [(set_attr "movprfx" "yes")]
7168 ;; -------------------------------------------------------------------------
7169 ;; ---- [INT] Dot product
7170 ;; -------------------------------------------------------------------------
7176 ;; -------------------------------------------------------------------------
7178 ;; Four-element integer dot-product with accumulation.
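;; Roughly, for each wide element I of the result:
;;
;;   op0[I] = op3[I] + (op1[4*I + 0] * op2[4*I + 0]
;;                      + ... + op1[4*I + 3] * op2[4*I + 3])
;;
;; where operands 1 and 2 have elements a quarter of the width of operand 0
;; and the products are sign- or zero-extended as appropriate.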
7179 (define_insn "<sur>dot_prod<mode><vsi2qi>"
7180 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
7182 (unspec:SVE_FULL_SDI
7183 [(match_operand:<VSI2QI> 1 "register_operand")
7184 (match_operand:<VSI2QI> 2 "register_operand")]
7186 (match_operand:SVE_FULL_SDI 3 "register_operand")))]
7188 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7189 [ w , w , w , 0 ; * ] <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
7190 [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
7194 ;; Four-element integer dot-product by selected lanes with accumulation.
7195 (define_insn "@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>"
7196 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
7198 (unspec:SVE_FULL_SDI
7199 [(match_operand:SVE_FULL_BHI 1 "register_operand")
7200 (unspec:SVE_FULL_BHI
7201 [(match_operand:SVE_FULL_BHI 2 "register_operand")
7202 (match_operand:SI 3 "const_int_operand")]
7203 UNSPEC_SVE_LANE_SELECT)]
7205 (match_operand:SVE_FULL_SDI 4 "register_operand")))]
7207 && (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4
7208 || (TARGET_STREAMING_SME2
7209 && <SVE_FULL_SDI:elem_bits> == 32
7210 && <SVE_FULL_BHI:elem_bits> == 16))"
7211 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7212 [ w , w , <SVE_FULL_SDI:sve_lane_con> , 0 ; * ] <sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
7213 [ ?&w , w , <SVE_FULL_SDI:sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
7217 (define_insn "@<sur>dot_prod<mode><vsi2qi>"
7218 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7221 [(match_operand:<VSI2QI> 1 "register_operand")
7222 (match_operand:<VSI2QI> 2 "register_operand")]
7224 (match_operand:VNx4SI_ONLY 3 "register_operand")))]
7226 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7227 [ w , w , w , 0 ; * ] <sur>dot\t%0.s, %1.b, %2.b
7228 [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.b, %2.b
7232 (define_insn "@aarch64_<sur>dot_prod_lane<VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
7233 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7236 [(match_operand:VNx16QI_ONLY 1 "register_operand")
7237 (unspec:VNx16QI_ONLY
7238 [(match_operand:VNx16QI_ONLY 2 "register_operand")
7239 (match_operand:SI 3 "const_int_operand")]
7240 UNSPEC_SVE_LANE_SELECT)]
7242 (match_operand:VNx4SI_ONLY 4 "register_operand")))]
7244 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7245 [ w , w , y , 0 ; * ] <sur>dot\t%0.s, %1.b, %2.b[%3]
7246 [ ?&w , w , y , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.s, %1.b, %2.b[%3]
7250 ;; -------------------------------------------------------------------------
7251 ;; ---- [INT] Sum of absolute differences
7252 ;; -------------------------------------------------------------------------
7253 ;; The patterns in this section are synthetic.
7254 ;; -------------------------------------------------------------------------
7256 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
7257 ;; operands 1 and 2. The sequence also has to perform a widening reduction of
7258 ;; the difference into a vector and accumulate that into operand 3 before
7259 ;; copying that into the result operand 0.
7260 ;; Perform that with a sequence of:
7262 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
7263 ;; MOVPRFX op0, op3 // If necessary
7264 ;; UDOT op0.s, diff.b, ones.b
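;;
;; As a rough scalar model of the result (with four narrow elements per
;; wide element of operand 0):
;;
;;   op0[I] = op3[I] + |op1[4*I + 0] - op2[4*I + 0]|
;;                   + ... + |op1[4*I + 3] - op2[4*I + 3]|
;;
;; The absolute differences are never negative, so accumulating them with
;; UDOT and an all-ones multiplicand works for both the signed and
;; unsigned variants.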
7265 (define_expand "<su>sad<vsi2qi>"
7266 [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
7267 (USMAX:<VSI2QI> (match_operand:<VSI2QI> 1 "register_operand")
7268 (match_operand:<VSI2QI> 2 "register_operand"))
7269 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
7272 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
7273 rtx diff = gen_reg_rtx (<VSI2QI>mode);
7274 emit_insn (gen_<su>abd<vsi2qi>3 (diff, operands[1], operands[2]));
7275 emit_insn (gen_udot_prod<mode><vsi2qi> (operands[0], diff, ones,
7281 ;; -------------------------------------------------------------------------
7282 ;; ---- [INT] Matrix multiply-accumulate
7283 ;; -------------------------------------------------------------------------
7288 ;; -------------------------------------------------------------------------
7290 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
7291 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7294 [(match_operand:<VSI2QI> 2 "register_operand")
7295 (match_operand:<VSI2QI> 3 "register_operand")]
7297 (match_operand:VNx4SI_ONLY 1 "register_operand")))]
7298 "TARGET_SVE_I8MM && TARGET_NON_STREAMING"
7299 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7300 [ w , 0 , w , w ; * ] <sur>mmla\t%0.s, %2.b, %3.b
7301 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sur>mmla\t%0.s, %2.b, %3.b
7305 ;; -------------------------------------------------------------------------
7306 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
7307 ;; -------------------------------------------------------------------------
7308 ;; Includes merging patterns for:
7317 ;; -------------------------------------------------------------------------
7319 ;; Unpredicated floating-point ternary operations.
7320 (define_expand "<optab><mode>4"
7321 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7324 (const_int SVE_RELAXED_GP)
7325 (match_operand:SVE_FULL_F 1 "register_operand")
7326 (match_operand:SVE_FULL_F 2 "register_operand")
7327 (match_operand:SVE_FULL_F 3 "register_operand")]
7328 SVE_COND_FP_TERNARY))]
7331 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7335 ;; Predicated floating-point ternary operations.
7336 (define_insn "@aarch64_pred_<optab><mode>"
7337 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7339 [(match_operand:<VPRED> 1 "register_operand")
7340 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7341 (match_operand:SVE_FULL_F 2 "register_operand")
7342 (match_operand:SVE_FULL_F 3 "register_operand")
7343 (match_operand:SVE_FULL_F 4 "register_operand")]
7344 SVE_COND_FP_TERNARY))]
7346 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7347 [ w , Upl , %w , w , 0 ; * ] <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7348 [ w , Upl , 0 , w , w ; * ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7349 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7353 ;; Predicated floating-point ternary operations with merging.
7354 (define_expand "@cond_<optab><mode>"
7355 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7357 [(match_operand:<VPRED> 1 "register_operand")
7360 (const_int SVE_STRICT_GP)
7361 (match_operand:SVE_FULL_F 2 "register_operand")
7362 (match_operand:SVE_FULL_F 3 "register_operand")
7363 (match_operand:SVE_FULL_F 4 "register_operand")]
7364 SVE_COND_FP_TERNARY)
7365 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7369 /* Swap the multiplication operands if the fallback value is the
7370 second of the two. */
7371 if (rtx_equal_p (operands[3], operands[5]))
7372 std::swap (operands[2], operands[3]);
7375 ;; Predicated floating-point ternary operations, merging with the first input.
7377 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
7378 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7380 [(match_operand:<VPRED> 1 "register_operand")
7383 (const_int SVE_RELAXED_GP)
7384 (match_operand:SVE_FULL_F 2 "register_operand")
7385 (match_operand:SVE_FULL_F 3 "register_operand")
7386 (match_operand:SVE_FULL_F 4 "register_operand")]
7387 SVE_COND_FP_TERNARY)
7391 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7392 [ w , Upl , 0 , w , w ; * ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7393 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7395 "&& !rtx_equal_p (operands[1], operands[5])"
7397 operands[5] = copy_rtx (operands[1]);
7401 (define_insn "*cond_<optab><mode>_2_strict"
7402 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7404 [(match_operand:<VPRED> 1 "register_operand")
7407 (const_int SVE_STRICT_GP)
7408 (match_operand:SVE_FULL_F 2 "register_operand")
7409 (match_operand:SVE_FULL_F 3 "register_operand")
7410 (match_operand:SVE_FULL_F 4 "register_operand")]
7411 SVE_COND_FP_TERNARY)
7415 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7416 [ w , Upl , 0 , w , w ; * ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7417 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7421 ;; Predicated floating-point ternary operations, merging with the third input.
7423 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7424 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7426 [(match_operand:<VPRED> 1 "register_operand")
7429 (const_int SVE_RELAXED_GP)
7430 (match_operand:SVE_FULL_F 2 "register_operand")
7431 (match_operand:SVE_FULL_F 3 "register_operand")
7432 (match_operand:SVE_FULL_F 4 "register_operand")]
7433 SVE_COND_FP_TERNARY)
7437 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7438 [ w , Upl , w , w , 0 ; * ] <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7439 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7441 "&& !rtx_equal_p (operands[1], operands[5])"
7443 operands[5] = copy_rtx (operands[1]);
7447 (define_insn "*cond_<optab><mode>_4_strict"
7448 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7450 [(match_operand:<VPRED> 1 "register_operand")
7453 (const_int SVE_STRICT_GP)
7454 (match_operand:SVE_FULL_F 2 "register_operand")
7455 (match_operand:SVE_FULL_F 3 "register_operand")
7456 (match_operand:SVE_FULL_F 4 "register_operand")]
7457 SVE_COND_FP_TERNARY)
7461 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7462 [ w , Upl , w , w , 0 ; * ] <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7463 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7467 ;; Predicated floating-point ternary operations, merging with an
7468 ;; independent value.
7469 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7470 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7472 [(match_operand:<VPRED> 1 "register_operand")
7475 (const_int SVE_RELAXED_GP)
7476 (match_operand:SVE_FULL_F 2 "register_operand")
7477 (match_operand:SVE_FULL_F 3 "register_operand")
7478 (match_operand:SVE_FULL_F 4 "register_operand")]
7479 SVE_COND_FP_TERNARY)
7480 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7483 && !rtx_equal_p (operands[2], operands[5])
7484 && !rtx_equal_p (operands[3], operands[5])
7485 && !rtx_equal_p (operands[4], operands[5])"
7486 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7487 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7488 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7489 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7490 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7491 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7492 [ ?&w , Upl , w , w , w , w ] #
7496 if (reload_completed
7497 && register_operand (operands[5], <MODE>mode)
7498 && !rtx_equal_p (operands[0], operands[5]))
7500 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7501 operands[5], operands[1]));
7502 operands[5] = operands[4] = operands[0];
7504 else if (!rtx_equal_p (operands[1], operands[6]))
7505 operands[6] = copy_rtx (operands[1]);
7509 [(set_attr "movprfx" "yes")]
7512 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7513 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7515 [(match_operand:<VPRED> 1 "register_operand")
7518 (const_int SVE_STRICT_GP)
7519 (match_operand:SVE_FULL_F 2 "register_operand")
7520 (match_operand:SVE_FULL_F 3 "register_operand")
7521 (match_operand:SVE_FULL_F 4 "register_operand")]
7522 SVE_COND_FP_TERNARY)
7523 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7526 && !rtx_equal_p (operands[2], operands[5])
7527 && !rtx_equal_p (operands[3], operands[5])
7528 && !rtx_equal_p (operands[4], operands[5])"
7529 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7530 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7531 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7532 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7533 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7534 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7535 [ ?&w , Upl , w , w , w , w ] #
7537 "&& reload_completed
7538 && register_operand (operands[5], <MODE>mode)
7539 && !rtx_equal_p (operands[0], operands[5])"
7541 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7542 operands[5], operands[1]));
7543 operands[5] = operands[4] = operands[0];
7545 [(set_attr "movprfx" "yes")]
7548 ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
7549 ;; (fma ...) since target-independent code won't understand the indexing.
7550 (define_insn "@aarch64_<optab>_lane_<mode>"
7551 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7553 [(match_operand:SVE_FULL_F 1 "register_operand")
7555 [(match_operand:SVE_FULL_F 2 "register_operand")
7556 (match_operand:SI 3 "const_int_operand")]
7557 UNSPEC_SVE_LANE_SELECT)
7558 (match_operand:SVE_FULL_F 4 "register_operand")]
7559 SVE_FP_TERNARY_LANE))]
7561 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7562 [ w , w , <sve_lane_con> , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7563 [ ?&w , w , <sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7567 ;; -------------------------------------------------------------------------
7568 ;; ---- [FP] Complex multiply-add
7569 ;; -------------------------------------------------------------------------
7570 ;; Includes merging patterns for:
7572 ;; -------------------------------------------------------------------------
7574 ;; Predicated FCMLA.
7575 (define_insn "@aarch64_pred_<optab><mode>"
7576 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7578 [(match_operand:<VPRED> 1 "register_operand")
7579 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7580 (match_operand:SVE_FULL_F 2 "register_operand")
7581 (match_operand:SVE_FULL_F 3 "register_operand")
7582 (match_operand:SVE_FULL_F 4 "register_operand")]
7585 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7586 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7587 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7591 ;; Unpredicated optab pattern for the auto-vectorizer.
7592 ;; The complex mla/mls operations always need to expand to two instructions.
7593 ;; The first operation does half the computation and the second does the
7594 ;; remainder. Because of this, expand early.
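;; For a non-conjugate multiply-accumulate, the usual decomposition is an
;; FCMLA with rotation #0 followed by an FCMLA with rotation #90 on the
;; same accumulator; the conjugate forms use different rotation pairs.
;; <sve_rot1> and <sve_rot2> below select the appropriate pair for each
;; variant.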
7595 (define_expand "cml<fcmac1><conj_op><mode>4"
7596 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7600 (match_operand:SVE_FULL_F 1 "register_operand")
7601 (match_operand:SVE_FULL_F 2 "register_operand")
7602 (match_operand:SVE_FULL_F 3 "register_operand")]
7606 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7607 operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode);
7608 rtx tmp = gen_reg_rtx (<MODE>mode);
7610 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4],
7611 operands[2], operands[1],
7612 operands[3], operands[5]));
7614 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4],
7615 operands[2], operands[1],
7620 ;; Unpredicated optab pattern for the auto-vectorizer.
7621 ;; The complex mul operations always need to expand to two instructions.
7622 ;; The first operation does half the computation and the second does the
7623 ;; remainder. Because of this, expand early.
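;; This reuses the two-FCMLA sequence above, but accumulates into a fresh
;; register that is initialized to zero, since a plain multiplication has
;; no addend.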
7624 (define_expand "cmul<conj_op><mode>3"
7625 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7627 [(match_operand:SVE_FULL_F 1 "register_operand")
7628 (match_operand:SVE_FULL_F 2 "register_operand")]
7632 rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode);
7633 rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode);
7634 rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
7635 rtx tmp = gen_reg_rtx (<MODE>mode);
7637 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg,
7638 operands[2], operands[1],
7641 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg,
7642 operands[2], operands[1],
7647 ;; Predicated FCMLA with merging.
7648 (define_expand "@cond_<optab><mode>"
7649 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7651 [(match_operand:<VPRED> 1 "register_operand")
7654 (const_int SVE_STRICT_GP)
7655 (match_operand:SVE_FULL_F 2 "register_operand")
7656 (match_operand:SVE_FULL_F 3 "register_operand")
7657 (match_operand:SVE_FULL_F 4 "register_operand")]
7659 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7664 ;; Predicated FCMLA, merging with the third input.
7665 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7666 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7668 [(match_operand:<VPRED> 1 "register_operand")
7671 (const_int SVE_RELAXED_GP)
7672 (match_operand:SVE_FULL_F 2 "register_operand")
7673 (match_operand:SVE_FULL_F 3 "register_operand")
7674 (match_operand:SVE_FULL_F 4 "register_operand")]
7679 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7680 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7681 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7683 "&& !rtx_equal_p (operands[1], operands[5])"
7685 operands[5] = copy_rtx (operands[1]);
7689 (define_insn "*cond_<optab><mode>_4_strict"
7690 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7692 [(match_operand:<VPRED> 1 "register_operand")
7695 (const_int SVE_STRICT_GP)
7696 (match_operand:SVE_FULL_F 2 "register_operand")
7697 (match_operand:SVE_FULL_F 3 "register_operand")
7698 (match_operand:SVE_FULL_F 4 "register_operand")]
7703 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7704 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7705 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7709 ;; Predicated FCMLA, merging with an independent value.
7710 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7711 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7713 [(match_operand:<VPRED> 1 "register_operand")
7716 (const_int SVE_RELAXED_GP)
7717 (match_operand:SVE_FULL_F 2 "register_operand")
7718 (match_operand:SVE_FULL_F 3 "register_operand")
7719 (match_operand:SVE_FULL_F 4 "register_operand")]
7721 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7723 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7724 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7725 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7726 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7727 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7728 [ ?&w , Upl , w , w , w , w ] #
7732 if (reload_completed
7733 && register_operand (operands[5], <MODE>mode)
7734 && !rtx_equal_p (operands[0], operands[5]))
7736 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7737 operands[5], operands[1]));
7738 operands[5] = operands[4] = operands[0];
7740 else if (!rtx_equal_p (operands[1], operands[6]))
7741 operands[6] = copy_rtx (operands[1]);
7745 [(set_attr "movprfx" "yes")]
7748 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7749 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7751 [(match_operand:<VPRED> 1 "register_operand")
7754 (const_int SVE_STRICT_GP)
7755 (match_operand:SVE_FULL_F 2 "register_operand")
7756 (match_operand:SVE_FULL_F 3 "register_operand")
7757 (match_operand:SVE_FULL_F 4 "register_operand")]
7759 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7761 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7762 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7763 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7764 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7765 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7766 [ ?&w , Upl , w , w , w , w ] #
7768 "&& reload_completed
7769 && register_operand (operands[5], <MODE>mode)
7770 && !rtx_equal_p (operands[0], operands[5])"
7772 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7773 operands[5], operands[1]));
7774 operands[5] = operands[4] = operands[0];
7776 [(set_attr "movprfx" "yes")]
7779 ;; Unpredicated FCMLA with indexing.
7780 (define_insn "@aarch64_<optab>_lane_<mode>"
7781 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
7782 (unspec:SVE_FULL_HSF
7783 [(match_operand:SVE_FULL_HSF 1 "register_operand")
7784 (unspec:SVE_FULL_HSF
7785 [(match_operand:SVE_FULL_HSF 2 "register_operand")
7786 (match_operand:SI 3 "const_int_operand")]
7787 UNSPEC_SVE_LANE_SELECT)
7788 (match_operand:SVE_FULL_HSF 4 "register_operand")]
7791 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7792 [ w , w , <sve_lane_pair_con> , 0 ; * ] fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7793 [ ?&w , w , <sve_lane_pair_con> , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7797 ;; -------------------------------------------------------------------------
7798 ;; ---- [FP] Trigonometric multiply-add
7799 ;; -------------------------------------------------------------------------
7802 ;; -------------------------------------------------------------------------
7804 (define_insn "@aarch64_sve_tmad<mode>"
7805 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7807 [(match_operand:SVE_FULL_F 1 "register_operand")
7808 (match_operand:SVE_FULL_F 2 "register_operand")
7809 (match_operand:DI 3 "const_int_operand")]
7812 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
7813 [ w , 0 , w ; * ] ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7814 [ ?&w , w , w ; yes ] movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7818 ;; -------------------------------------------------------------------------
7819 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
7820 ;; -------------------------------------------------------------------------
7828 ;; -------------------------------------------------------------------------
7830 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
7831 [(set (match_operand:VNx4SF 0 "register_operand")
7833 [(match_operand:VNx4SF 1 "register_operand")
7834 (match_operand:VNx8BF 2 "register_operand")
7835 (match_operand:VNx8BF 3 "register_operand")]
7836 SVE_BFLOAT_TERNARY_LONG))]
7838 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7839 [ w , 0 , w , w ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h
7840 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h
7844 ;; The immediate range is enforced before generating the instruction.
7845 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
7846 [(set (match_operand:VNx4SF 0 "register_operand")
7848 [(match_operand:VNx4SF 1 "register_operand")
7849 (match_operand:VNx8BF 2 "register_operand")
7850 (match_operand:VNx8BF 3 "register_operand")
7851 (match_operand:SI 4 "const_int_operand")]
7852 SVE_BFLOAT_TERNARY_LONG_LANE))]
7854 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7855 [ w , 0 , w , y ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7856 [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7860 ;; -------------------------------------------------------------------------
7861 ;; ---- [FP] Matrix multiply-accumulate
7862 ;; -------------------------------------------------------------------------
7864 ;; - FMMLA (F32MM,F64MM)
7865 ;; -------------------------------------------------------------------------
7867 ;; The mode iterator enforces the target requirements.
7868 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
7869 [(set (match_operand:SVE_MATMULF 0 "register_operand")
7871 [(match_operand:SVE_MATMULF 2 "register_operand")
7872 (match_operand:SVE_MATMULF 3 "register_operand")
7873 (match_operand:SVE_MATMULF 1 "register_operand")]
7875 "TARGET_SVE && TARGET_NON_STREAMING"
7876 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7877 [ w , 0 , w , w ; * ] <sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
7878 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
7882 ;; =========================================================================
7883 ;; == Comparisons and selects
7884 ;; =========================================================================
7886 ;; -------------------------------------------------------------------------
7887 ;; ---- [INT,FP] Select based on predicates
7888 ;; -------------------------------------------------------------------------
7889 ;; Includes merging patterns for:
7893 ;; -------------------------------------------------------------------------
7895 ;; vcond_mask operand order: true, false, mask
7896 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
7897 ;; SEL operand order: mask, true, false
7898 (define_expand "@vcond_mask_<mode><vpred>"
7899 [(set (match_operand:SVE_ALL 0 "register_operand")
7901 [(match_operand:<VPRED> 3 "register_operand")
7902 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
7903 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
7907 if (register_operand (operands[1], <MODE>mode))
7908 operands[2] = force_reg (<MODE>mode, operands[2]);
7914 ;; - a duplicated immediate and a register
7915 ;; - a duplicated immediate and zero
7917 ;; For unpacked vectors, it doesn't really matter whether SEL uses
7918 ;; the container size or the element size. If SEL used the container size,
7919 ;; it would ignore undefined bits of the predicate but would copy the
7920 ;; upper (undefined) bits of each container along with the defined bits.
7921 ;; If SEL used the element size, it would use undefined bits of the predicate
7922 ;; to select between undefined elements in each input vector. Thus the only
7923 ;; difference is whether the undefined bits in a container always come from
7924 ;; the same input as the defined bits, or whether the choice can vary
7925 ;; independently of the defined bits.
7927 ;; For the other instructions, using the element size is more natural,
7928 ;; so we do that for SEL as well.
7929 (define_insn "*vcond_mask_<mode><vpred>"
7930 [(set (match_operand:SVE_ALL 0 "register_operand")
7932 [(match_operand:<VPRED> 3 "register_operand")
7933 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
7934 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
7937 && (!register_operand (operands[1], <MODE>mode)
7938 || register_operand (operands[2], <MODE>mode))"
7939 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7940 [ w , w , w , Upa ; * ] sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
7941 [ w , vss , 0 , Upa ; * ] mov\t%0.<Vetype>, %3/m, #%I1
7942 [ w , vss , Dz , Upa ; * ] mov\t%0.<Vetype>, %3/z, #%I1
7943 [ w , Ufc , 0 , Upa ; * ] fmov\t%0.<Vetype>, %3/m, #%1
7944 [ ?w , Ufc , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
7945 [ ?&w , vss , w , Upa ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
7946 [ ?&w , Ufc , w , Upa ; yes ] movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1
7950 ;; Optimize selects between a duplicated scalar variable and another vector,
7951 ;; the latter of which can be a zero constant or a variable. Treat duplicates
7952 ;; of GPRs as being more expensive than duplicates of FPRs, since they
7953 ;; involve a cross-file move.
7954 (define_insn "@aarch64_sel_dup<mode>"
7955 [(set (match_operand:SVE_ALL 0 "register_operand")
7957 [(match_operand:<VPRED> 3 "register_operand")
7958 (vec_duplicate:SVE_ALL
7959 (match_operand:<VEL> 1 "register_operand"))
7960 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
7963 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7964 [ ?w , r , 0 , Upl ; * ] mov\t%0.<Vetype>, %3/m, %<vwcore>1
7965 [ w , w , 0 , Upl ; * ] mov\t%0.<Vetype>, %3/m, %<Vetype>1
7966 [ ??w , r , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7967 [ ?&w , w , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
7968 [ ??&w , r , w , Upl ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7969 [ ?&w , w , w , Upl ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
7973 ;; -------------------------------------------------------------------------
7974 ;; ---- [INT,FP] Compare and select
7975 ;; -------------------------------------------------------------------------
7976 ;; The patterns in this section are synthetic.
7977 ;; -------------------------------------------------------------------------
7979 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
7980 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
7981 (define_expand "vcond<SVE_ALL:mode><SVE_I:mode>"
7982 [(set (match_operand:SVE_ALL 0 "register_operand")
7983 (if_then_else:SVE_ALL
7984 (match_operator 3 "comparison_operator"
7985 [(match_operand:SVE_I 4 "register_operand")
7986 (match_operand:SVE_I 5 "nonmemory_operand")])
7987 (match_operand:SVE_ALL 1 "nonmemory_operand")
7988 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
7989 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
7991 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
7996 ;; Integer vcondu. Don't enforce an immediate range here, since it
7997 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
7998 (define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>"
7999 [(set (match_operand:SVE_ALL 0 "register_operand")
8000 (if_then_else:SVE_ALL
8001 (match_operator 3 "comparison_operator"
8002 [(match_operand:SVE_I 4 "register_operand")
8003 (match_operand:SVE_I 5 "nonmemory_operand")])
8004 (match_operand:SVE_ALL 1 "nonmemory_operand")
8005 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
8006 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
8008 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
8013 ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
8014 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
8015 (define_expand "vcond<mode><v_fp_equiv>"
8016 [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
8017 (if_then_else:SVE_FULL_HSD
8018 (match_operator 3 "comparison_operator"
8019 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
8020 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
8021 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
8022 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
8025 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
8030 ;; -------------------------------------------------------------------------
8031 ;; ---- [INT] Comparisons
8032 ;; -------------------------------------------------------------------------
8044 ;; -------------------------------------------------------------------------
8046 ;; Signed integer comparisons. Don't enforce an immediate range here, since
8047 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
8049 (define_expand "vec_cmp<mode><vpred>"
8051 [(set (match_operand:<VPRED> 0 "register_operand")
8052 (match_operator:<VPRED> 1 "comparison_operator"
8053 [(match_operand:SVE_I 2 "register_operand")
8054 (match_operand:SVE_I 3 "nonmemory_operand")]))
8055 (clobber (reg:CC_NZC CC_REGNUM))])]
8058 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
8059 operands[2], operands[3]);
8064 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
8065 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
8067 (define_expand "vec_cmpu<mode><vpred>"
8069 [(set (match_operand:<VPRED> 0 "register_operand")
8070 (match_operator:<VPRED> 1 "comparison_operator"
8071 [(match_operand:SVE_I 2 "register_operand")
8072 (match_operand:SVE_I 3 "nonmemory_operand")]))
8073 (clobber (reg:CC_NZC CC_REGNUM))])]
8076 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
8077 operands[2], operands[3]);
8082 ;; Predicated integer comparisons.
8084 ;; For unpacked vectors, only the lowpart element in each input container
8085 ;; has a defined value, and only the predicate bits associated with
8086 ;; those elements are defined. For example, when comparing two VNx2SIs:
8088 ;; - The VNx2SIs can be seen as VNx2DIs in which the low halves of each
8089 ;; DI container store an SI element. The upper bits of each DI container are undefined.
8092 ;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
8093 ;; even elements are defined and the odd elements are undefined.
8095 ;; - The associated predicate mode is VNx2BI. This means that only the
8096 ;; low bit in each predicate byte is defined (on input and on output).
8098 ;; - We use a .s comparison to compare VNx2SIs, under the control of a
8099 ;; VNx2BI governing predicate, to produce a VNx2BI result. If we view
8100 ;; the .s operation as operating on VNx4SIs then for odd lanes:
8102 ;; - the input governing predicate bit is undefined
8103 ;; - the SI elements being compared are undefined
8104 ;; - the predicate result bit is therefore undefined, but
8105 ;; - the predicate result bit is in the undefined part of a VNx2BI,
8106 ;; so its value doesn't matter anyway.
8107 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
8108 [(set (match_operand:<VPRED> 0 "register_operand")
8110 [(match_operand:<VPRED> 1 "register_operand")
8111 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8112 (SVE_INT_CMP:<VPRED>
8113 (match_operand:SVE_I 3 "register_operand")
8114 (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8116 (clobber (reg:CC_NZC CC_REGNUM))]
8118 {@ [ cons: =0 , 1 , 3 , 4 ; attrs: pred_clobber ]
8119 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
8120 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8121 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8122 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
8123 [ ?Upl , 0 , w , w ; yes ] ^
8124 [ Upa , Upl, w , w ; no ] ^
8128 ;; Predicated integer comparisons in which both the flag and predicate
8129 ;; results are interesting.
8130 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
8131 [(set (reg:CC_NZC CC_REGNUM)
8133 [(match_operand:VNx16BI 1 "register_operand")
8135 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8138 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8139 (SVE_INT_CMP:<VPRED>
8140 (match_operand:SVE_I 2 "register_operand")
8141 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8144 (set (match_operand:<VPRED> 0 "register_operand")
8148 (SVE_INT_CMP:<VPRED>
8153 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8154 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ]
8155 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
8156 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8157 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8158 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
8159 [ ?Upl , 0 , w , w ; yes ] ^
8160 [ Upa , Upl, w , w ; no ] ^
8162 "&& !rtx_equal_p (operands[4], operands[6])"
8164 operands[6] = copy_rtx (operands[4]);
8165 operands[7] = operands[5];
8169 ;; Predicated integer comparisons in which only the flags result is interesting.
8171 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
8172 [(set (reg:CC_NZC CC_REGNUM)
8174 [(match_operand:VNx16BI 1 "register_operand")
8176 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8179 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8180 (SVE_INT_CMP:<VPRED>
8181 (match_operand:SVE_I 2 "register_operand")
8182 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8185 (clobber (match_scratch:<VPRED> 0))]
8187 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8188 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
8189 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
8190 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8191 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8192 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
8193 [ ?Upl , 0 , w , w ; yes ] ^
8194 [ Upa , Upl, w , w ; no ] ^
8196 "&& !rtx_equal_p (operands[4], operands[6])"
8198 operands[6] = copy_rtx (operands[4]);
8199 operands[7] = operands[5];
8203 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
8204 ;; comparison with an AND. Split the instruction into its preferred form
8205 ;; at the earliest opportunity, in order to get rid of the redundant
8207 (define_insn_and_split "*cmp<cmp_op><mode>_and"
8208 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8212 (const_int SVE_KNOWN_PTRUE)
8213 (SVE_INT_CMP:<VPRED>
8214 (match_operand:SVE_I 2 "register_operand" "w, w")
8215 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
8217 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
8218 (clobber (reg:CC_NZC CC_REGNUM))]
8226 (const_int SVE_MAYBE_NOT_PTRUE)
8227 (SVE_INT_CMP:<VPRED>
8231 (clobber (reg:CC_NZC CC_REGNUM))])]
8234 ;; Predicated integer wide comparisons.
8235 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
8236 [(set (match_operand:<VPRED> 0 "register_operand")
8238 [(match_operand:VNx16BI 1 "register_operand")
8239 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8241 [(match_operand:SVE_FULL_BHSI 3 "register_operand")
8242 (match_operand:VNx2DI 4 "register_operand")]
8243 SVE_COND_INT_CMP_WIDE)]
8245 (clobber (reg:CC_NZC CC_REGNUM))]
8247 {@ [ cons: =0, 1 , 2, 3, 4; attrs: pred_clobber ]
8248 [ &Upa , Upl, , w, w; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d
8249 [ ?Upl , 0 , , w, w; yes ] ^
8250 [ Upa , Upl, , w, w; no ] ^
8254 ;; Predicated integer wide comparisons in which both the flag and
8255 ;; predicate results are interesting.
8256 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
8257 [(set (reg:CC_NZC CC_REGNUM)
8259 [(match_operand:VNx16BI 1 "register_operand")
8261 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8263 [(match_operand:VNx16BI 6 "register_operand")
8264 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8266 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
8267 (match_operand:VNx2DI 3 "register_operand")]
8268 SVE_COND_INT_CMP_WIDE)]
8271 (set (match_operand:<VPRED> 0 "register_operand")
8278 SVE_COND_INT_CMP_WIDE)]
8281 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8282 {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ]
8283 [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
8284 [ ?Upl , 0 , w, w, Upl; yes ] ^
8285 [ Upa , Upl, w, w, Upl; no ] ^
8289 ;; Predicated integer wide comparisons in which only the flags result is interesting.
8291 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
8292 [(set (reg:CC_NZC CC_REGNUM)
8294 [(match_operand:VNx16BI 1 "register_operand")
8296 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8298 [(match_operand:VNx16BI 6 "register_operand")
8299 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8301 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
8302 (match_operand:VNx2DI 3 "register_operand")]
8303 SVE_COND_INT_CMP_WIDE)]
8306 (clobber (match_scratch:<VPRED> 0))]
8308 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8309 {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ]
8310 [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
8311 [ ?Upl , 0 , w, w, Upl; yes ] ^
8312 [ Upa , Upl, w, w, Upl; no ] ^
8316 ;; -------------------------------------------------------------------------
8317 ;; ---- [INT] While tests
8318 ;; -------------------------------------------------------------------------
8330 ;; -------------------------------------------------------------------------
8338 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
8339 ;; true for all J in [0, I].
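;; For example, a WHILELO with operand 1 equal to 3 and operand 2 equal to 7
;; sets the first four elements of the result (J = 0..3 satisfy 3 + J < 7)
;; and clears the rest.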
8340 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
8341 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8342 (unspec:PRED_ALL [(const_int SVE_WHILE_B)
8343 (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8344 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8346 (clobber (reg:CC_NZC CC_REGNUM))]
8348 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8351 ;; The WHILE instructions set the flags in the same way as a PTEST with
8352 ;; a PTRUE GP. Handle the case in which both results are useful. The GP
8353 ;; operands to the PTEST aren't needed, so we allow them to be anything.
8354 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
8355 [(set (reg:CC_NZC CC_REGNUM)
8359 (const_int SVE_KNOWN_PTRUE)
8361 [(const_int SVE_WHILE_B)
8362 (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8363 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8366 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8367 (unspec:PRED_ALL [(const_int SVE_WHILE_B)
8372 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8373 ;; Force the compiler to drop the unused predicate operand, so that we
8374 ;; don't have an unnecessary PTRUE.
8375 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8377 operands[3] = CONSTM1_RTX (VNx16BImode);
8378 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8382 ;; Same, but handle the case in which only the flags result is useful.
8383 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
8384 [(set (reg:CC_NZC CC_REGNUM)
8388 (const_int SVE_KNOWN_PTRUE)
8390 [(const_int SVE_WHILE_B)
8391 (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8392 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8395 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8397 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8398 ;; Force the compiler to drop the unused predicate operand, so that we
8399 ;; don't have an unnecessary PTRUE.
8400 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8402 operands[3] = CONSTM1_RTX (VNx16BImode);
8403 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8407 ;; -------------------------------------------------------------------------
8408 ;; ---- [FP] Direct comparisons
8409 ;; -------------------------------------------------------------------------
8418 ;; -------------------------------------------------------------------------
8420 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
8421 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO comparison with zero.
8423 (define_expand "vec_cmp<mode><vpred>"
8424 [(set (match_operand:<VPRED> 0 "register_operand")
8425 (match_operator:<VPRED> 1 "comparison_operator"
8426 [(match_operand:SVE_FULL_F 2 "register_operand")
8427 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
8430 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
8431 operands[2], operands[3], false);
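;; A hedged C-level sketch (illustrative only; names are made up) of the
;; kind of code that reaches the vec_cmp expander above: a vectorised
;; floating-point comparison feeding a select.
;;
;;   void
;;   clamp_negative (double *restrict dst, const double *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[i] = src[i] > 0.0 ? src[i] : 0.0;
;;   }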
8436 ;; Predicated floating-point comparisons.
8437 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
8438 [(set (match_operand:<VPRED> 0 "register_operand")
8440 [(match_operand:<VPRED> 1 "register_operand")
8441 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8442 (match_operand:SVE_FULL_F 3 "register_operand")
8443 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
8444 SVE_COND_FP_CMP_I0))]
8446 {@ [ cons: =0 , 1 , 3 , 4 ]
8447 [ Upa , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
8448 [ Upa , Upl , w , w ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
8452 ;; Same for unordered comparisons.
8453 (define_insn "@aarch64_pred_fcmuo<mode>"
8454 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8456 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8457 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8458 (match_operand:SVE_FULL_F 3 "register_operand" "w")
8459 (match_operand:SVE_FULL_F 4 "register_operand" "w")]
8460 UNSPEC_COND_FCMUO))]
8462 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
8465 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
8466 ;; with another predicate P. This does not have the same trapping behavior
8467 ;; as predicating the comparison itself on P, but it's a legitimate fold,
8468 ;; since we can drop any potentially-trapping operations whose results are not needed.
8471 ;; Split the instruction into its preferred form (below) at the earliest
8472 ;; opportunity, in order to get rid of the redundant operand 1.
8473 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
8474 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8477 [(match_operand:<VPRED> 1)
8478 (const_int SVE_KNOWN_PTRUE)
8479 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
8480 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
8482 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
8489 (const_int SVE_MAYBE_NOT_PTRUE)
8492 SVE_COND_FP_CMP_I0))]
8495 ;; Same for unordered comparisons.
8496 (define_insn_and_split "*fcmuo<mode>_and_combine"
8497 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8500 [(match_operand:<VPRED> 1)
8501 (const_int SVE_KNOWN_PTRUE)
8502 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8503 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8505 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
8512 (const_int SVE_MAYBE_NOT_PTRUE)
8515 UNSPEC_COND_FCMUO))]
8518 ;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
8519 ;; In this case, we still need a separate NOT/BIC operation, but predicating
8520 ;; the comparison on the BIC operand removes the need for a PTRUE.
8521 (define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
8522 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8527 [(match_operand:<VPRED> 1)
8528 (const_int SVE_KNOWN_PTRUE)
8529 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8530 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8531 SVE_COND_FP_CMP_I0))
8532 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8533 (match_dup:<VPRED> 1)))
8534 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8541 (const_int SVE_MAYBE_NOT_PTRUE)
8544 SVE_COND_FP_CMP_I0))
8551 if (can_create_pseudo_p ())
8552 operands[5] = gen_reg_rtx (<VPRED>mode);
8556 ;; Make sure that we expand to a nor when operand 4 of
8557 ;; *fcm<cmp_op><mode>_bic_combine is a not.
8558 (define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
8559 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8564 [(match_operand:<VPRED> 1)
8565 (const_int SVE_KNOWN_PTRUE)
8566 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8567 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8568 SVE_COND_FP_CMP_I0))
8570 (match_operand:<VPRED> 4 "register_operand" "Upa")))
8571 (match_dup:<VPRED> 1)))
8572 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8579 (const_int SVE_KNOWN_PTRUE)
8582 SVE_COND_FP_CMP_I0))
8592 if (can_create_pseudo_p ())
8593 operands[5] = gen_reg_rtx (<VPRED>mode);
8597 (define_insn_and_split "*fcmuo<mode>_bic_combine"
8598 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8603 [(match_operand:<VPRED> 1)
8604 (const_int SVE_KNOWN_PTRUE)
8605 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8606 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8608 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8609 (match_dup:<VPRED> 1)))
8610 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8617 (const_int SVE_MAYBE_NOT_PTRUE)
8627 if (can_create_pseudo_p ())
8628 operands[5] = gen_reg_rtx (<VPRED>mode);
8632 ;; Same for unordered comparisons.
8633 (define_insn_and_split "*fcmuo<mode>_nor_combine"
8634 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8639 [(match_operand:<VPRED> 1)
8640 (const_int SVE_KNOWN_PTRUE)
8641 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8642 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8645 (match_operand:<VPRED> 4 "register_operand" "Upa")))
8646 (match_dup:<VPRED> 1)))
8647 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8654 (const_int SVE_KNOWN_PTRUE)
8667 if (can_create_pseudo_p ())
8668 operands[5] = gen_reg_rtx (<VPRED>mode);
8672 ;; -------------------------------------------------------------------------
8673 ;; ---- [FP] Absolute comparisons
8674 ;; -------------------------------------------------------------------------
8680 ;; -------------------------------------------------------------------------
8682 ;; Predicated floating-point absolute comparisons.
8683 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
8684 [(set (match_operand:<VPRED> 0 "register_operand")
8686 [(match_operand:<VPRED> 1 "register_operand")
8687 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8691 (match_operand:SVE_FULL_F 3 "register_operand")]
8696 (match_operand:SVE_FULL_F 4 "register_operand")]
8698 SVE_COND_FP_ABS_CMP))]
8702 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
8703 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8705 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8706 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8709 (const_int SVE_RELAXED_GP)
8710 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8714 (const_int SVE_RELAXED_GP)
8715 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8717 SVE_COND_FP_ABS_CMP))]
8719 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8720 "&& (!rtx_equal_p (operands[1], operands[5])
8721 || !rtx_equal_p (operands[1], operands[6]))"
8723 operands[5] = copy_rtx (operands[1]);
8724 operands[6] = copy_rtx (operands[1]);
8728 (define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
8729 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8731 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8732 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8735 (match_operand:SI 5 "aarch64_sve_gp_strictness")
8736 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8740 (match_operand:SI 6 "aarch64_sve_gp_strictness")
8741 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8743 SVE_COND_FP_ABS_CMP))]
8745 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8748 ;; -------------------------------------------------------------------------
8749 ;; ---- [PRED] Select
8750 ;; -------------------------------------------------------------------------
8753 ;; -------------------------------------------------------------------------
8755 (define_insn "@vcond_mask_<mode><mode>"
8756 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8759 (match_operand:PRED_ALL 3 "register_operand" "Upa")
8760 (match_operand:PRED_ALL 1 "register_operand" "Upa"))
8763 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
8765 "sel\t%0.b, %3, %1.b, %2.b"
8768 ;; -------------------------------------------------------------------------
8769 ;; ---- [PRED] Test bits
8770 ;; -------------------------------------------------------------------------
8773 ;; -------------------------------------------------------------------------
8775 ;; Branch based on predicate equality or inequality.
8776 (define_expand "cbranch<mode>4"
8779 (match_operator 0 "aarch64_equality_operator"
8780 [(match_operand:PRED_ALL 1 "register_operand")
8781 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
8782 (label_ref (match_operand 3 ""))
8786 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
8787 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
8788 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
8790 if (operands[2] == CONST0_RTX (<MODE>mode))
8794 pred = gen_reg_rtx (<MODE>mode);
8795 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
8798 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
8799 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
8800 operands[2] = const0_rtx;
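;; A hedged ACLE-level sketch of the expansion above (assuming
;; <arm_sve.h>; the function name is illustrative): two predicates are
;; equal iff their exclusive OR has no active elements under an all-true
;; governing predicate, which is what the XOR + PTEST sequence checks.
;;
;;   #include <arm_sve.h>
;;
;;   int
;;   preds_equal (svbool_t p1, svbool_t p2)
;;   {
;;     svbool_t all = svptrue_b8 ();
;;     return !svptest_any (all, sveor_z (all, p1, p2));
;;   }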
8804 ;; See "Description of UNSPEC_PTEST" above for details.
8805 (define_insn "aarch64_ptest<mode>"
8806 [(set (reg:CC_NZC CC_REGNUM)
8807 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
8809 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8810 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
8816 ;; =========================================================================
8818 ;; =========================================================================
8820 ;; -------------------------------------------------------------------------
8821 ;; ---- [INT,FP] Conditional reductions
8822 ;; -------------------------------------------------------------------------
8826 ;; -------------------------------------------------------------------------
8828 ;; Set operand 0 to the last active element in operand 3, or to tied
8829 ;; operand 1 if no elements are active.
8830 (define_insn "@fold_extract_<last_op>_<mode>"
8831 [(set (match_operand:<VEL> 0 "register_operand")
8833 [(match_operand:<VEL> 1 "register_operand")
8834 (match_operand:<VPRED> 2 "register_operand")
8835 (match_operand:SVE_FULL 3 "register_operand")]
8838 {@ [ cons: =0 , 1 , 2 , 3 ]
8839 [ ?r , 0 , Upl , w ] clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
8840 [ w , 0 , Upl , w ] clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>
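;; A hedged C-level sketch (illustrative only) of a conditional
;; reduction that can use fold_extract_last: the value from the last
;; iteration in which the condition held survives the loop.
;;
;;   int
;;   last_match (const int *restrict a, const int *restrict b, long n)
;;   {
;;     int last = -1;
;;     for (long i = 0; i < n; ++i)
;;       if (a[i] == b[i])
;;         last = i;
;;     return last;
;;   }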
8844 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
8845 [(set (match_operand:SVE_FULL 0 "register_operand")
8847 [(match_operand:SVE_FULL 1 "register_operand")
8848 (match_operand:<VPRED> 2 "register_operand")
8849 (match_operand:SVE_FULL 3 "register_operand")]
8852 {@ [ cons: =0 , 1 , 2 , 3 ]
8853 [ w , 0 , Upl , w ] clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8854 [ ?&w , w , Upl , w ] movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8858 ;; -------------------------------------------------------------------------
8859 ;; ---- [INT] Tree reductions
8860 ;; -------------------------------------------------------------------------
8871 ;; -------------------------------------------------------------------------
8873 ;; Unpredicated integer add reduction.
8874 (define_expand "reduc_plus_scal_<mode>"
8875 [(match_operand:<VEL> 0 "register_operand")
8876 (match_operand:SVE_FULL_I 1 "register_operand")]
8879 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
8880 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
8881 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
8882 if (tmp != operands[0])
8883 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
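;; A hedged C-level sketch (illustrative only) of a loop that reaches
;; reduc_plus_scal: a simple sum reduction.  SADDV/UADDV always produce
;; a 64-bit result, hence the DImode temporary in the expander above.
;;
;;   int
;;   sum (const int *restrict a, long n)
;;   {
;;     int s = 0;
;;     for (long i = 0; i < n; ++i)
;;       s += a[i];
;;     return s;
;;   }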
8888 ;; Predicated integer add reduction. The result is always 64-bits.
8889 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8890 [(set (match_operand:DI 0 "register_operand" "=w")
8891 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
8892 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8894 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8895 "<su>addv\t%d0, %1, %2.<Vetype>"
8898 ;; Unpredicated integer reductions.
8899 (define_expand "reduc_<optab>_scal_<mode>"
8900 [(set (match_operand:<VEL> 0 "register_operand")
8901 (unspec:<VEL> [(match_dup 2)
8902 (match_operand:SVE_FULL_I 1 "register_operand")]
8903 SVE_INT_REDUCTION))]
8906 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8910 ;; Predicated integer reductions.
8911 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8912 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8913 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8914 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8915 SVE_INT_REDUCTION))]
8917 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
8920 ;; -------------------------------------------------------------------------
8921 ;; ---- [FP] Tree reductions
8922 ;; -------------------------------------------------------------------------
8929 ;; -------------------------------------------------------------------------
8931 ;; Unpredicated floating-point tree reductions.
8932 (define_expand "reduc_<optab>_scal_<mode>"
8933 [(set (match_operand:<VEL> 0 "register_operand")
8934 (unspec:<VEL> [(match_dup 2)
8935 (match_operand:SVE_FULL_F 1 "register_operand")]
8939 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8943 (define_expand "reduc_<fmaxmin>_scal_<mode>"
8944 [(match_operand:<VEL> 0 "register_operand")
8945 (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
8949 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
8954 ;; Predicated floating-point tree reductions.
8955 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8956 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8957 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8958 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8961 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
8964 ;; -------------------------------------------------------------------------
8965 ;; ---- [FP] Left-to-right reductions
8966 ;; -------------------------------------------------------------------------
8969 ;; -------------------------------------------------------------------------
8971 ;; Unpredicated in-order FP reductions.
8972 (define_expand "fold_left_plus_<mode>"
8973 [(set (match_operand:<VEL> 0 "register_operand")
8974 (unspec:<VEL> [(match_dup 3)
8975 (match_operand:<VEL> 1 "register_operand")
8976 (match_operand:SVE_FULL_F 2 "register_operand")]
8978 "TARGET_SVE && TARGET_NON_STREAMING"
8980 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
8984 ;; Predicated in-order FP reductions.
8985 (define_insn "mask_fold_left_plus_<mode>"
8986 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8987 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
8988 (match_operand:<VEL> 1 "register_operand" "0")
8989 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8991 "TARGET_SVE && TARGET_NON_STREAMING"
8992 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
8995 ;; =========================================================================
8997 ;; =========================================================================
8999 ;; -------------------------------------------------------------------------
9000 ;; ---- [INT,FP] General permutes
9001 ;; -------------------------------------------------------------------------
9004 ;; -------------------------------------------------------------------------
9006 (define_expand "vec_perm<mode>"
9007 [(match_operand:SVE_FULL 0 "register_operand")
9008 (match_operand:SVE_FULL 1 "register_operand")
9009 (match_operand:SVE_FULL 2 "register_operand")
9010 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
9011 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
9013 aarch64_expand_sve_vec_perm (operands[0], operands[1],
9014 operands[2], operands[3]);
9019 (define_insn "@aarch64_sve_tbl<mode>"
9020 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9022 [(match_operand:SVE_FULL 1 "register_operand" "w")
9023 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
9026 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9029 ;; -------------------------------------------------------------------------
9030 ;; ---- [INT,FP] Special-purpose unary permutes
9031 ;; -------------------------------------------------------------------------
9036 ;; -------------------------------------------------------------------------
9038 ;; Compact active elements and pad with zeros.
9039 (define_insn "@aarch64_sve_compact<mode>"
9040 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
9042 [(match_operand:<VPRED> 1 "register_operand" "Upl")
9043 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
9044 UNSPEC_SVE_COMPACT))]
9045 "TARGET_SVE && TARGET_NON_STREAMING"
9046 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
9049 ;; Duplicate one element of a vector.
9050 (define_insn "@aarch64_sve_dup_lane<mode>"
9051 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9052 (vec_duplicate:SVE_ALL
9054 (match_operand:SVE_ALL 1 "register_operand" "w")
9055 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
9057 && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)"
9058 "dup\t%0.<Vctype>, %1.<Vctype>[%2]"
9061 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
9063 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
9064 ;; number op2 + N of op1. (We don't need to distinguish between memory
9065 ;; and architectural register lane numbering for op1 or op0, since the
9066 ;; two numbering schemes are the same for SVE.)
9068 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
9069 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
9070 ;; of op0. We therefore get the correct result for both endiannesses.
9072 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
9073 ;; is in the opposite order to architectural register lane numbering.
9074 ;; Thus if we were to do this operation via a V128 temporary register,
9075 ;; the vec_select and vec_duplicate would both involve a reverse operation
9076 ;; for big-endian targets.  In this fused pattern the two reverses cancel each other out.
9078 (define_insn "@aarch64_sve_dupq_lane<mode>"
9079 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9080 (vec_duplicate:SVE_FULL
9082 (match_operand:SVE_FULL 1 "register_operand" "w")
9083 (match_operand 2 "ascending_int_parallel"))))]
9085 && (INTVAL (XVECEXP (operands[2], 0, 0))
9086 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
9087 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
9088 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
9090 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
9091 * GET_MODE_SIZE (<VEL>mode));
9092 operands[2] = gen_int_mode (byte / 16, DImode);
9093 return "dup\t%0.q, %1.q[%2]";
9097 ;; Reverse the order of elements within a full vector.
9098 (define_insn "@aarch64_sve_rev<mode>"
9099 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9101 [(match_operand:SVE_ALL 1 "register_operand" "w")]
9104 "rev\t%0.<Vctype>, %1.<Vctype>")
9106 ;; -------------------------------------------------------------------------
9107 ;; ---- [INT,FP] Special-purpose binary permutes
9108 ;; -------------------------------------------------------------------------
9118 ;; -------------------------------------------------------------------------
9120 ;; Like EXT, but start at the first active element.
9121 (define_insn "@aarch64_sve_splice<mode>"
9122 [(set (match_operand:SVE_FULL 0 "register_operand")
9124 [(match_operand:<VPRED> 1 "register_operand")
9125 (match_operand:SVE_FULL 2 "register_operand")
9126 (match_operand:SVE_FULL 3 "register_operand")]
9127 UNSPEC_SVE_SPLICE))]
9129 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9130 [ w , Upl , 0 , w ; * ] splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
9131 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
9135 ;; Permutes that take half the elements from one vector and half the
9136 ;; elements from the other.
9137 (define_insn "@aarch64_sve_<perm_insn><mode>"
9138 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9140 [(match_operand:SVE_ALL 1 "register_operand" "w")
9141 (match_operand:SVE_ALL 2 "register_operand" "w")]
9144 "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
9147 ;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns
9148 ;; doesn't depend on the mode.
9149 (define_insn "@aarch64_sve_<optab><mode>"
9150 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9152 [(match_operand:SVE_FULL 1 "register_operand" "w")
9153 (match_operand:SVE_FULL 2 "register_operand" "w")]
9156 "<perm_insn>\t%0.q, %1.q, %2.q"
9159 ;; Concatenate two vectors and extract a subvector. Note that the
9160 ;; immediate (third) operand is the lane index not the byte index.
9161 (define_insn "@aarch64_sve_ext<mode>"
9162 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
9164 [(match_operand:SVE_ALL 1 "register_operand" "0, w")
9165 (match_operand:SVE_ALL 2 "register_operand" "w, w")
9166 (match_operand:SI 3 "const_int_operand")]
9169 && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)"
9171 operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8);
9172 return (which_alternative == 0
9173 ? "ext\\t%0.b, %0.b, %2.b, #%3"
9174 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
9176 [(set_attr "movprfx" "*,yes")]
9179 ;; -------------------------------------------------------------------------
9180 ;; ---- [PRED] Special-purpose unary permutes
9181 ;; -------------------------------------------------------------------------
9184 ;; -------------------------------------------------------------------------
9186 (define_insn "@aarch64_sve_rev<mode>"
9187 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9188 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
9191 "rev\t%0.<Vetype>, %1.<Vetype>")
9193 ;; -------------------------------------------------------------------------
9194 ;; ---- [PRED] Special-purpose binary permutes
9195 ;; -------------------------------------------------------------------------
9203 ;; -------------------------------------------------------------------------
9205 ;; Permutes that take half the elements from one vector and half the
9206 ;; elements from the other.
9207 (define_insn "@aarch64_sve_<perm_insn><mode>"
9208 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9209 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
9210 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9213 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9216 ;; Special purpose permute used by the predicate generation instructions.
9217 ;; Unlike the normal permute patterns, these instructions operate on VNx16BI
9218 ;; regardless of the element size, so that all input and output bits are
9219 ;; well-defined. Operand 3 then indicates the size of the permute.
9220 (define_insn "@aarch64_sve_trn1_conv<mode>"
9221 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
9222 (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
9223 (match_operand:VNx16BI 2 "register_operand" "Upa")
9224 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
9227 "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
9230 ;; =========================================================================
9232 ;; =========================================================================
9234 ;; -------------------------------------------------------------------------
9235 ;; ---- [INT<-INT] Packs
9236 ;; -------------------------------------------------------------------------
9239 ;; -------------------------------------------------------------------------
9241 ;; Integer pack. Use UZP1 on the narrower type, which discards
9242 ;; the high part of each wide element.
9243 (define_insn "vec_pack_trunc_<Vwide>"
9244 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
9245 (unspec:SVE_FULL_BHSI
9246 [(match_operand:<VWIDE> 1 "register_operand" "w")
9247 (match_operand:<VWIDE> 2 "register_operand" "w")]
9250 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9253 ;; -------------------------------------------------------------------------
9254 ;; ---- [INT<-INT] Unpacks
9255 ;; -------------------------------------------------------------------------
9261 ;; -------------------------------------------------------------------------
9263 ;; Unpack the low or high half of a vector, where "high" refers to
9264 ;; the low-numbered lanes for big-endian and the high-numbered lanes
9265 ;; for little-endian.
9266 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
9267 [(match_operand:<VWIDE> 0 "register_operand")
9269 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
9272 emit_insn ((<hi_lanes_optab>
9273 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
9274 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
9275 (operands[0], operands[1]));
9280 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
9281 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
9283 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
9286 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
9289 ;; -------------------------------------------------------------------------
9290 ;; ---- [INT<-FP] Conversions
9291 ;; -------------------------------------------------------------------------
9295 ;; -------------------------------------------------------------------------
9297 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
9298 ;; SF to SI or DF to DI).
9299 (define_expand "<optab><mode><v_int_equiv>2"
9300 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
9301 (unspec:<V_INT_EQUIV>
9303 (const_int SVE_RELAXED_GP)
9304 (match_operand:SVE_FULL_F 1 "register_operand")]
9308 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
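;; A hedged C-level sketch (illustrative only) of code that reaches the
;; expander above; the conversion truncates towards zero, matching
;; FCVTZS/FCVTZU.
;;
;;   void
;;   to_int (int *restrict dst, const float *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[i] = (int) src[i];
;;   }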
9312 ;; Predicated float-to-integer conversion, either to the same width or wider.
9313 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
9314 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9315 (unspec:SVE_FULL_HSDI
9316 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9317 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9318 (match_operand:SVE_FULL_F 2 "register_operand")]
9320 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9321 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9322 [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9323 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9327 ;; Predicated narrowing float-to-integer conversion.
9328 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9329 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9331 [(match_operand:VNx2BI 1 "register_operand")
9332 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9333 (match_operand:VNx2DF_ONLY 2 "register_operand")]
9336 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9337 [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9338 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9342 ;; Predicated float-to-integer conversion with merging, either to the same width or wider.
9344 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
9345 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9346 (unspec:SVE_FULL_HSDI
9347 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9348 (unspec:SVE_FULL_HSDI
9350 (const_int SVE_STRICT_GP)
9351 (match_operand:SVE_FULL_F 2 "register_operand")]
9353 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9355 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9358 ;; The first alternative doesn't need the earlyclobber, but the only case
9359 ;; it would help is the uninteresting one in which operands 2 and 3 are
9360 ;; the same register (despite having different modes). Making all the
9361 ;; alternatives earlyclobber makes things more consistent for the
9362 ;; register allocator.
9363 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
9364 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9365 (unspec:SVE_FULL_HSDI
9366 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9367 (unspec:SVE_FULL_HSDI
9369 (const_int SVE_RELAXED_GP)
9370 (match_operand:SVE_FULL_F 2 "register_operand")]
9372 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9374 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9375 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9376 [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9377 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9378 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9380 "&& !rtx_equal_p (operands[1], operands[4])"
9382 operands[4] = copy_rtx (operands[1]);
9386 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
9387 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9388 (unspec:SVE_FULL_HSDI
9389 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9390 (unspec:SVE_FULL_HSDI
9392 (const_int SVE_STRICT_GP)
9393 (match_operand:SVE_FULL_F 2 "register_operand")]
9395 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9397 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9398 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9399 [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9400 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9401 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9405 ;; Predicated narrowing float-to-integer conversion with merging.
9406 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9407 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9409 [(match_operand:VNx2BI 1 "register_operand")
9412 (const_int SVE_STRICT_GP)
9413 (match_operand:VNx2DF_ONLY 2 "register_operand")]
9415 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9420 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9421 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9423 [(match_operand:VNx2BI 1 "register_operand")
9426 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9427 (match_operand:VNx2DF_ONLY 2 "register_operand")]
9429 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9432 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9433 [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9434 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9435 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9439 ;; -------------------------------------------------------------------------
9440 ;; ---- [INT<-FP] Packs
9441 ;; -------------------------------------------------------------------------
9442 ;; The patterns in this section are synthetic.
9443 ;; -------------------------------------------------------------------------
9445 ;; Convert two vectors of DF to SI and pack the results into a single vector.
9446 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
9450 (const_int SVE_RELAXED_GP)
9451 (match_operand:VNx2DF 1 "register_operand")]
9456 (const_int SVE_RELAXED_GP)
9457 (match_operand:VNx2DF 2 "register_operand")]
9459 (set (match_operand:VNx4SI 0 "register_operand")
9460 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9463 operands[3] = aarch64_ptrue_reg (VNx2BImode);
9464 operands[4] = gen_reg_rtx (VNx4SImode);
9465 operands[5] = gen_reg_rtx (VNx4SImode);
9469 ;; -------------------------------------------------------------------------
9470 ;; ---- [INT<-FP] Unpacks
9471 ;; -------------------------------------------------------------------------
9472 ;; No patterns here yet!
9473 ;; -------------------------------------------------------------------------
9475 ;; -------------------------------------------------------------------------
9476 ;; ---- [FP<-INT] Conversions
9477 ;; -------------------------------------------------------------------------
9481 ;; -------------------------------------------------------------------------
9483 ;; Unpredicated conversion of integers to floats of the same size
9484 ;; (HI to HF, SI to SF or DI to DF).
9485 (define_expand "<optab><v_int_equiv><mode>2"
9486 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9489 (const_int SVE_RELAXED_GP)
9490 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
9494 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
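;; A hedged C-level sketch (illustrative only) of code that reaches the
;; expander above, mapping to SCVTF (or UCVTF for unsigned sources).
;;
;;   void
;;   to_float (float *restrict dst, const int *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[i] = (float) src[i];
;;   }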
9498 ;; Predicated integer-to-float conversion, either to the same width or narrower.
9500 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9501 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9503 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9504 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9505 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9507 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9508 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9509 [ w , Upl , 0 ; * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9510 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9514 ;; Predicated widening integer-to-float conversion.
9515 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9516 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9518 [(match_operand:VNx2BI 1 "register_operand")
9519 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9520 (match_operand:VNx4SI_ONLY 2 "register_operand")]
9523 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9524 [ w , Upl , 0 ; * ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9525 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9529 ;; Predicated integer-to-float conversion with merging, either to the same
9530 ;; width or narrower.
9531 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9532 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9534 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9537 (const_int SVE_STRICT_GP)
9538 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9540 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9542 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9545 ;; The first alternative doesn't need the earlyclobber, but the only case
9546 ;; it would help is the uninteresting one in which operands 2 and 3 are
9547 ;; the same register (despite having different modes). Making all the
9548 ;; alternatives earlyclobber makes things more consistent for the
9549 ;; register allocator.
9550 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
9551 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9553 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9556 (const_int SVE_RELAXED_GP)
9557 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9559 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9561 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9562 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9563 [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9564 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9565 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9567 "&& !rtx_equal_p (operands[1], operands[4])"
9569 operands[4] = copy_rtx (operands[1]);
9573 (define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
9574 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9576 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9579 (const_int SVE_STRICT_GP)
9580 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9582 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9584 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9585 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9586 [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9587 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9588 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9592 ;; Predicated widening integer-to-float conversion with merging.
9593 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9594 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9596 [(match_operand:VNx2BI 1 "register_operand")
9599 (const_int SVE_STRICT_GP)
9600 (match_operand:VNx4SI_ONLY 2 "register_operand")]
9602 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9607 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9608 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9610 [(match_operand:VNx2BI 1 "register_operand")
9613 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9614 (match_operand:VNx4SI_ONLY 2 "register_operand")]
9616 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9619 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9620 [ w , Upl , w , 0 ; * ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9621 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9622 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9626 ;; -------------------------------------------------------------------------
9627 ;; ---- [FP<-INT] Packs
9628 ;; -------------------------------------------------------------------------
9629 ;; No patterns here yet!
9630 ;; -------------------------------------------------------------------------
9632 ;; -------------------------------------------------------------------------
9633 ;; ---- [FP<-INT] Unpacks
9634 ;; -------------------------------------------------------------------------
9635 ;; The patterns in this section are synthetic.
9636 ;; -------------------------------------------------------------------------
9638 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
9639 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
9640 ;; unpacked VNx4SI to VNx2DF.
9641 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
9642 [(match_operand:VNx2DF 0 "register_operand")
9644 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
9648 /* Use ZIP to do the unpack, since we don't care about the upper halves
9649 and since it has the nice property of not needing any subregs.
9650 If using UUNPK* turns out to be preferable, we could model it as
9651 a ZIP whose first operand is zero. */
9652 rtx temp = gen_reg_rtx (VNx4SImode);
9653 emit_insn ((<hi_lanes_optab>
9654 ? gen_aarch64_sve_zip2vnx4si
9655 : gen_aarch64_sve_zip1vnx4si)
9656 (temp, operands[1], operands[1]));
9657 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
9658 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9659 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
9660 (operands[0], ptrue, temp, strictness));
9665 ;; -------------------------------------------------------------------------
9666 ;; ---- [FP<-FP] Packs
9667 ;; -------------------------------------------------------------------------
9670 ;; -------------------------------------------------------------------------
9672 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
9673 ;; the results into a single vector.
9674 (define_expand "vec_pack_trunc_<Vwide>"
9676 (unspec:SVE_FULL_HSF
9678 (const_int SVE_RELAXED_GP)
9679 (match_operand:<VWIDE> 1 "register_operand")]
9682 (unspec:SVE_FULL_HSF
9684 (const_int SVE_RELAXED_GP)
9685 (match_operand:<VWIDE> 2 "register_operand")]
9687 (set (match_operand:SVE_FULL_HSF 0 "register_operand")
9688 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9691 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9692 operands[4] = gen_reg_rtx (<MODE>mode);
9693 operands[5] = gen_reg_rtx (<MODE>mode);
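;; A hedged C-level sketch (illustrative only) of a narrowing FP loop
;; that can use the pack expander above: each input vector of DF is
;; converted with FCVT and the two results are combined with UZP1.
;;
;;   void
;;   to_single (float *restrict dst, const double *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[i] = (float) src[i];
;;   }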
9697 ;; Predicated float-to-float truncation.
9698 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9699 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9700 (unspec:SVE_FULL_HSF
9701 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9702 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9703 (match_operand:SVE_FULL_SDF 2 "register_operand")]
9705 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9706 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9707 [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9708 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9712 ;; Predicated float-to-float truncation with merging.
9713 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9714 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9715 (unspec:SVE_FULL_HSF
9716 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9717 (unspec:SVE_FULL_HSF
9719 (const_int SVE_STRICT_GP)
9720 (match_operand:SVE_FULL_SDF 2 "register_operand")]
9722 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
9724 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9727 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9728 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9729 (unspec:SVE_FULL_HSF
9730 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9731 (unspec:SVE_FULL_HSF
9733 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9734 (match_operand:SVE_FULL_SDF 2 "register_operand")]
9736 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
9738 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9739 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9740 [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9741 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9742 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9746 ;; -------------------------------------------------------------------------
9747 ;; ---- [FP<-FP] Packs (bfloat16)
9748 ;; -------------------------------------------------------------------------
9752 ;; -------------------------------------------------------------------------
9754 ;; Predicated BFCVT.
9755 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9756 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9758 [(match_operand:VNx4BI 1 "register_operand")
9759 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9760 (match_operand:VNx4SF_ONLY 2 "register_operand")]
9763 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9764 [ w , Upl , 0 ; * ] bfcvt\t%0.h, %1/m, %2.s
9765 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s
9769 ;; Predicated BFCVT with merging.
9770 (define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9771 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9773 [(match_operand:VNx4BI 1 "register_operand")
9776 (const_int SVE_STRICT_GP)
9777 (match_operand:VNx4SF_ONLY 2 "register_operand")]
9779 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
9784 (define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9785 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9787 [(match_operand:VNx4BI 1 "register_operand")
9790 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9791 (match_operand:VNx4SF_ONLY 2 "register_operand")]
9793 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
9796 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9797 [ w , Upl , w , 0 ; * ] bfcvt\t%0.h, %1/m, %2.s
9798 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
9799 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s
9803 ;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
9804 ;; pair because the even elements always have to be supplied for active
9805 ;; elements, even if the inactive elements don't matter.
9807 ;; This instruction does not take MOVPRFX.
9808 (define_insn "@aarch64_sve_cvtnt<mode>"
9809 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
9811 [(match_operand:VNx4BI 2 "register_operand" "Upl")
9812 (const_int SVE_STRICT_GP)
9813 (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
9814 (match_operand:VNx4SF 3 "register_operand" "w")]
9815 UNSPEC_COND_FCVTNT))]
9817 "bfcvtnt\t%0.h, %2/m, %3.s"
9820 ;; -------------------------------------------------------------------------
9821 ;; ---- [FP<-FP] Unpacks
9822 ;; -------------------------------------------------------------------------
9825 ;; -------------------------------------------------------------------------
9827 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
9828 ;; First unpack the source without conversion, then float-convert the unpacked result.
9830 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
9831 [(match_operand:<VWIDE> 0 "register_operand")
9832 (unspec:SVE_FULL_HSF
9833 [(match_operand:SVE_FULL_HSF 1 "register_operand")]
9837 /* Use ZIP to do the unpack, since we don't care about the upper halves
9838 and since it has the nice property of not needing any subregs.
9839 If using UUNPK* turns out to be preferable, we could model it as
9840 a ZIP whose first operand is zero. */
9841 rtx temp = gen_reg_rtx (<MODE>mode);
9842 emit_insn ((<hi_lanes_optab>
9843 ? gen_aarch64_sve_zip2<mode>
9844 : gen_aarch64_sve_zip1<mode>)
9845 (temp, operands[1], operands[1]));
9846 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9847 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9848 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
9849 (operands[0], ptrue, temp, strictness));
9854 ;; Predicated float-to-float extension.
9855 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9856 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9857 (unspec:SVE_FULL_SDF
9858 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9859 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9860 (match_operand:SVE_FULL_HSF 2 "register_operand")]
9862 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9863 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9864 [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9865 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9869 ;; Predicated float-to-float extension with merging.
9870 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9871 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9872 (unspec:SVE_FULL_SDF
9873 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9874 (unspec:SVE_FULL_SDF
9876 (const_int SVE_STRICT_GP)
9877 (match_operand:SVE_FULL_HSF 2 "register_operand")]
9879 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
9881 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9884 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9885 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9886 (unspec:SVE_FULL_SDF
9887 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9888 (unspec:SVE_FULL_SDF
9890 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9891 (match_operand:SVE_FULL_HSF 2 "register_operand")]
9893 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
9895 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9896 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9897 [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9898 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9899 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9903 ;; -------------------------------------------------------------------------
9904 ;; ---- [PRED<-PRED] Packs
9905 ;; -------------------------------------------------------------------------
9908 ;; -------------------------------------------------------------------------
9910 ;; Predicate pack. Use UZP1 on the narrower type, which discards
9911 ;; the high part of each wide element.
9912 (define_insn "vec_pack_trunc_<Vwide>"
9913 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
9915 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
9916 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
9919 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9922 ;; -------------------------------------------------------------------------
9923 ;; ---- [PRED<-PRED] Unpacks
9924 ;; -------------------------------------------------------------------------
9928 ;; -------------------------------------------------------------------------
9930 ;; Unpack the low or high half of a predicate, where "high" refers to
9931 ;; the low-numbered lanes for big-endian and the high-numbered lanes
9932 ;; for little-endian.
9933 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
9934 [(match_operand:<VWIDE> 0 "register_operand")
9935 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
9939 emit_insn ((<hi_lanes_optab>
9940 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
9941 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
9942 (operands[0], operands[1]));
9947 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
9948 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
9949 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
9952 "punpk<perm_hilo>\t%0.h, %1.b"
9955 ;; =========================================================================
9956 ;; == Vector partitioning
9957 ;; =========================================================================
9959 ;; -------------------------------------------------------------------------
9960 ;; ---- [PRED] Unary partitioning
9961 ;; -------------------------------------------------------------------------
9967 ;; -------------------------------------------------------------------------
9969 ;; Note that unlike most other instructions that have both merging and
9970 ;; zeroing forms, these instructions don't operate elementwise and so
9971 ;; don't fit the IFN_COND model.
9972 (define_insn "@aarch64_brk<brk_op>"
9973 [(set (match_operand:VNx16BI 0 "register_operand")
9975 [(match_operand:VNx16BI 1 "register_operand")
9976 (match_operand:VNx16BI 2 "register_operand")
9977 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero")]
9980 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ]
9981 [ &Upa , Upa , Upa , Dz; yes ] brk<brk_op>\t%0.b, %1/z, %2.b
9982 [ ?Upa , 0Upa, 0Upa, Dz; yes ] ^
9983 [ Upa , Upa , Upa , Dz; no ] ^
9984 [ &Upa , Upa , Upa , 0 ; yes ] brk<brk_op>\t%0.b, %1/m, %2.b
9985 [ ?Upa , 0Upa, 0Upa, 0 ; yes ] ^
9986 [ Upa , Upa , Upa , 0 ; no ] ^
9990 ;; Same, but also producing a flags result.
9991 (define_insn "*aarch64_brk<brk_op>_cc"
9992 [(set (reg:CC_NZC CC_REGNUM)
9994 [(match_operand:VNx16BI 1 "register_operand")
9996 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9999 (match_operand:VNx16BI 2 "register_operand")
10000 (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
10003 (set (match_operand:VNx16BI 0 "register_operand")
10010 {@ [ cons: =0, 1 , 2 ; attrs: pred_clobber ]
10011 [ &Upa , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b
10012 [ ?Upa , 0Upa, 0Upa; yes ] ^
10013 [ Upa , Upa , Upa ; no ] ^
10017 ;; Same, but with only the flags result being interesting.
10018 (define_insn "*aarch64_brk<brk_op>_ptest"
10019 [(set (reg:CC_NZC CC_REGNUM)
10021 [(match_operand:VNx16BI 1 "register_operand")
10023 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
10026 (match_operand:VNx16BI 2 "register_operand")
10027 (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
10030 (clobber (match_scratch:VNx16BI 0))]
10032 {@ [ cons: =0, 1 , 2 ; attrs: pred_clobber ]
10033 [ &Upa , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b
10034 [ ?Upa , 0Upa, 0Upa; yes ] ^
10035 [ Upa , Upa , Upa ; no ] ^
10039 ;; -------------------------------------------------------------------------
10040 ;; ---- [PRED] Binary partitioning
10041 ;; -------------------------------------------------------------------------
10049 ;; -------------------------------------------------------------------------
10051 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
10052 (define_insn "@aarch64_brk<brk_op>"
10053 [(set (match_operand:VNx16BI 0 "register_operand")
10055 [(match_operand:VNx16BI 1 "register_operand")
10056 (match_operand:VNx16BI 2 "register_operand")
10057 (match_operand:VNx16BI 3 "register_operand")]
10060 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
10061 [ &Upa , Upa , Upa , <brk_reg_con> ; yes ] brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b
10062 [ ?Upa , 0Upa, 0Upa, 0<brk_reg_con>; yes ] ^
10063 [ Upa , Upa , Upa , <brk_reg_con> ; no ] ^
;; BRKN, producing both a predicate and a flags result. Unlike other
;; flag-setting instructions, these flags are always set with respect to
;; a ptrue.
(define_insn_and_rewrite "*aarch64_brkn_cc"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 4)
  (match_operand:VNx16BI 5)
  (const_int SVE_KNOWN_PTRUE)
  [(match_operand:VNx16BI 1 "register_operand" "Upa")
  (match_operand:VNx16BI 2 "register_operand" "Upa")
  (match_operand:VNx16BI 3 "register_operand" "0")]
  (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
  "brkns\t%0.b, %1/z, %2.b, %0.b"
  "&& (operands[4] != CONST0_RTX (VNx16BImode)
      || operands[5] != CONST0_RTX (VNx16BImode))"
  operands[4] = CONST0_RTX (VNx16BImode);
  operands[5] = CONST0_RTX (VNx16BImode);

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_brkn_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 4)
  (match_operand:VNx16BI 5)
  (const_int SVE_KNOWN_PTRUE)
  [(match_operand:VNx16BI 1 "register_operand" "Upa")
  (match_operand:VNx16BI 2 "register_operand" "Upa")
  (match_operand:VNx16BI 3 "register_operand" "0")]
  (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "brkns\t%0.b, %1/z, %2.b, %0.b"
  "&& (operands[4] != CONST0_RTX (VNx16BImode)
      || operands[5] != CONST0_RTX (VNx16BImode))"
  operands[4] = CONST0_RTX (VNx16BImode);
  operands[5] = CONST0_RTX (VNx16BImode);

;; BRKPA and BRKPB, producing both a predicate and a flags result.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 1 "register_operand")
  (match_operand:SI 4 "aarch64_sve_ptrue_flag")
  (match_operand:VNx16BI 2 "register_operand")
  (match_operand:VNx16BI 3 "register_operand")]
  (set (match_operand:VNx16BI 0 "register_operand")
  {@ [ cons: =0, 1 , 2 , 3 , 4; attrs: pred_clobber ]
     [ &Upa , Upa , Upa , Upa , ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b
     [ ?Upa , 0Upa, 0Upa, 0Upa, ; yes ] ^
     [ Upa , Upa , Upa , Upa , ; no ] ^

;; Same, but with only the flags result being interesting.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 1 "register_operand")
  (match_operand:SI 4 "aarch64_sve_ptrue_flag")
  (match_operand:VNx16BI 2 "register_operand")
  (match_operand:VNx16BI 3 "register_operand")]
  (clobber (match_scratch:VNx16BI 0))]
  {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
     [ &Upa , Upa , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b
     [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
     [ Upa , Upa , Upa , Upa ; no ] ^

;; -------------------------------------------------------------------------
;; ---- [PRED] Scalarization
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_pred_op><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
  [(match_operand:PRED_ALL 1 "register_operand" "Upa")
  (match_operand:SI 2 "aarch64_sve_ptrue_flag")
  (match_operand:PRED_ALL 3 "register_operand" "0")]
  (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
;; Same, but also producing a flags result.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 1 "register_operand" "Upa")
  (match_operand:SI 3 "aarch64_sve_ptrue_flag")
  (match_operand:SI 5 "aarch64_sve_ptrue_flag")
  (match_operand:PRED_ALL 6 "register_operand" "0")]
  (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
  && <max_elem_bits> >= <elem_bits>
  && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  operands[4] = operands[2];
  operands[5] = operands[3];

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 1 "register_operand" "Upa")
  (match_operand:SI 3 "aarch64_sve_ptrue_flag")
  (match_operand:SI 5 "aarch64_sve_ptrue_flag")
  (match_operand:PRED_ALL 6 "register_operand" "0")]
  (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  && <max_elem_bits> >= <elem_bits>
  && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  operands[4] = operands[2];
  operands[5] = operands[3];

;; =========================================================================
;; == Counting elements
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a pattern (scalar)
;; -------------------------------------------------------------------------

;; Count the number of elements in an svpattern. Operand 1 is the pattern,
;; operand 2 is the number of elements that fit in a 128-bit block, and
;; operand 3 is a multiplier in the range [1, 16].
;;
;; Note that this pattern isn't used for SV_ALL (but would work for that too).
(define_insn "aarch64_sve_cnt_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (unspec:SI [(match_operand:DI 1 "const_int_operand")
  (match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))]
  return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
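
;; Illustrative example (values assumed, not taken from the sources): with
;; operand 1 selecting the VL4 pattern, operand 2 equal to 4 (32-bit
;; elements, four per 128-bit block) and operand 3 equal to 2, the output
;; routine would emit something like:
;;
;;      cntw    x0, vl4, mul #2
;;
;; i.e. twice the number of .s elements selected by the VL4 pattern.
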
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (ANY_PLUS:DI (zero_extend:DI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))
  (match_operand:DI_ONLY 1 "register_operand" "0")))]
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",

;; Increment an SImode register by the number of elements in an svpattern
;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_incsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
  (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)
  (match_operand:SI 1 "register_operand" "0")))]
  return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);

;; Increment an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (<paired_extend>:DI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)
  (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
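
;; Illustrative example (operand values assumed): for the SImode saturating
;; forms, signed saturation updates the full X register from its W form,
;; while unsigned saturation operates purely on the W register, giving
;; output such as:
;;
;;      sqincw  x0, w0, vl8, mul #2
;;      uqincw  w0, vl8, mul #2
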
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
  (vec_duplicate:VNx2DI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))
  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]
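
;; Illustrative example (register numbers assumed): when the destination is
;; tied to operand 1 the count is accumulated in place, otherwise a MOVPRFX
;; copy is emitted first:
;;
;;      incd    z0.d, vl8
;; versus
;;      movprfx z0, z1
;;      incd    z0.d, vl8
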
;; Increment a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
  (vec_duplicate:VNx4SI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))
  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; Increment a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (vec_duplicate:VNx8HI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))
  (match_operand:VNx8HI_ONLY 1 "register_operand")))]

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
  (vec_duplicate:VNx8HI
  (match_operator:HI 5 "subreg_lowpart_operator"
  [(unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)]))
  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))))]
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",

;; Decrement an SImode register by the number of elements in an svpattern
;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_decsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
  (minus:SI (match_operand:SI 1 "register_operand" "0")
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))]
  return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);

;; Decrement an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (<paired_extend>:DI
  (match_operand:SI_ONLY 1 "register_operand" "0")
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))))]
  const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
  (vec_duplicate:VNx2DI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; Decrement a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
  (vec_duplicate:VNx4SI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; Decrement a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (match_operand:VNx8HI_ONLY 1 "register_operand")
  (vec_duplicate:VNx8HI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))))]

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
  (vec_duplicate:VNx8HI
  (match_operator:HI 5 "subreg_lowpart_operator"
  [(unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)]))))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a predicate (scalar)
;; -------------------------------------------------------------------------

;; Count the number of set bits in a predicate. Operand 2 is true if
;; operand 1 is known to be all-true.
(define_insn "@aarch64_pred_cntp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
  (match_operand:SI 2 "aarch64_sve_ptrue_flag")
  (match_operand:PRED_ALL 3 "register_operand" "Upa")]
  "cntp\t%x0, %1, %3.<Vetype>")
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
  (unspec:SI [(match_dup 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand")]
  (match_operand:DI_ONLY 1 "register_operand")))]
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

;; Increment an SImode register by the number of set bits in a predicate
;; using modular arithmetic. See aarch64_sve_cntp for a description of
;; the operands.
(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  (match_operand:SI 1 "register_operand" "0")))]
  "incp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<MODE>mode);

;; Increment an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
  (<paired_extend>:DI
  (unspec:SI [(match_dup 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand")]
  (match_operand:SI_ONLY 1 "register_operand"))))]
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (<paired_extend>:DI
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  if (<CODE> == SS_PLUS)
    return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
  return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
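
;; Illustrative example (register numbers assumed): for the saturating
;; SImode forms, signed saturation writes the X register using its W form
;; as the source, while unsigned saturation stays within the W register:
;;
;;      sqincp  x0, p1.h, w0
;;      uqincp  w0, p1.h
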
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
  (vec_duplicate:VNx2DI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
  (vec_duplicate:VNx2DI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.d, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<VPRED>mode);
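
;; Illustrative example (register numbers assumed): INCP/DECP here add or
;; subtract the number of active elements of the predicate in every lane of
;; the vector accumulator, e.g.:
;;
;;      incp    z0.d, p1
;; or, for the untied alternative:
;;      movprfx z0, z1
;;      incp    z0.d, p1
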
;; Increment a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
  (vec_duplicate:VNx4SI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
  (vec_duplicate:VNx4SI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.s, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<VPRED>mode);

;; Increment a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (vec_duplicate:VNx8HI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (vec_duplicate:VNx8HI
  (match_operator:HI 3 "subreg_lowpart_operator"
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.h, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2
  "&& !CONSTANT_P (operands[4])"
  operands[4] = CONSTM1_RTX (<VPRED>mode);

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
  (match_operand:DI_ONLY 1 "register_operand")
  (unspec:SI [(match_dup 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (match_operand:DI_ONLY 1 "register_operand" "0")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

;; Decrement an SImode register by the number of set bits in a predicate
;; using modular arithmetic. See aarch64_sve_cntp for a description of the
;; operands.
(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
  (match_operand:SI 1 "register_operand" "0")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  "decp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<MODE>mode);

;; Decrement an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
  (<paired_extend>:DI
  (match_operand:SI_ONLY 1 "register_operand")
  (unspec:SI [(match_dup 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (<paired_extend>:DI
  (match_operand:SI_ONLY 1 "register_operand" "0")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  if (<CODE> == SS_MINUS)
    return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
  return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
  (match_operand:VNx2DI_ONLY 1 "register_operand")
  (vec_duplicate:VNx2DI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
  (match_operand:VNx2DI_ONLY 1 "register_operand")
  (vec_duplicate:VNx2DI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.d, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<VPRED>mode);

;; Decrement a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
  (match_operand:VNx4SI_ONLY 1 "register_operand")
  (vec_duplicate:VNx4SI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
  (match_operand:VNx4SI_ONLY 1 "register_operand")
  (vec_duplicate:VNx4SI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.s, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<VPRED>mode);

;; Decrement a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (match_operand:VNx8HI_ONLY 1 "register_operand")
  (vec_duplicate:VNx8HI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (match_operand:VNx8HI_ONLY 1 "register_operand")
  (vec_duplicate:VNx8HI
  (match_operator:HI 3 "subreg_lowpart_operator"
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.h, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2
  "&& !CONSTANT_P (operands[4])"
  operands[4] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_split "@aarch64_sve_get_neonq_<mode>"
  [(set (match_operand:<V128> 0 "register_operand" "=w")
  (match_operand:SVE_FULL 1 "register_operand" "w")
  (match_operand 2 "descending_int_parallel")))]
  && BYTES_BIG_ENDIAN
  && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
               GET_MODE_NUNITS (<V128>mode) - 1)"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
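
;; Illustrative note (not from the original sources): these patterns back
;; the Neon-SVE bridge style operations (svget_neonq/svset_neonq), in which
;; the 128-bit Neon value occupies the first 128-bit block of the SVE
;; register.  On big-endian targets that block is not a simple lowpart
;; subreg, which is why the extraction above is written as a vec_select of
;; descending indices and only becomes a plain register move after reload.
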
(define_insn "@aarch64_sve_set_neonq_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
  [(match_operand:SVE_FULL 1 "register_operand" "w")
  (match_operand:<V128> 2 "register_operand" "w")
  (match_operand:<VPRED> 3 "register_operand" "Upl")]
  UNSPEC_SET_NEONQ))]
  && BYTES_BIG_ENDIAN"
  operands[2] = lowpart_subreg (<MODE>mode, operands[2],
                                GET_MODE (operands[2]));
  return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";