1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2024 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The file is organised into the following sections (search for the full
22 ;; line beginning with the section title):
24 ;; == General notes
25 ;; ---- Note on the handling of big-endian SVE
26 ;; ---- Description of UNSPEC_PTEST
27 ;; ---- Description of UNSPEC_PRED_Z
28 ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
30 ;; ---- Note on FFR handling
32 ;; == Moves
33 ;; ---- Moves of single vectors
34 ;; ---- Moves of multiple vectors
35 ;; ---- Moves of predicates
36 ;; ---- Moves of multiple predicates
37 ;; ---- Moves relating to the FFR
39 ;; == Loads
40 ;; ---- Normal contiguous loads
41 ;; ---- Extending contiguous loads
42 ;; ---- First-faulting contiguous loads
43 ;; ---- First-faulting extending contiguous loads
44 ;; ---- Non-temporal contiguous loads
45 ;; ---- Normal gather loads
46 ;; ---- Extending gather loads
47 ;; ---- First-faulting gather loads
48 ;; ---- First-faulting extending gather loads
50 ;; == Prefetches
51 ;; ---- Contiguous prefetches
52 ;; ---- Gather prefetches
54 ;; == Stores
55 ;; ---- Normal contiguous stores
56 ;; ---- Truncating contiguous stores
57 ;; ---- Non-temporal contiguous stores
58 ;; ---- Normal scatter stores
59 ;; ---- Truncating scatter stores
61 ;; == Vector creation
62 ;; ---- [INT,FP] Duplicate element
63 ;; ---- [INT,FP] Initialize from individual elements
64 ;; ---- [INT] Linear series
65 ;; ---- [PRED] Duplicate element
67 ;; == Vector decomposition
68 ;; ---- [INT,FP] Extract index
69 ;; ---- [INT,FP] Extract active element
70 ;; ---- [PRED] Extract index
72 ;; == Unary arithmetic
73 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
74 ;; ---- [INT] General unary arithmetic corresponding to unspecs
75 ;; ---- [INT] Sign and zero extension
76 ;; ---- [INT] Truncation
77 ;; ---- [INT] Logical inverse
78 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
79 ;; ---- [FP] General unary arithmetic corresponding to unspecs
80 ;; ---- [FP] Square root
81 ;; ---- [FP] Reciprocal square root
82 ;; ---- [PRED] Inverse
84 ;; == Binary arithmetic
85 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
86 ;; ---- [INT] Addition
87 ;; ---- [INT] Subtraction
88 ;; ---- [INT] Take address
89 ;; ---- [INT] Absolute difference
90 ;; ---- [INT] Saturating addition and subtraction
91 ;; ---- [INT] Highpart multiplication
92 ;; ---- [INT] Division
93 ;; ---- [INT] Binary logical operations
94 ;; ---- [INT] Binary logical operations (inverted second input)
95 ;; ---- [INT] Shifts (rounding towards -Inf)
96 ;; ---- [INT] Shifts (rounding towards 0)
97 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
98 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
99 ;; ---- [FP] General binary arithmetic corresponding to unspecs
100 ;; ---- [FP] Addition
101 ;; ---- [FP] Complex addition
102 ;; ---- [FP] Subtraction
103 ;; ---- [FP] Absolute difference
104 ;; ---- [FP] Multiplication
105 ;; ---- [FP] Division
106 ;; ---- [FP] Binary logical operations
107 ;; ---- [FP] Sign copying
108 ;; ---- [FP] Maximum and minimum
109 ;; ---- [PRED] Binary logical operations
110 ;; ---- [PRED] Binary logical operations (inverted second input)
111 ;; ---- [PRED] Binary logical operations (inverted result)
113 ;; == Ternary arithmetic
114 ;; ---- [INT] MLA and MAD
115 ;; ---- [INT] MLS and MSB
116 ;; ---- [INT] Dot product
117 ;; ---- [INT] Sum of absolute differences
118 ;; ---- [INT] Matrix multiply-accumulate
119 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
120 ;; ---- [FP] Complex multiply-add
121 ;; ---- [FP] Trigonometric multiply-add
122 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
123 ;; ---- [FP] Matrix multiply-accumulate
125 ;; == Comparisons and selects
126 ;; ---- [INT,FP] Select based on predicates
127 ;; ---- [INT,FP] Compare and select
128 ;; ---- [INT] Comparisons
129 ;; ---- [INT] While tests
130 ;; ---- [FP] Direct comparisons
131 ;; ---- [FP] Absolute comparisons
132 ;; ---- [PRED] Select
133 ;; ---- [PRED] Test bits
135 ;; == Reductions
136 ;; ---- [INT,FP] Conditional reductions
137 ;; ---- [INT] Tree reductions
138 ;; ---- [FP] Tree reductions
139 ;; ---- [FP] Left-to-right reductions
141 ;; == Permutes
142 ;; ---- [INT,FP] General permutes
143 ;; ---- [INT,FP] Special-purpose unary permutes
144 ;; ---- [INT,FP] Special-purpose binary permutes
145 ;; ---- [PRED] Special-purpose unary permutes
146 ;; ---- [PRED] Special-purpose binary permutes
148 ;; == Conversions
149 ;; ---- [INT<-INT] Packs
150 ;; ---- [INT<-INT] Unpacks
151 ;; ---- [INT<-FP] Conversions
152 ;; ---- [INT<-FP] Packs
153 ;; ---- [INT<-FP] Unpacks
154 ;; ---- [FP<-INT] Conversions
155 ;; ---- [FP<-INT] Packs
156 ;; ---- [FP<-INT] Unpacks
157 ;; ---- [FP<-FP] Packs
158 ;; ---- [FP<-FP] Packs (bfloat16)
159 ;; ---- [FP<-FP] Unpacks
160 ;; ---- [PRED<-PRED] Packs
161 ;; ---- [PRED<-PRED] Unpacks
163 ;; == Vector partitioning
164 ;; ---- [PRED] Unary partitioning
165 ;; ---- [PRED] Binary partitioning
166 ;; ---- [PRED] Scalarization
168 ;; == Counting elements
169 ;; ---- [INT] Count elements in a pattern (scalar)
170 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
171 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
172 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
173 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
174 ;; ---- [INT] Count elements in a predicate (scalar)
175 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
176 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
177 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
178 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
180 ;; =========================================================================
181 ;; == General notes
182 ;; =========================================================================
184 ;; -------------------------------------------------------------------------
185 ;; ---- Note on the handling of big-endian SVE
186 ;; -------------------------------------------------------------------------
188 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
189 ;; same way as movdi or movti would: the first byte of memory goes
190 ;; into the most significant byte of the register and the last byte
191 ;; of memory goes into the least significant byte of the register.
192 ;; This is the most natural ordering for Advanced SIMD and matches
193 ;; the ABI layout for 64-bit and 128-bit vector types.
195 ;; As a result, the order of bytes within the register is what GCC
196 ;; expects for a big-endian target, and subreg offsets therefore work
197 ;; as expected, with the first element in memory having subreg offset 0
198 ;; and the last element in memory having the subreg offset associated
199 ;; with a big-endian lowpart. However, this ordering also means that
200 ;; GCC's lane numbering does not match the architecture's numbering:
201 ;; GCC always treats the element at the lowest address in memory
202 ;; (subreg offset 0) as element 0, while the architecture treats
203 ;; the least significant end of the register as element 0.
205 ;; The situation for SVE is different. We want the layout of the
206 ;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
207 ;; logically, a mov<mode> load must be indistinguishable from a
208 ;; maskload<mode> whose mask is all true. We therefore need the
209 ;; register layout to match LD1 rather than LDR. The ABI layout of
210 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
212 ;; As a result, the architecture lane numbering matches GCC's lane
213 ;; numbering, with element 0 always being the first in memory.
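;; As a purely illustrative example (a sketch, not an excerpt from the
;; ABI or architecture documents): suppose memory holds the 32-bit
;; values { 1, 2, 3, 4 }, with 1 at the lowest address.  A big-endian
;; Advanced SIMD V4SI load puts 1 in the most significant 32 bits of
;; the register, so GCC element 0 is 1 but architecture lane 0 (the
;; least significant 32 bits) is 4.  An SVE LD1W of the same memory
;; puts 1 in architecture lane 0, so GCC element 0 and architecture
;; lane 0 both refer to the value 1.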
215 ;; However:
216 ;; - Applying a subreg offset to a register does not give the element
217 ;; that GCC expects: the first element in memory has the subreg offset
218 ;; associated with a big-endian lowpart while the last element in memory
219 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
221 ;; - We cannot use LDR and STR for spill slots that might be accessed
222 ;; via subregs, since although the elements have the order GCC expects,
223 ;; the order of the bytes within the elements is different. We instead
224 ;; access spill slots via LD1 and ST1, using secondary reloads to
225 ;; reserve a predicate register.
227 ;; -------------------------------------------------------------------------
228 ;; ---- Description of UNSPEC_PTEST
229 ;; -------------------------------------------------------------------------
231 ;; SVE provides a PTEST instruction for testing the active lanes of a
232 ;; predicate and setting the flags based on the result. The associated
233 ;; condition code tests are:
235 ;; - any (= ne): at least one active bit is set
236 ;; - none (= eq): all active bits are clear (*)
237 ;; - first (= mi): the first active bit is set
238 ;; - nfrst (= pl): the first active bit is clear (*)
239 ;; - last (= cc): the last active bit is set
240 ;; - nlast (= cs): the last active bit is clear (*)
242 ;; where the conditions marked (*) are also true when there are no active
243 ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
244 ;; of a PTEST use the condition code mode CC_NZC.
246 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
247 ;; This means that for other predicate modes, we need a governing predicate
248 ;; in which all bits are defined.
250 ;; For example, most predicated .H operations ignore the odd bits of the
251 ;; governing predicate, so that an active lane is represented by the
252 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
253 ;; any value. To test a .H predicate, we instead need "10" and "00"
254 ;; respectively, so that the condition only tests the even bits of the
255 ;; predicate.
257 ;; Several instructions set the flags as a side-effect, in the same way
258 ;; that a separate PTEST would. It's important for code quality that we
259 ;; use these flags results as often as possible, particularly in the case
260 ;; of WHILE* and RDFFR.
262 ;; Also, some of the instructions that set the flags are unpredicated
263 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
264 ;; they were predicated on a PTRUE of that size. For example, a .S
265 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
268 ;; We therefore need to represent PTEST operations in a way that
269 ;; makes it easy to combine them with both predicated and unpredicated
270 ;; operations, while using a VNx16BI governing predicate for all
271 ;; predicate modes. We do this using:
273 ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
277 ;; - GP is the real VNx16BI governing predicate
279 ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
280 ;; GP to CAST_GP are guaranteed to be clear in GP.
282 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
283 ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
284 ;; SVE_MAYBE_NOT_PTRUE otherwise.
286 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
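;; For example, a PTEST of a .H predicate OP governed by a .H PTRUE
;; might be represented along the following lines.  This is only an
;; illustrative sketch in the notation above (GP, CAST_GP and OP stand
;; for registers), not a pattern copied from later in the file:
;;
;;   (set (reg:CC_NZC CC_REGNUM)
;;	  (unspec:CC_NZC
;;	    [(reg:VNx16BI GP)		     ; .B view of the governing predicate
;;	     (reg:VNx8BI CAST_GP)	     ; the same value viewed as .H
;;	     (const_int SVE_KNOWN_PTRUE)     ; PTRUE_FLAG
;;	     (reg:VNx8BI OP)]		     ; the predicate being tested
;;	    UNSPEC_PTEST))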
288 ;; -------------------------------------------------------------------------
289 ;; ---- Description of UNSPEC_PRED_Z
290 ;; -------------------------------------------------------------------------
292 ;; SVE integer comparisons are predicated and return zero for inactive
293 ;; lanes. Sometimes we use them with predicates that are all-true and
294 ;; sometimes we use them with general predicates.
296 ;; The integer comparisons also set the flags and so build in the effect
297 ;; of a PTEST. We therefore want to be able to combine integer comparison
298 ;; patterns with PTESTs of the result. One difficulty with doing this is
299 ;; that (as noted above) the PTEST is always a .B operation and so can place
300 ;; stronger requirements on the governing predicate than the comparison does.
302 ;; For example, when applying a separate PTEST to the result of a full-vector
303 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
304 ;; .B PTRUE. In contrast, the comparison might be predicated on either
305 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
306 ;; bits don't matter for .H operations.
308 ;; We therefore can't rely on a full-vector comparison using the same
309 ;; predicate register as a following PTEST. We instead need to remember
310 ;; whether a comparison is known to be a full-vector comparison and use
311 ;; this information in addition to a check for equal predicate registers.
312 ;; At the same time, it's useful to have a common representation for all
313 ;; integer comparisons, so that they can be handled by a single set of
314 ;; patterns.
316 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
318 ;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
322 ;; - GP is the governing predicate, of mode <M:VPRED>
324 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
325 ;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
326 ;; otherwise.
328 ;; - CODE is the comparison code
330 ;; - OP0 and OP1 are the values being compared, of mode M
332 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
334 ;; -------------------------------------------------------------------------
335 ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
336 ;; -------------------------------------------------------------------------
338 ;; Many SVE integer operations are predicated. We can generate them
339 ;; from four sources:
341 ;; (1) Using normal unpredicated optabs. In this case we need to create
342 ;; an all-true predicate register to act as the governing predicate
343 ;; for the SVE instruction. There are no inactive lanes, and thus
344 ;; the values of inactive lanes don't matter.
346 ;; (2) Using _x ACLE functions. In this case the function provides a
347 ;; specific predicate and some lanes might be inactive. However,
348 ;; as for (1), the values of the inactive lanes don't matter.
349 ;; We can make extra lanes active without changing the behavior
350 ;; (although for code-quality reasons we should avoid doing so
351 ;; unnecessarily).
353 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
354 ;; These optabs have a predicate operand that specifies which lanes are
355 ;; active and another operand that provides the values of inactive lanes.
357 ;; (4) Using _m and _z ACLE functions. These functions map to the same
358 ;; patterns as (3), with the _z functions setting inactive lanes to zero
359 ;; and the _m functions setting the inactive lanes to one of the function
360 ;; arguments.
362 ;; For (1) and (2) we need a way of attaching the predicate to a normal
363 ;; unpredicated integer operation. We do this using:
365 ;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
367 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
368 ;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE;
369 ;; it always is for (1), but might not be for (2).
371 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
372 ;; all-true. It is always semantically valid to replace PRED with a PTRUE,
373 ;; but as noted above, we should only do so if there's a specific benefit.
375 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
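;; For example, an addition generated from source (1) might look
;; roughly like this, with PTRUE, A and B standing for registers
;; (an illustrative sketch of the representation above, not a pattern
;; copied from later in the file):
;;
;;   (set (reg:VNx4SI RES)
;;	  (unspec:VNx4SI
;;	    [(reg:VNx4BI PTRUE)		       ; PRED, here an all-true predicate
;;	     (plus:VNx4SI (reg:VNx4SI A)       ; the normal integer operation
;;			  (reg:VNx4SI B))]
;;	    UNSPEC_PRED_X))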
377 ;; For (3) and (4) we can simply use the SVE port's normal representation
378 ;; of a predicate-based select:
380 ;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
382 ;; where INACTIVE specifies the values of inactive lanes.
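;; For example, a cond_add whose inactive lanes are zero (the _z case)
;; might take roughly the following form; again this is only a sketch
;; of the representation above, with PRED, A and B standing for
;; registers:
;;
;;   (set (reg:VNx4SI RES)
;;	  (unspec:VNx4SI
;;	    [(reg:VNx4BI PRED)		       ; which lanes are active
;;	     (plus:VNx4SI (reg:VNx4SI A)       ; the integer operation
;;			  (reg:VNx4SI B))
;;	     (const_vector:VNx4SI [0 ...])]    ; INACTIVE, here zero
;;	    UNSPEC_SEL))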
384 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
385 ;; than inserting the integer operation directly. This is mostly useful
386 ;; if we want the combine pass to merge an integer operation with an explicit
387 ;; vcond_mask (in other words, with a following SEL instruction). However,
388 ;; it's generally better to merge such operations at the gimple level
389 ;; using (3).
391 ;; -------------------------------------------------------------------------
392 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
393 ;; -------------------------------------------------------------------------
395 ;; Most SVE floating-point operations are predicated. We can generate
396 ;; them from four sources:
398 ;; (1) Using normal unpredicated optabs. In this case we need to create
399 ;; an all-true predicate register to act as the governing predicate
400 ;; for the SVE instruction. There are no inactive lanes, and thus
401 ;; the values of inactive lanes don't matter.
403 ;; (2) Using _x ACLE functions. In this case the function provides a
404 ;; specific predicate and some lanes might be inactive. However,
405 ;; as for (1), the values of the inactive lanes don't matter.
407 ;; The instruction must have the same exception behavior as the
408 ;; function call unless things like command-line flags specifically
409 ;; allow otherwise. For example, with -ffast-math, it is OK to
410 ;; raise exceptions for inactive lanes, but normally it isn't.
412 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
413 ;; These optabs have a predicate operand that specifies which lanes are
414 ;; active and another operand that provides the values of inactive lanes.
416 ;; (4) Using _m and _z ACLE functions. These functions map to the same
417 ;; patterns as (3), with the _z functions setting inactive lanes to zero
418 ;; and the _m functions setting the inactive lanes to one of the function
419 ;; arguments.
421 ;; The predicate requirements differ between these sources:
423 ;; - In (1), the predicate is known to be all true and the pattern can use
424 ;; unpredicated operations where available.
426 ;; - In (2), the predicate might or might not be all true. The pattern can
427 ;; use unpredicated instructions if the predicate is all-true or if things
428 ;; like command-line flags allow exceptions for inactive lanes.
430 ;; - (3) and (4) represent a native SVE predicated operation. Some lanes
431 ;; might be inactive and inactive lanes of the result must have specific
432 ;; values. There is no scope for using unpredicated instructions (and no
433 ;; reason to want to), so the question about command-line flags doesn't
434 ;; arise.
436 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
437 ;; in combination with a separate predicate operand, e.g.
439 ;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
440 ;; (sqrt:SVE_FULL_F 2 "register_operand" "w")]
443 ;; because (sqrt ...) can raise an exception for any lane, including
444 ;; inactive ones. We therefore need to use an unspec instead.
446 ;; Also, (2) requires some way of distinguishing the case in which the
447 ;; predicate might have inactive lanes and cannot be changed from the
448 ;; case in which the predicate has no inactive lanes or can be changed.
449 ;; This information is also useful when matching combined FP patterns
450 ;; in which the predicates might not be equal.
452 ;; We therefore model FP operations as an unspec of the form:
454 ;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
458 ;; - PRED is the governing predicate.
460 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the
461 ;; value SVE_STRICT_GP if PRED might have inactive lanes and if those
462 ;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise.
464 ;; - OP0 OP1 ... are the normal input operands to the operation.
466 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
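;; For example, an _x single-precision addition whose predicate might
;; have inactive lanes (and which must not raise exceptions for them)
;; could be represented roughly as below.  This is an illustrative
;; sketch with PRED, A and B standing for registers, and it assumes
;; that the FADD mnemonic maps to UNSPEC_COND_FADD under the naming
;; scheme above:
;;
;;   (set (reg:VNx4SF RES)
;;	  (unspec:VNx4SF
;;	    [(reg:VNx4BI PRED)		       ; PRED
;;	     (const_int SVE_STRICT_GP)	       ; STRICTNESS
;;	     (reg:VNx4SF A)		       ; OP0
;;	     (reg:VNx4SF B)]		       ; OP1
;;	    UNSPEC_COND_FADD))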
468 ;; For (3) and (4), we combine these operations with an UNSPEC_SEL
469 ;; that selects between the result of the FP operation and the "else"
470 ;; value. (This else value is a merge input for _m ACLE functions
471 ;; and zero for _z ACLE functions.) The outer pattern then has the form:
473 ;; (unspec [pred fp_operation else_value] UNSPEC_SEL)
475 ;; This means that the patterns for (3) and (4) have two predicates:
476 ;; one for the FP operation itself and one for the UNSPEC_SEL.
477 ;; This pattern is equivalent to the result of combining an instance
478 ;; of (1) or (2) with a separate vcond instruction, so these patterns
479 ;; are useful as combine targets too.
481 ;; However, in the combine case, the instructions that we want to
482 ;; combine might use different predicates. Then:
484 ;; - Some of the active lanes of the FP operation might be discarded
485 ;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes,
486 ;; even for SVE_STRICT_GP, since the operations on those lanes are
487 ;; effectively dead code.
489 ;; - Some of the inactive lanes of the FP operation might be selected
490 ;; by the UNSPEC_SEL, giving unspecified values for those lanes.
491 ;; SVE_RELAXED_GP lets us extend the FP operation to cover these
492 ;; extra lanes, but SVE_STRICT_GP does not.
494 ;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
495 ;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
496 ;; This typically leads to patterns like:
498 ;; (unspec [(match_operand 1 "register_operand" "Upl")
499 ;; (unspec [(match_operand N)
500 ;; (const_int SVE_RELAXED_GP)
502 ;; UNSPEC_COND_<MNEMONIC>)
505 ;; where operand N is allowed to be anything. These instructions then
506 ;; have rewrite rules to replace operand N with operand 1, which gives the
507 ;; instructions a canonical form and means that the original operand N is
508 ;; not kept live unnecessarily.
510 ;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
511 ;; a subset of the FP operation predicate. This case isn't interesting
512 ;; for FP operations that have an all-true predicate, since such operations
513 ;; use SVE_RELAXED_GP instead. And it is not possible for instruction
514 ;; conditions to track the subset relationship for arbitrary registers.
515 ;; So in practice, the only useful case for SVE_STRICT_GP is the one
516 ;; in which the predicates match:
518 ;; (unspec [(match_operand 1 "register_operand" "Upl")
519 ;; (unspec [(match_dup 1)
520 ;; (const_int SVE_STRICT_GP)
522 ;; UNSPEC_COND_<MNEMONIC>)
525 ;; This pattern would also be correct for SVE_RELAXED_GP, but it would
526 ;; be redundant with the one above. However, if the combine pattern
527 ;; has multiple FP operations, using a match_operand allows combinations
528 ;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
529 ;; that the predicates are the same:
531 ;; (unspec [(match_operand 1 "register_operand" "Upl")
533 ;; (unspec [(match_dup 1)
534 ;; (match_operand:SI N "aarch64_sve_gp_strictness")
536 ;; UNSPEC_COND_<MNEMONIC1>)
537 ;; (unspec [(match_dup 1)
538 ;; (match_operand:SI M "aarch64_sve_gp_strictness")
540 ;; UNSPEC_COND_<MNEMONIC2>) ...)
543 ;; The fully-relaxed version of this pattern is:
545 ;; (unspec [(match_operand 1 "register_operand" "Upl")
547 ;; (unspec [(match_operand N)
548 ;; (const_int SVE_RELAXED_GP)
550 ;; UNSPEC_COND_<MNEMONIC1>)
551 ;; (unspec [(match_operand M)
552 ;; (const_int SVE_RELAXED_GP)
554 ;; UNSPEC_COND_<MNEMONIC2>) ...)
557 ;; -------------------------------------------------------------------------
558 ;; ---- Note on FFR handling
559 ;; -------------------------------------------------------------------------
561 ;; Logically we want to divide FFR-related instructions into regions
562 ;; that contain exactly one of:
564 ;; - a single write to the FFR
565 ;; - any number of reads from the FFR (but only one read is likely)
566 ;; - any number of LDFF1 and LDNF1 instructions
568 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
569 ;; normal loads as far as possible. This means that they should be
570 ;; schedulable within a region in the same way that LD1 would be,
571 ;; and they should be deleted as dead if the result is unused. The loads
572 ;; should therefore not write to the FFR, since that would both serialize
573 ;; the loads with respect to each other and keep the loads live for any
574 ;; later RDFFRs.
576 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
577 ;; the dependencies. Writing to the FFRT starts a new "FFRT region",
578 ;; while using the FFRT keeps the instruction within its region.
581 ;; - Writes start a new FFRT region as well as setting the FFR:
583 ;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
585 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
586 ;; loads stay within the same FFRT region:
588 ;; L1: load data while using the FFRT
590 ;; In addition, any FFRT region that includes a load also has at least one
591 ;; instance of:
593 ;; L2: FFR = update(FFR, FFRT) [type == no_insn]
595 ;; to make it clear that the region both reads from and writes to the FFR.
597 ;; - Reads do the following:
599 ;; R1: FFRT = FFR [type == no_insn]
600 ;; R2: read from the FFRT
601 ;; R3: FFRT = update(FFRT) [type == no_insn]
603 ;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and
604 ;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
605 ;; cannot move backwards across R3.
607 ;; This way, writes are only kept alive by later loads or reads,
608 ;; and write/read pairs fold normally. For two consecutive reads,
609 ;; the first R3 is made dead by the second R1, which in turn becomes
610 ;; redundant with the first R1. We then have:
612 ;; first R1: FFRT = FFR
613 ;; first read from the FFRT
614 ;; second read from the FFRT
615 ;; second R3: FFRT = update(FFRT)
617 ;; i.e. the two FFRT regions collapse into a single one with two
618 ;; independent reads.
620 ;; The model still prevents some valid optimizations though. For example,
621 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
622 ;; the L2 instructions.
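;; As a rough illustration, a single first-faulting load followed by a
;; read of the FFR maps onto the steps above as follows, in terms of the
;; insn patterns defined later in this file (a sketch of the intended
;; ordering rather than the exact RTL that is emitted):
;;
;;   W1: aarch64_wrffr		       ; set the FFR, start an FFRT region
;;   L1: @aarch64_ld<fn>f1<mode>       ; the LDFF1/LDNF1 itself, uses the FFRT
;;   L2: aarch64_update_ffr_for_load   ; FFR = update(FFR, FFRT)
;;   R1: aarch64_copy_ffr_to_ffrt      ; FFRT = FFR
;;   R2: aarch64_rdffr_z	       ; read the FFR via the FFRT
;;   R3: aarch64_update_ffrt	       ; FFRT = update(FFRT)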
624 ;; =========================================================================
625 ;; == Moves
626 ;; =========================================================================
628 ;; -------------------------------------------------------------------------
629 ;; ---- Moves of single vectors
630 ;; -------------------------------------------------------------------------
632 ;; - MOV (including aliases)
633 ;; - LD1B (contiguous form)
638 ;; - ST1B (contiguous form)
643 ;; -------------------------------------------------------------------------
645 (define_expand "mov<mode>"
646 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
647 (match_operand:SVE_ALL 1 "general_operand"))]
650 /* Use the predicated load and store patterns where possible.
651 This is required for big-endian targets (see the comment at the
652 head of the file) and increases the addressing choices for
653 little-endian.  */
654 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
655 && can_create_pseudo_p ())
657 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
661 if (CONSTANT_P (operands[1]))
663 aarch64_expand_mov_immediate (operands[0], operands[1]);
667 /* Optimize subregs on big-endian targets: we can use REV[BHW]
668 instead of going through memory. */
670 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
675 (define_expand "movmisalign<mode>"
676 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
677 (match_operand:SVE_ALL 1 "general_operand"))]
680 /* Equivalent to a normal move for our purposes. */
681 emit_move_insn (operands[0], operands[1]);
686 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
687 ;; little-endian ordering is acceptable. Only allow memory operations during
688 ;; and after RA; before RA we want the predicated load and store patterns to
689 ;; be used instead.
690 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
691 [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand")
692 (match_operand:SVE_FULL 1 "aarch64_sve_general_operand"))]
694 && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
695 && ((lra_in_progress || reload_completed)
696 || (register_operand (operands[0], <MODE>mode)
697 && nonmemory_operand (operands[1], <MODE>mode)))"
699 [ w , Utr ] ldr\t%0, %1
700 [ Utr , w ] str\t%1, %0
701 [ w , w ] mov\t%0.d, %1.d
702 [ w , Dn ] << aarch64_output_sve_mov_immediate (operands[1]);
706 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
707 ;; or vectors for which little-endian ordering isn't acceptable. Memory
708 ;; accesses require secondary reloads.
709 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
710 [(set (match_operand:SVE_ALL 0 "register_operand")
711 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand"))]
713 && <MODE>mode != VNx16QImode
715 || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
717 [ w , w ] mov\t%0.d, %1.d
718 [ w , Dn ] << aarch64_output_sve_mov_immediate (operands[1]);
722 ;; Handle memory reloads for modes that can't use LDR and STR. We use
723 ;; byte PTRUE for all modes to try to encourage reuse. This pattern
724 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
725 (define_expand "aarch64_sve_reload_mem"
727 [(set (match_operand 0)
729 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
732 /* Create a PTRUE. */
733 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
735 /* Refer to the PTRUE in the appropriate mode for this move. */
736 machine_mode mode = GET_MODE (operands[0]);
737 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
739 /* Emit a predicated load or store. */
740 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
745 ;; A predicated move in which the predicate is known to be all-true.
746 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
747 ;; so changes to this pattern will need changes there as well.
748 (define_insn_and_split "@aarch64_pred_mov<mode>"
749 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
751 [(match_operand:<VPRED> 1 "register_operand")
752 (match_operand:SVE_ALL 2 "nonimmediate_operand")]
755 && (register_operand (operands[0], <MODE>mode)
756 || register_operand (operands[2], <MODE>mode))"
757 {@ [ cons: =0 , 1 , 2 ]
759 [ w , Upl , m ] ld1<Vesize>\t%0.<Vctype>, %1/z, %2
760 [ m , Upl , w ] st1<Vesize>\t%2.<Vctype>, %1, %0
762 "&& register_operand (operands[0], <MODE>mode)
763 && register_operand (operands[2], <MODE>mode)"
764 [(set (match_dup 0) (match_dup 2))]
767 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
768 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
769 ;; for details. We use a special predicate for operand 2 to reduce
770 ;; the number of patterns.
771 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
772 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
774 [(match_operand:VNx16BI 1 "register_operand" "Upl")
775 (match_operand 2 "aarch64_any_register_operand" "w")]
777 "TARGET_SVE && BYTES_BIG_ENDIAN"
779 "&& reload_completed"
782 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
787 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
788 ;; This is equivalent to a subreg on little-endian targets but not for
789 ;; big-endian; see the comment at the head of the file for details.
790 (define_expand "@aarch64_sve_reinterpret<mode>"
791 [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand")
792 (unspec:SVE_ALL_STRUCT
793 [(match_operand 1 "aarch64_any_register_operand")]
794 UNSPEC_REINTERPRET))]
797 machine_mode src_mode = GET_MODE (operands[1]);
798 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
800 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
806 ;; A pattern for handling type punning on big-endian targets. We use a
807 ;; special predicate for operand 1 to reduce the number of patterns.
808 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
809 [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand" "=w")
810 (unspec:SVE_ALL_STRUCT
811 [(match_operand 1 "aarch64_any_register_operand" "w")]
812 UNSPEC_REINTERPRET))]
815 "&& reload_completed"
816 [(set (match_dup 0) (match_dup 1))]
818 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
822 ;; -------------------------------------------------------------------------
823 ;; ---- Moves of multiple vectors
824 ;; -------------------------------------------------------------------------
825 ;; All patterns in this section are synthetic and split to real
826 ;; instructions after reload.
827 ;; -------------------------------------------------------------------------
829 (define_expand "mov<mode>"
830 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
831 (match_operand:SVE_STRUCT 1 "general_operand"))]
834 /* Big-endian loads and stores need to be done via LD1 and ST1;
835 see the comment at the head of the file for details. */
836 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
839 gcc_assert (can_create_pseudo_p ());
840 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
844 if (CONSTANT_P (operands[1]))
846 aarch64_expand_mov_immediate (operands[0], operands[1]);
852 ;; Unpredicated structure moves (little-endian).
853 (define_insn "*aarch64_sve_mov<mode>_le"
854 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
855 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
856 "TARGET_SVE && !BYTES_BIG_ENDIAN"
858 [(set_attr "length" "<insn_length>")]
861 ;; Unpredicated structure moves (big-endian). Memory accesses require
862 ;; secondary reloads.
863 (define_insn "*aarch64_sve_mov<mode>_be"
864 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
865 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
866 "TARGET_SVE && BYTES_BIG_ENDIAN"
868 [(set_attr "length" "<insn_length>")]
871 ;; Split unpredicated structure moves into pieces. This is the same
872 ;; for both big-endian and little-endian code, although it only needs
873 ;; to handle memory operands for little-endian code.
875 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
876 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
877 "TARGET_SVE && reload_completed"
880 rtx dest = operands[0];
881 rtx src = operands[1];
882 if (REG_P (dest) && REG_P (src))
883 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
885 for (unsigned int i = 0; i < <vector_count>; ++i)
887 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
888 i * BYTES_PER_SVE_VECTOR);
889 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
890 i * BYTES_PER_SVE_VECTOR);
891 emit_insn (gen_rtx_SET (subdest, subsrc));
897 ;; Predicated structure moves. This works for both endiannesses but in
898 ;; practice is only useful for big-endian.
899 (define_insn_and_split "@aarch64_pred_mov<mode>"
900 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
902 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
903 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
906 && (register_operand (operands[0], <MODE>mode)
907 || register_operand (operands[2], <MODE>mode))"
909 "&& reload_completed"
912 for (unsigned int i = 0; i < <vector_count>; ++i)
914 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
916 i * BYTES_PER_SVE_VECTOR);
917 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
919 i * BYTES_PER_SVE_VECTOR);
920 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
924 [(set_attr "length" "<insn_length>")]
927 ;; -------------------------------------------------------------------------
928 ;; ---- Moves of predicates
929 ;; -------------------------------------------------------------------------
937 ;; -------------------------------------------------------------------------
939 (define_expand "mov<mode>"
940 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
941 (match_operand:PRED_ALL 1 "general_operand"))]
944 if (GET_CODE (operands[0]) == MEM)
945 operands[1] = force_reg (<MODE>mode, operands[1]);
947 if (CONSTANT_P (operands[1]))
949 aarch64_expand_mov_immediate (operands[0], operands[1]);
955 (define_insn "*aarch64_sve_mov<mode>"
956 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
957 (match_operand:PRED_ALL 1 "aarch64_mov_operand"))]
959 && (register_operand (operands[0], <MODE>mode)
960 || register_operand (operands[1], <MODE>mode))"
962 [ Upa , Upa ] mov\t%0.b, %1.b
963 [ m , Upa ] str\t%1, %0
964 [ Upa , m ] ldr\t%0, %1
965 [ Upa , Dn ] << aarch64_output_sve_mov_immediate (operands[1]);
969 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
970 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
971 [(set (reg:CC_NZC CC_REGNUM)
975 (const_int SVE_KNOWN_PTRUE)
976 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
978 [(match_operand:SI 4 "const_int_operand")
979 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
982 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
986 return aarch64_output_sve_ptrues (operands[1]);
988 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
990 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
994 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
995 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
996 [(set (reg:CC_NZC CC_REGNUM)
1000 (const_int SVE_KNOWN_PTRUE)
1002 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1004 [(match_operand:SI 4 "const_int_operand")
1005 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1008 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1012 return aarch64_output_sve_ptrues (operands[1]);
1014 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1016 operands[2] = CONSTM1_RTX (VNx16BImode);
1017 operands[3] = CONSTM1_RTX (<MODE>mode);
1021 ;; Match PTRUES Pn.B when only the flags result is useful (which is
1022 ;; a way of testing VL).
1023 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
1024 [(set (reg:CC_NZC CC_REGNUM)
1028 (const_int SVE_KNOWN_PTRUE)
1029 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1031 [(match_operand:SI 4 "const_int_operand")
1032 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
1035 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1038 return aarch64_output_sve_ptrues (operands[1]);
1040 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1042 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
1046 ;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
1047 ;; a way of testing VL).
1048 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
1049 [(set (reg:CC_NZC CC_REGNUM)
1053 (const_int SVE_KNOWN_PTRUE)
1055 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1057 [(match_operand:SI 4 "const_int_operand")
1058 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1061 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1064 return aarch64_output_sve_ptrues (operands[1]);
1066 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1068 operands[2] = CONSTM1_RTX (VNx16BImode);
1069 operands[3] = CONSTM1_RTX (<MODE>mode);
1073 ;; -------------------------------------------------------------------------
1074 ;; ---- Moves of multiple predicates
1075 ;; -------------------------------------------------------------------------
1077 (define_insn_and_split "movvnx32bi"
1078 [(set (match_operand:VNx32BI 0 "nonimmediate_operand")
1079 (match_operand:VNx32BI 1 "aarch64_mov_operand"))]
1086 "&& reload_completed"
1089 aarch64_split_double_move (operands[0], operands[1], VNx16BImode);
1094 ;; -------------------------------------------------------------------------
1095 ;; ---- Moves relating to the FFR
1096 ;; -------------------------------------------------------------------------
1101 ;; -------------------------------------------------------------------------
1103 ;; [W1 in the block comment above about FFR handling]
1105 ;; Write to the FFR and start a new FFRT scheduling region.
1106 (define_insn "aarch64_wrffr"
1107 [(set (reg:VNx16BI FFR_REGNUM)
1108 (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one"))
1109 (set (reg:VNx16BI FFRT_REGNUM)
1110 (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
1111 "TARGET_SVE && TARGET_NON_STREAMING"
1118 ;; [L2 in the block comment above about FFR handling]
1120 ;; Introduce a read from and write to the FFR in the current FFRT region,
1121 ;; so that the FFR value is live on entry to the region and so that the FFR
1122 ;; value visibly changes within the region. This is used (possibly multiple
1123 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
1124 (define_insn "aarch64_update_ffr_for_load"
1125 [(set (reg:VNx16BI FFR_REGNUM)
1126 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1127 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1130 [(set_attr "type" "no_insn")]
1133 ;; [R1 in the block comment above about FFR handling]
1135 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1136 ;; can be read from there by the RDFFR instructions below. This acts
1137 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1138 ;; creates a natural dependency with earlier writes.
1139 (define_insn "aarch64_copy_ffr_to_ffrt"
1140 [(set (reg:VNx16BI FFRT_REGNUM)
1141 (reg:VNx16BI FFR_REGNUM))]
1144 [(set_attr "type" "no_insn")]
1147 ;; [R2 in the block comment above about FFR handling]
1149 ;; Read the FFR via the FFRT.
1150 (define_insn "aarch64_rdffr"
1151 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1152 (reg:VNx16BI FFRT_REGNUM))]
1153 "TARGET_SVE && TARGET_NON_STREAMING"
1157 ;; Likewise with zero predication.
1158 (define_insn "aarch64_rdffr_z"
1159 [(set (match_operand:VNx16BI 0 "register_operand")
1161 (reg:VNx16BI FFRT_REGNUM)
1162 (match_operand:VNx16BI 1 "register_operand")))]
1163 "TARGET_SVE && TARGET_NON_STREAMING"
1164 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1165 [ &Upa , Upa ; yes ] rdffr\t%0.b, %1/z
1166 [ ?Upa , 0Upa; yes ] ^
1167 [ Upa , Upa ; no ] ^
1171 ;; Read the FFR to test for a fault, without using the predicate result.
1172 (define_insn "*aarch64_rdffr_z_ptest"
1173 [(set (reg:CC_NZC CC_REGNUM)
1175 [(match_operand:VNx16BI 1 "register_operand")
1177 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1179 (reg:VNx16BI FFRT_REGNUM)
1182 (clobber (match_scratch:VNx16BI 0))]
1183 "TARGET_SVE && TARGET_NON_STREAMING"
1184 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1185 [ &Upa , Upa ; yes ] rdffrs\t%0.b, %1/z
1186 [ ?Upa , 0Upa; yes ] ^
1187 [ Upa , Upa ; no ] ^
1191 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1192 (define_insn "*aarch64_rdffr_ptest"
1193 [(set (reg:CC_NZC CC_REGNUM)
1195 [(match_operand:VNx16BI 1 "register_operand")
1197 (const_int SVE_KNOWN_PTRUE)
1198 (reg:VNx16BI FFRT_REGNUM)]
1200 (clobber (match_scratch:VNx16BI 0))]
1201 "TARGET_SVE && TARGET_NON_STREAMING"
1202 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1203 [ &Upa , Upa ; yes ] rdffrs\t%0.b, %1/z
1204 [ ?Upa , 0Upa; yes ] ^
1205 [ Upa , Upa ; no ] ^
1209 ;; Read the FFR with zero predication and test the result.
1210 (define_insn "*aarch64_rdffr_z_cc"
1211 [(set (reg:CC_NZC CC_REGNUM)
1213 [(match_operand:VNx16BI 1 "register_operand")
1215 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1217 (reg:VNx16BI FFRT_REGNUM)
1220 (set (match_operand:VNx16BI 0 "register_operand")
1222 (reg:VNx16BI FFRT_REGNUM)
1224 "TARGET_SVE && TARGET_NON_STREAMING"
1225 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1226 [ &Upa , Upa ; yes ] rdffrs\t%0.b, %1/z
1227 [ ?Upa , 0Upa; yes ] ^
1228 [ Upa , Upa ; no ] ^
1232 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1233 (define_insn "*aarch64_rdffr_cc"
1234 [(set (reg:CC_NZC CC_REGNUM)
1236 [(match_operand:VNx16BI 1 "register_operand")
1238 (const_int SVE_KNOWN_PTRUE)
1239 (reg:VNx16BI FFRT_REGNUM)]
1241 (set (match_operand:VNx16BI 0 "register_operand")
1242 (reg:VNx16BI FFRT_REGNUM))]
1243 "TARGET_SVE && TARGET_NON_STREAMING"
1244 {@ [ cons: =0, 1 ; attrs: pred_clobber ]
1245 [ &Upa , Upa ; yes ] rdffrs\t%0.b, %1/z
1246 [ ?Upa , 0Upa; yes ] ^
1247 [ Upa , Upa ; no ] ^
1251 ;; [R3 in the block comment above about FFR handling]
1253 ;; Arbitrarily update the FFRT after a read from the FFR. This acts as
1254 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
1255 (define_insn "aarch64_update_ffrt"
1256 [(set (reg:VNx16BI FFRT_REGNUM)
1257 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1260 [(set_attr "type" "no_insn")]
1263 ;; =========================================================================
1264 ;; == Loads
1265 ;; =========================================================================
1267 ;; -------------------------------------------------------------------------
1268 ;; ---- Normal contiguous loads
1269 ;; -------------------------------------------------------------------------
1270 ;; Includes contiguous forms of:
1287 ;; -------------------------------------------------------------------------
1289 ;; Predicated LD1 (single).
1290 (define_insn "maskload<mode><vpred>"
1291 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1293 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1294 (match_operand:SVE_ALL 1 "memory_operand" "m")]
1297 "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1300 ;; Unpredicated LD[234].
1301 (define_expand "vec_load_lanes<mode><vsingle>"
1302 [(set (match_operand:SVE_STRUCT 0 "register_operand")
1305 (match_operand:SVE_STRUCT 1 "memory_operand")]
1309 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1313 ;; Predicated LD[234].
1314 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1315 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1317 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1318 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1321 "ld<vector_count><Vesize>\t%0, %2/z, %1"
1324 ;; -------------------------------------------------------------------------
1325 ;; ---- Extending contiguous loads
1326 ;; -------------------------------------------------------------------------
1327 ;; Includes contiguous forms of:
1334 ;; -------------------------------------------------------------------------
1336 ;; Predicated load and extend, with 8 elements per 128-bit block.
1337 (define_insn_and_rewrite "@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1338 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1340 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1341 (ANY_EXTEND:SVE_HSDI
1342 (unspec:SVE_PARTIAL_I
1343 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1344 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1347 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1348 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1349 "&& !CONSTANT_P (operands[3])"
1351 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1355 ;; -------------------------------------------------------------------------
1356 ;; ---- First-faulting contiguous loads
1357 ;; -------------------------------------------------------------------------
1358 ;; Includes contiguous forms of:
1367 ;; -------------------------------------------------------------------------
1369 ;; Contiguous non-extending first-faulting or non-faulting loads.
1370 (define_insn "@aarch64_ld<fn>f1<mode>"
1371 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1373 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1374 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1375 (reg:VNx16BI FFRT_REGNUM)]
1377 "TARGET_SVE && TARGET_NON_STREAMING"
1378 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1381 ;; -------------------------------------------------------------------------
1382 ;; ---- First-faulting extending contiguous loads
1383 ;; -------------------------------------------------------------------------
1384 ;; Includes contiguous forms of:
1397 ;; -------------------------------------------------------------------------
1399 ;; Predicated first-faulting or non-faulting load and extend.
1400 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1401 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1403 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1404 (ANY_EXTEND:SVE_HSDI
1405 (unspec:SVE_PARTIAL_I
1406 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1407 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1408 (reg:VNx16BI FFRT_REGNUM)]
1412 && TARGET_NON_STREAMING
1413 && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1414 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1415 "&& !CONSTANT_P (operands[3])"
1417 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1421 ;; -------------------------------------------------------------------------
1422 ;; ---- Non-temporal contiguous loads
1423 ;; -------------------------------------------------------------------------
1429 ;; -------------------------------------------------------------------------
1431 ;; Predicated contiguous non-temporal load (single).
1432 (define_insn "@aarch64_ldnt1<mode>"
1433 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1435 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1436 (match_operand:SVE_FULL 1 "memory_operand" "m")]
1439 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1442 ;; -------------------------------------------------------------------------
1443 ;; ---- Normal gather loads
1444 ;; -------------------------------------------------------------------------
1445 ;; Includes gather forms of:
1448 ;; -------------------------------------------------------------------------
1450 ;; Unpredicated gather loads.
1451 (define_expand "gather_load<mode><v_int_container>"
1452 [(set (match_operand:SVE_24 0 "register_operand")
1455 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1456 (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1457 (match_operand:DI 3 "const_int_operand")
1458 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1459 (mem:BLK (scratch))]
1460 UNSPEC_LD1_GATHER))]
1461 "TARGET_SVE && TARGET_NON_STREAMING"
1463 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1467 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
1468 ;; unsigned extension and false for signed extension.
1469 (define_insn "mask_gather_load<mode><v_int_container>"
1470 [(set (match_operand:SVE_4 0 "register_operand")
1472 [(match_operand:VNx4BI 5 "register_operand")
1473 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1474 (match_operand:VNx4SI 2 "register_operand")
1475 (match_operand:DI 3 "const_int_operand")
1476 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1477 (mem:BLK (scratch))]
1478 UNSPEC_LD1_GATHER))]
1479 "TARGET_SVE && TARGET_NON_STREAMING"
1480 {@ [cons: =0, 1, 2, 3, 4, 5 ]
1481 [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1482 [?w, Z, 0, Ui1, Ui1, Upl] ^
1483 [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1484 [?w, vgw, 0, Ui1, Ui1, Upl] ^
1485 [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1486 [?w, rk, 0, Z, Ui1, Upl] ^
1487 [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1488 [?w, rk, 0, Ui1, Ui1, Upl] ^
1489 [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1490 [?w, rk, 0, Z, i, Upl] ^
1491 [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1492 [?w, rk, 0, Ui1, i, Upl] ^
1496 ;; Predicated gather loads for 64-bit elements. The value of operand 3
1497 ;; doesn't matter in this case.
1498 (define_insn "mask_gather_load<mode><v_int_container>"
1499 [(set (match_operand:SVE_2 0 "register_operand")
1501 [(match_operand:VNx2BI 5 "register_operand")
1502 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1503 (match_operand:VNx2DI 2 "register_operand")
1504 (match_operand:DI 3 "const_int_operand")
1505 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1506 (mem:BLK (scratch))]
1507 UNSPEC_LD1_GATHER))]
1508 "TARGET_SVE && TARGET_NON_STREAMING"
1509 {@ [cons: =0, 1, 2, 3, 4, 5]
1510 [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1511 [?w, Z, 0, i, Ui1, Upl] ^
1512 [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1513 [?w, vgd, 0, i, Ui1, Upl] ^
1514 [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1515 [?w, rk, 0, i, Ui1, Upl] ^
1516 [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1517 [?w, rk, 0, i, i, Upl] ^
1521 ;; Likewise, but with the offset being extended from 32 bits.
1522 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1523 [(set (match_operand:SVE_2 0 "register_operand")
1525 [(match_operand:VNx2BI 5 "register_operand")
1526 (match_operand:DI 1 "register_operand")
1530 (match_operand:VNx2SI 2 "register_operand"))]
1532 (match_operand:DI 3 "const_int_operand")
1533 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1534 (mem:BLK (scratch))]
1535 UNSPEC_LD1_GATHER))]
1536 "TARGET_SVE && TARGET_NON_STREAMING"
1537 {@ [cons: =0, 1, 2, 3, 4, 5]
1538 [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1539 [?w, rk, 0, i, Ui1, Upl ] ^
1540 [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]
1541 [?w, rk, 0, i, i, Upl ] ^
1543 "&& !CONSTANT_P (operands[6])"
1545 operands[6] = CONSTM1_RTX (VNx2BImode);
1549 ;; Likewise, but with the offset being truncated to 32 bits and then
1550 ;; sign-extended.
1551 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1552 [(set (match_operand:SVE_2 0 "register_operand")
1554 [(match_operand:VNx2BI 5 "register_operand")
1555 (match_operand:DI 1 "register_operand")
1560 (match_operand:VNx2DI 2 "register_operand")))]
1562 (match_operand:DI 3 "const_int_operand")
1563 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1564 (mem:BLK (scratch))]
1565 UNSPEC_LD1_GATHER))]
1566 "TARGET_SVE && TARGET_NON_STREAMING"
1567 {@ [cons: =0, 1, 2, 3, 4, 5]
1568 [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1569 [?w, rk, 0, i, Ui1, Upl ] ^
1570 [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1571 [?w, rk, 0, i, i, Upl ] ^
1573 "&& !CONSTANT_P (operands[6])"
1575 operands[6] = CONSTM1_RTX (VNx2BImode);
1579 ;; Likewise, but with the offset being truncated to 32 bits and then
1580 ;; zero-extended.
1581 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1582 [(set (match_operand:SVE_2 0 "register_operand")
1584 [(match_operand:VNx2BI 5 "register_operand")
1585 (match_operand:DI 1 "register_operand")
1587 (match_operand:VNx2DI 2 "register_operand")
1588 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1589 (match_operand:DI 3 "const_int_operand")
1590 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1591 (mem:BLK (scratch))]
1592 UNSPEC_LD1_GATHER))]
1593 "TARGET_SVE && TARGET_NON_STREAMING"
1594 {@ [cons: =0, 1, 2, 3, 4, 5]
1595 [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1596 [?w, rk, 0, i, Ui1, Upl ] ^
1597 [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1598 [?w, rk, 0, i, i, Upl ] ^
1602 ;; -------------------------------------------------------------------------
1603 ;; ---- Extending gather loads
1604 ;; -------------------------------------------------------------------------
1605 ;; Includes gather forms of:
1612 ;; -------------------------------------------------------------------------
1614 ;; Predicated extending gather loads for 32-bit elements. Operand 3 is
1615 ;; true for unsigned extension and false for signed extension.
1616 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1617 [(set (match_operand:SVE_4HSI 0 "register_operand")
1619 [(match_operand:VNx4BI 6 "general_operand")
1620 (ANY_EXTEND:SVE_4HSI
1622 [(match_operand:VNx4BI 5 "register_operand")
1623 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>")
1624 (match_operand:VNx4SI 2 "register_operand")
1625 (match_operand:DI 3 "const_int_operand")
1626 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>")
1627 (mem:BLK (scratch))]
1628 UNSPEC_LD1_GATHER))]
1631 && TARGET_NON_STREAMING
1632 && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1633 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1634 [&w, Z, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1635 [?w, Z, 0, Ui1, Ui1, Upl, UplDnm] ^
1636 [&w, vg<SVE_4BHI:Vesize>, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1637 [?w, vg<SVE_4BHI:Vesize>, 0, Ui1, Ui1, Upl, UplDnm] ^
1638 [&w, rk, w, Z, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1639 [?w, rk, 0, Z, Ui1, Upl, UplDnm] ^
1640 [&w, rk, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1641 [?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^
1642 [&w, rk, w, Z, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1643 [?w, rk, 0, Z, i, Upl, UplDnm] ^
1644 [&w, rk, w, Ui1, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1645 [?w, rk, 0, Ui1, i, Upl, UplDnm] ^
1647 "&& !CONSTANT_P (operands[6])"
1649 operands[6] = CONSTM1_RTX (VNx4BImode);
1653 ;; Predicated extending gather loads for 64-bit elements. The value of
1654 ;; operand 3 doesn't matter in this case.
1655 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1656 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1658 [(match_operand:VNx2BI 6 "general_operand")
1659 (ANY_EXTEND:SVE_2HSDI
1661 [(match_operand:VNx2BI 5 "register_operand")
1662 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>")
1663 (match_operand:VNx2DI 2 "register_operand")
1664 (match_operand:DI 3 "const_int_operand")
1665 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1666 (mem:BLK (scratch))]
1667 UNSPEC_LD1_GATHER))]
1670 && TARGET_NON_STREAMING
1671 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1672 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1673 [&w, Z, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1674 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
1675 [&w, vg<SVE_2BHSI:Vesize>, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1676 [?w, vg<SVE_2BHSI:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1677 [&w, rk, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1678 [?w, rk, 0, i, Ui1, Upl, UplDnm] ^
1679 [&w, rk, w, i, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1680 [?w, rk, 0, i, i, Upl, UplDnm] ^
1682 "&& !CONSTANT_P (operands[6])"
1684 operands[6] = CONSTM1_RTX (VNx2BImode);
1688 ;; Likewise, but with the offset being extended from 32 bits.
1689 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1690 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1693 (ANY_EXTEND:SVE_2HSDI
1695 [(match_operand:VNx2BI 5 "register_operand")
1696 (match_operand:DI 1 "aarch64_reg_or_zero")
1700 (match_operand:VNx2SI 2 "register_operand"))]
1702 (match_operand:DI 3 "const_int_operand")
1703 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1704 (mem:BLK (scratch))]
1705 UNSPEC_LD1_GATHER))]
1708 && TARGET_NON_STREAMING
1709 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1710 {@ [cons: =0, 1, 2, 3, 4, 5]
1711 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1712 [?w, rk, 0, i, Ui1, Upl ] ^
1713 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]
1714 [?w, rk, 0, i, i, Upl ] ^
1716 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1718 operands[6] = CONSTM1_RTX (VNx2BImode);
1719 operands[7] = CONSTM1_RTX (VNx2BImode);
1723 ;; Likewise, but with the offset being truncated to 32 bits and then
1724 ;; sign-extended.
1725 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1726 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1729 (ANY_EXTEND:SVE_2HSDI
1731 [(match_operand:VNx2BI 5 "register_operand")
1732 (match_operand:DI 1 "aarch64_reg_or_zero")
1737 (match_operand:VNx2DI 2 "register_operand")))]
1739 (match_operand:DI 3 "const_int_operand")
1740 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1741 (mem:BLK (scratch))]
1742 UNSPEC_LD1_GATHER))]
1745 && TARGET_NON_STREAMING
1746 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1747 {@ [cons: =0, 1, 2, 3, 4, 5]
1748 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1749 [?w, rk, 0, i, Ui1, Upl ] ^
1750 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1751 [?w, rk, 0, i, i, Upl ] ^
1753 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1755 operands[6] = CONSTM1_RTX (VNx2BImode);
1756 operands[7] = CONSTM1_RTX (VNx2BImode);
1760 ;; Likewise, but with the offset being truncated to 32 bits and then
1761 ;; zero-extended.
1762 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1763 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1766 (ANY_EXTEND:SVE_2HSDI
1768 [(match_operand:VNx2BI 5 "register_operand")
1769 (match_operand:DI 1 "aarch64_reg_or_zero")
1771 (match_operand:VNx2DI 2 "register_operand")
1772 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1773 (match_operand:DI 3 "const_int_operand")
1774 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1775 (mem:BLK (scratch))]
1776 UNSPEC_LD1_GATHER))]
1779 && TARGET_NON_STREAMING
1780 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1781 {@ [cons: =0, 1, 2, 3, 4, 5]
1782 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1783 [?w, rk, 0, i, Ui1, Upl ] ^
1784 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1785 [?w, rk, 0, i, i, Upl ] ^
1787 "&& !CONSTANT_P (operands[7])"
1789 operands[7] = CONSTM1_RTX (VNx2BImode);
1793 ;; -------------------------------------------------------------------------
1794 ;; ---- First-faulting gather loads
1795 ;; -------------------------------------------------------------------------
1796 ;; Includes gather forms of:
1799 ;; -------------------------------------------------------------------------
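;; Added illustration (not from the GCC sources): the first-faulting gathers
;; below follow the usual FFR protocol.  A sketch in terms of the SVE ACLE,
;; assuming the intrinsic spellings shown here (check arm_sve.h for the
;; exact names):
;;
;;   #include <arm_sve.h>
;;
;;   svfloat64_t
;;   speculative_gather (const double *base, svuint64_t idx, svbool_t pg,
;;                       svbool_t *valid)
;;   {
;;     svsetffr ();                                /* set all FFR bits      */
;;     svfloat64_t data
;;       = svldff1_gather_u64index_f64 (pg, base, idx);
;;     *valid = svrdffr_z (pg);                    /* lanes actually loaded */
;;     return data;
;;   }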
1801 ;; Predicated first-faulting gather loads for 32-bit elements. Operand
1802 ;; 3 is true for unsigned extension and false for signed extension.
1803 (define_insn "@aarch64_ldff1_gather<mode>"
1804 [(set (match_operand:SVE_FULL_S 0 "register_operand")
1806 [(match_operand:VNx4BI 5 "register_operand")
1807 (match_operand:DI 1 "aarch64_sve_gather_offset_w")
1808 (match_operand:VNx4SI 2 "register_operand")
1809 (match_operand:DI 3 "const_int_operand")
1810 (match_operand:DI 4 "aarch64_gather_scale_operand_w")
1812 (reg:VNx16BI FFRT_REGNUM)]
1813 UNSPEC_LDFF1_GATHER))]
1814 "TARGET_SVE && TARGET_NON_STREAMING"
1815 {@ [cons: =0, 1, 2, 3, 4, 5 ]
1816 [&w, Z, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s]
1817 [?w, Z, 0, i, Ui1, Upl] ^
1818 [&w, vgw, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s, #%1]
1819 [?w, vgw, 0, i, Ui1, Upl] ^
1820 [&w, rk, w, Z, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1821 [?w, rk, 0, Z, Ui1, Upl] ^
1822 [&w, rk, w, Ui1, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1823 [?w, rk, 0, Ui1, Ui1, Upl] ^
1824 [&w, rk, w, Z, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1825 [?w, rk, 0, Z, i, Upl] ^
1826 [&w, rk, w, Ui1, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1827 [?w, rk, 0, Ui1, i, Upl] ^
1831 ;; Predicated first-faulting gather loads for 64-bit elements. The value
1832 ;; of operand 3 doesn't matter in this case.
1833 (define_insn "@aarch64_ldff1_gather<mode>"
1834 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1836 [(match_operand:VNx2BI 5 "register_operand")
1837 (match_operand:DI 1 "aarch64_sve_gather_offset_d")
1838 (match_operand:VNx2DI 2 "register_operand")
1839 (match_operand:DI 3 "const_int_operand")
1840 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1842 (reg:VNx16BI FFRT_REGNUM)]
1843 UNSPEC_LDFF1_GATHER))]
1844 "TARGET_SVE && TARGET_NON_STREAMING"
1845 {@ [cons: =0, 1, 2, 3, 4, 5 ]
1846 [&w, Z, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d]
1847 [?w, Z, 0, i, Ui1, Upl ] ^
1848 [&w, vgd, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d, #%1]
1849 [?w, vgd, 0, i, Ui1, Upl ] ^
1850 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d]
1851 [?w, rk, 0, i, Ui1, Upl ] ^
1852 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1853 [?w, rk, 0, i, i, Upl ] ^
1857 ;; Likewise, but with the offset being sign-extended from 32 bits.
1858 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1859 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1861 [(match_operand:VNx2BI 5 "register_operand")
1862 (match_operand:DI 1 "register_operand")
1867 (match_operand:VNx2DI 2 "register_operand")))]
1869 (match_operand:DI 3 "const_int_operand")
1870 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1872 (reg:VNx16BI FFRT_REGNUM)]
1873 UNSPEC_LDFF1_GATHER))]
1874 "TARGET_SVE && TARGET_NON_STREAMING"
1875 {@ [cons: =0, 1, 2, 3, 4, 5]
1876 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1877 [?w, rk, 0, i, Ui1, Upl ] ^
1878 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1879 [?w, rk, 0, i, i, Upl ] ^
1881 "&& !CONSTANT_P (operands[6])"
1883 operands[6] = CONSTM1_RTX (VNx2BImode);
1887 ;; Likewise, but with the offset being zero-extended from 32 bits.
1888 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1889 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1891 [(match_operand:VNx2BI 5 "register_operand")
1892 (match_operand:DI 1 "register_operand")
1894 (match_operand:VNx2DI 2 "register_operand")
1895 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1896 (match_operand:DI 3 "const_int_operand")
1897 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1899 (reg:VNx16BI FFRT_REGNUM)]
1900 UNSPEC_LDFF1_GATHER))]
1901 "TARGET_SVE && TARGET_NON_STREAMING"
1902 {@ [cons: =0, 1, 2, 3, 4, 5]
1903 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1904 [?w, rk, 0, i, Ui1, Upl ] ^
1905 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1906 [?w, rk, 0, i, i, Upl ] ^
1910 ;; -------------------------------------------------------------------------
1911 ;; ---- First-faulting extending gather loads
1912 ;; -------------------------------------------------------------------------
1913 ;; Includes gather forms of:
1920 ;; -------------------------------------------------------------------------
1922 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1923 ;; Operand 3 is true for unsigned extension and false for signed extension.
1924 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1925 [(set (match_operand:VNx4_WIDE 0 "register_operand")
1927 [(match_operand:VNx4BI 6 "general_operand")
1928 (ANY_EXTEND:VNx4_WIDE
1930 [(match_operand:VNx4BI 5 "register_operand")
1931 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>")
1932 (match_operand:VNx4_WIDE 2 "register_operand")
1933 (match_operand:DI 3 "const_int_operand")
1934 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>")
1936 (reg:VNx16BI FFRT_REGNUM)]
1937 UNSPEC_LDFF1_GATHER))]
1939 "TARGET_SVE && TARGET_NON_STREAMING"
1940 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1941 [&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1942 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
1943 [&w, vg<VNx4_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1944 [?w, vg<VNx4_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1945 [&w, rk, w, Z, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1946 [?w, rk, 0, Z, Ui1, Upl, UplDnm] ^
1947 [&w, rk, w, Ui1, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1948 [?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^
1949 [&w, rk, w, Z, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1950 [?w, rk, 0, Z, i, Upl, UplDnm] ^
1951 [&w, rk, w, Ui1, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1952 [?w, rk, 0, Ui1, i, Upl, UplDnm] ^
1954 "&& !CONSTANT_P (operands[6])"
1956 operands[6] = CONSTM1_RTX (VNx4BImode);
1960 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1961 ;; The value of operand 3 doesn't matter in this case.
1962 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1963 [(set (match_operand:VNx2_WIDE 0 "register_operand")
1965 [(match_operand:VNx2BI 6 "general_operand")
1966 (ANY_EXTEND:VNx2_WIDE
1968 [(match_operand:VNx2BI 5 "register_operand")
1969 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>")
1970 (match_operand:VNx2_WIDE 2 "register_operand")
1971 (match_operand:DI 3 "const_int_operand")
1972 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
1974 (reg:VNx16BI FFRT_REGNUM)]
1975 UNSPEC_LDFF1_GATHER))]
1977 "TARGET_SVE && TARGET_NON_STREAMING"
1978 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1979 [&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1980 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
1981 [&w, vg<VNx2_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1982 [?w, vg<VNx2_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1983 [&w, rk, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1984 [?w, rk, 0, i, Ui1, Upl, UplDnm] ^
1985 [&w, rk, w, i, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1986 [?w, rk, 0, i, i, Upl, UplDnm] ^
1988 "&& !CONSTANT_P (operands[6])"
1990 operands[6] = CONSTM1_RTX (VNx2BImode);
1994 ;; Likewise, but with the offset being sign-extended from 32 bits.
1995 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1996 [(set (match_operand:VNx2_WIDE 0 "register_operand")
1999 (ANY_EXTEND:VNx2_WIDE
2001 [(match_operand:VNx2BI 5 "register_operand")
2002 (match_operand:DI 1 "aarch64_reg_or_zero")
2007 (match_operand:VNx2DI 2 "register_operand")))]
2009 (match_operand:DI 3 "const_int_operand")
2010 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2012 (reg:VNx16BI FFRT_REGNUM)]
2013 UNSPEC_LDFF1_GATHER))]
2015 "TARGET_SVE && TARGET_NON_STREAMING"
2016 {@ [cons: =0, 1, 2, 3, 4, 5]
2017 [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
2018 [?w, rk, 0, i, Ui1, Upl ] ^
2019 [&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
2020 [?w, rk, 0, i, i, Upl ] ^
2022 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
2024 operands[6] = CONSTM1_RTX (VNx2BImode);
2025 operands[7] = CONSTM1_RTX (VNx2BImode);
2029 ;; Likewise, but with the offset being zero-extended from 32 bits.
2030 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
2031 [(set (match_operand:VNx2_WIDE 0 "register_operand")
2034 (ANY_EXTEND:VNx2_WIDE
2036 [(match_operand:VNx2BI 5 "register_operand")
2037 (match_operand:DI 1 "aarch64_reg_or_zero")
2039 (match_operand:VNx2DI 2 "register_operand")
2040 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2041 (match_operand:DI 3 "const_int_operand")
2042 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2044 (reg:VNx16BI FFRT_REGNUM)]
2045 UNSPEC_LDFF1_GATHER))]
2047 "TARGET_SVE && TARGET_NON_STREAMING"
2048 {@ [cons: =0, 1, 2, 3, 4, 5]
2049 [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
2050 [?w, rk, 0, i, Ui1, Upl ] ^
2051 [&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
2052 [?w, rk, 0, i, i, Upl ] ^
2054 "&& !CONSTANT_P (operands[7])"
2056 operands[7] = CONSTM1_RTX (VNx2BImode);
2060 ;; =========================================================================
2061 ;; == Prefetches
2062 ;; =========================================================================
2064 ;; -------------------------------------------------------------------------
2065 ;; ---- Contiguous prefetches
2066 ;; -------------------------------------------------------------------------
2067 ;; Includes contiguous forms of:
2072 ;; -------------------------------------------------------------------------
2074 ;; Contiguous predicated prefetches. Operand 2 gives the real prefetch
2075 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
2076 ;; information about the rw and locality operands of the prefetch rtx.
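;; Added illustration (not from the GCC sources): these prefetches normally
;; originate from the SVE ACLE svprf* intrinsics.  The generic GCC builtin
;; below shows the source-level meaning of the rw and locality values that
;; end up in operands 3 and 4:
;;
;;   void
;;   prefetch_ahead (const double *p, long i)
;;   {
;;     __builtin_prefetch (&p[i + 64], 0, 3);   /* rw = 0 (read), locality = 3 */
;;   }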
2077 (define_insn "@aarch64_sve_prefetch<mode>"
2078 [(prefetch (unspec:DI
2079 [(match_operand:<VPRED> 0 "register_operand" "Upl")
2080 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
2081 (match_operand:DI 2 "const_int_operand")]
2082 UNSPEC_SVE_PREFETCH)
2083 (match_operand:DI 3 "const_int_operand")
2084 (match_operand:DI 4 "const_int_operand"))]
2087 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
2088 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
2092 ;; -------------------------------------------------------------------------
2093 ;; ---- Gather prefetches
2094 ;; -------------------------------------------------------------------------
2095 ;; Includes gather forms of:
2100 ;; -------------------------------------------------------------------------
2102 ;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
2103 ;; are as follows (a short usage sketch follows the list):
2104 ;; 0: the governing predicate
2105 ;; 1: the scalar component of the address
2106 ;; 2: the vector component of the address
2107 ;; 3: 1 for zero extension, 0 for sign extension
2108 ;; 4: the scale multiplier
2109 ;; 5: a vector zero that identifies the mode of data being accessed
2110 ;; 6: the prefetch operator (an svprfop)
2111 ;; 7: the normal RTL prefetch rw flag
2112 ;; 8: the normal RTL prefetch locality value
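;; Added illustration (not from the GCC sources): these patterns are normally
;; reached through the SVE ACLE gather prefetch intrinsics.  A scalar
;; equivalent of what such a prefetch requests is simply a prefetch of each
;; gathered address:
;;
;;   void
;;   prefetch_gather (const float *base, const unsigned int *idx, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       __builtin_prefetch (&base[idx[i]], 0, 3);   /* read, high locality */
;;   }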
2113 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
2114 [(prefetch (unspec:DI
2115 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2116 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
2117 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
2118 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
2119 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2120 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2121 (match_operand:DI 6 "const_int_operand")]
2122 UNSPEC_SVE_PREFETCH_GATHER)
2123 (match_operand:DI 7 "const_int_operand")
2124 (match_operand:DI 8 "const_int_operand"))]
2125 "TARGET_SVE && TARGET_NON_STREAMING"
2127 static const char *const insns[][2] = {
2128 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
2129 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
2130 "prfb", "%0, [%1, %2.s, sxtw]",
2131 "prfb", "%0, [%1, %2.s, uxtw]",
2132 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
2133 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
2135 const char *const *parts = insns[which_alternative];
2136 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2140 ;; Predicated gather prefetches for 64-bit elements. The value of operand 3
2141 ;; doesn't matter in this case.
2142 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
2143 [(prefetch (unspec:DI
2144 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
2145 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
2146 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
2147 (match_operand:DI 3 "const_int_operand")
2148 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
2149 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2150 (match_operand:DI 6 "const_int_operand")]
2151 UNSPEC_SVE_PREFETCH_GATHER)
2152 (match_operand:DI 7 "const_int_operand")
2153 (match_operand:DI 8 "const_int_operand"))]
2154 "TARGET_SVE && TARGET_NON_STREAMING"
2156 static const char *const insns[][2] = {
2157 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
2158 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
2159 "prfb", "%0, [%1, %2.d]",
2160 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
2162 const char *const *parts = insns[which_alternative];
2163 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2167 ;; Likewise, but with the offset being sign-extended from 32 bits.
2168 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
2169 [(prefetch (unspec:DI
2170 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2171 (match_operand:DI 1 "register_operand" "rk, rk")
2176 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
2178 (match_operand:DI 3 "const_int_operand")
2179 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2180 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2181 (match_operand:DI 6 "const_int_operand")]
2182 UNSPEC_SVE_PREFETCH_GATHER)
2183 (match_operand:DI 7 "const_int_operand")
2184 (match_operand:DI 8 "const_int_operand"))]
2185 "TARGET_SVE && TARGET_NON_STREAMING"
2187 static const char *const insns[][2] = {
2188 "prfb", "%0, [%1, %2.d, sxtw]",
2189 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
2191 const char *const *parts = insns[which_alternative];
2192 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2194 "&& !rtx_equal_p (operands[0], operands[9])"
2196 operands[9] = copy_rtx (operands[0]);
2200 ;; Likewise, but with the offset being zero-extended from 32 bits.
2201 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
2202 [(prefetch (unspec:DI
2203 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2204 (match_operand:DI 1 "register_operand" "rk, rk")
2206 (match_operand:VNx2DI 2 "register_operand" "w, w")
2207 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
2208 (match_operand:DI 3 "const_int_operand")
2209 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2210 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2211 (match_operand:DI 6 "const_int_operand")]
2212 UNSPEC_SVE_PREFETCH_GATHER)
2213 (match_operand:DI 7 "const_int_operand")
2214 (match_operand:DI 8 "const_int_operand"))]
2215 "TARGET_SVE && TARGET_NON_STREAMING"
2217 static const char *const insns[][2] = {
2218 "prfb", "%0, [%1, %2.d, uxtw]",
2219 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
2221 const char *const *parts = insns[which_alternative];
2222 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2226 ;; =========================================================================
2227 ;; == Stores
2228 ;; =========================================================================
2230 ;; -------------------------------------------------------------------------
2231 ;; ---- Normal contiguous stores
2232 ;; -------------------------------------------------------------------------
2233 ;; Includes contiguous forms of:
2250 ;; -------------------------------------------------------------------------
2252 ;; Predicated ST1 (single).
2253 (define_insn "maskstore<mode><vpred>"
2254 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2256 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2257 (match_operand:SVE_ALL 1 "register_operand" "w")
2261 "st1<Vesize>\t%1.<Vctype>, %2, %0"
2264 ;; Unpredicated ST[234]. This is always a full update, so the dependence
2265 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2266 ;; There doesn't seem to be any obvious benefit to treating the all-true
2267 ;; case differently though. In particular, it's very unlikely that we'll
2268 ;; only find out during RTL that a store_lanes is dead.
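;; Added illustration (not from the GCC sources): an interleaving store loop
;; such as the one below is the kind of input that may be vectorized to ST2
;; through vec_store_lanes (and similarly ST3/ST4 for three or four streams):
;;
;;   void
;;   interleave2 (float *restrict out, const float *restrict a,
;;                const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       {
;;         out[2 * i] = a[i];
;;         out[2 * i + 1] = b[i];
;;       }
;;   }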
2269 (define_expand "vec_store_lanes<mode><vsingle>"
2270 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2273 (match_operand:SVE_STRUCT 1 "register_operand")
2278 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2282 ;; Predicated ST[234].
2283 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2284 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2286 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2287 (match_operand:SVE_STRUCT 1 "register_operand" "w")
2291 "st<vector_count><Vesize>\t%1, %2, %0"
2294 ;; -------------------------------------------------------------------------
2295 ;; ---- Truncating contiguous stores
2296 ;; -------------------------------------------------------------------------
2301 ;; -------------------------------------------------------------------------
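;; Added illustration (not from the GCC sources): a store that narrows each
;; element, such as the one below, may map to the truncating ST1 forms in
;; this section:
;;
;;   void
;;   narrow_store (unsigned char *restrict dst,
;;                 const unsigned int *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = (unsigned char) src[i];
;;   }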
2303 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2304 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2305 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2307 [(match_operand:VNx8BI 2 "register_operand" "Upl")
2308 (truncate:VNx8_NARROW
2309 (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2313 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2316 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2317 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2318 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2320 [(match_operand:VNx4BI 2 "register_operand" "Upl")
2321 (truncate:VNx4_NARROW
2322 (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2326 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2329 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2330 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2331 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2333 [(match_operand:VNx2BI 2 "register_operand" "Upl")
2334 (truncate:VNx2_NARROW
2335 (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2339 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2342 ;; -------------------------------------------------------------------------
2343 ;; ---- Non-temporal contiguous stores
2344 ;; -------------------------------------------------------------------------
2350 ;; -------------------------------------------------------------------------
2352 (define_insn "@aarch64_stnt1<mode>"
2353 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2355 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2356 (match_operand:SVE_FULL 1 "register_operand" "w")
2360 "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2363 ;; -------------------------------------------------------------------------
2364 ;; ---- Normal scatter stores
2365 ;; -------------------------------------------------------------------------
2366 ;; Includes scatter forms of:
2369 ;; -------------------------------------------------------------------------
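;; Added illustration (not from the GCC sources): a loop of the following
;; shape is the kind of input that may be vectorized to the ST1 scatter
;; stores below:
;;
;;   void
;;   scatter (double *restrict base, const unsigned long *restrict idx,
;;            const double *restrict val, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       base[idx[i]] = val[i];
;;   }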
2371 ;; Unpredicated scatter stores.
2372 (define_expand "scatter_store<mode><v_int_container>"
2373 [(set (mem:BLK (scratch))
2376 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2377 (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2378 (match_operand:DI 2 "const_int_operand")
2379 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2380 (match_operand:SVE_24 4 "register_operand")]
2381 UNSPEC_ST1_SCATTER))]
2382 "TARGET_SVE && TARGET_NON_STREAMING"
2384 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2388 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
2389 ;; unsigned extension and false for signed extension.
2390 (define_insn "mask_scatter_store<mode><v_int_container>"
2391 [(set (mem:BLK (scratch))
2393 [(match_operand:VNx4BI 5 "register_operand")
2394 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2395 (match_operand:VNx4SI 1 "register_operand")
2396 (match_operand:DI 2 "const_int_operand")
2397 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2398 (match_operand:SVE_4 4 "register_operand")]
2399 UNSPEC_ST1_SCATTER))]
2400 "TARGET_SVE && TARGET_NON_STREAMING"
2401 {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2402 [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2403 [ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2404 [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2405 [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2406 [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2407 [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2411 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
2412 ;; doesn't matter in this case.
2413 (define_insn "mask_scatter_store<mode><v_int_container>"
2414 [(set (mem:BLK (scratch))
2416 [(match_operand:VNx2BI 5 "register_operand")
2417 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2418 (match_operand:VNx2DI 1 "register_operand")
2419 (match_operand:DI 2 "const_int_operand")
2420 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2421 (match_operand:SVE_2 4 "register_operand")]
2422 UNSPEC_ST1_SCATTER))]
2423 "TARGET_SVE && TARGET_NON_STREAMING"
2424 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2425 [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2426 [ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2427 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2428 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2432 ;; Likewise, but with the offset being extended from 32 bits.
2433 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2434 [(set (mem:BLK (scratch))
2436 [(match_operand:VNx2BI 5 "register_operand")
2437 (match_operand:DI 0 "register_operand")
2441 (match_operand:VNx2SI 1 "register_operand"))]
2443 (match_operand:DI 2 "const_int_operand")
2444 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2445 (match_operand:SVE_2 4 "register_operand")]
2446 UNSPEC_ST1_SCATTER))]
2447 "TARGET_SVE && TARGET_NON_STREAMING"
2448 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2449 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2450 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]
2452 "&& !CONSTANT_P (operands[6])"
2454 operands[6] = CONSTM1_RTX (<VPRED>mode);
2458 ;; Likewise, but with the offset being truncated to 32 bits and then
2460 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2461 [(set (mem:BLK (scratch))
2463 [(match_operand:VNx2BI 5 "register_operand")
2464 (match_operand:DI 0 "register_operand")
2469 (match_operand:VNx2DI 1 "register_operand")))]
2471 (match_operand:DI 2 "const_int_operand")
2472 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2473 (match_operand:SVE_2 4 "register_operand")]
2474 UNSPEC_ST1_SCATTER))]
2475 "TARGET_SVE && TARGET_NON_STREAMING"
2476 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2477 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2478 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]
2480 "&& !CONSTANT_P (operands[6])"
2482 operands[6] = CONSTM1_RTX (<VPRED>mode);
2486 ;; Likewise, but with the offset being truncated to 32 bits and then
2488 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2489 [(set (mem:BLK (scratch))
2491 [(match_operand:VNx2BI 5 "register_operand")
2492 (match_operand:DI 0 "aarch64_reg_or_zero")
2494 (match_operand:VNx2DI 1 "register_operand")
2495 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2496 (match_operand:DI 2 "const_int_operand")
2497 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2498 (match_operand:SVE_2 4 "register_operand")]
2499 UNSPEC_ST1_SCATTER))]
2500 "TARGET_SVE && TARGET_NON_STREAMING"
2501 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2502 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2503 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]
2507 ;; -------------------------------------------------------------------------
2508 ;; ---- Truncating scatter stores
2509 ;; -------------------------------------------------------------------------
2510 ;; Includes scatter forms of:
2514 ;; -------------------------------------------------------------------------
2516 ;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
2517 ;; true for unsigned extension and false for signed extension.
2518 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2519 [(set (mem:BLK (scratch))
2521 [(match_operand:VNx4BI 5 "register_operand")
2522 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2523 (match_operand:VNx4SI 1 "register_operand")
2524 (match_operand:DI 2 "const_int_operand")
2525 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2526 (truncate:VNx4_NARROW
2527 (match_operand:VNx4_WIDE 4 "register_operand"))]
2528 UNSPEC_ST1_SCATTER))]
2529 "TARGET_SVE && TARGET_NON_STREAMING"
2530 {@ [ cons: 1 , 2 , 4 , 5 ]
2531 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2532 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2533 [ w , Z , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2534 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2535 [ w , Z , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2536 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2540 ;; Predicated truncating scatter stores for 64-bit elements. The value of
2541 ;; operand 2 doesn't matter in this case.
2542 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2543 [(set (mem:BLK (scratch))
2545 [(match_operand:VNx2BI 5 "register_operand")
2546 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2547 (match_operand:VNx2DI 1 "register_operand")
2548 (match_operand:DI 2 "const_int_operand")
2549 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2550 (truncate:VNx2_NARROW
2551 (match_operand:VNx2_WIDE 4 "register_operand"))]
2552 UNSPEC_ST1_SCATTER))]
2553 "TARGET_SVE && TARGET_NON_STREAMING"
2554 {@ [ cons: 1 , 4 , 5 ]
2555 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2556 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2557 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2558 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2562 ;; Likewise, but with the offset being sign-extended from 32 bits.
2563 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2564 [(set (mem:BLK (scratch))
2566 [(match_operand:VNx2BI 5 "register_operand")
2567 (match_operand:DI 0 "register_operand")
2572 (match_operand:VNx2DI 1 "register_operand")))]
2574 (match_operand:DI 2 "const_int_operand")
2575 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2576 (truncate:VNx2_NARROW
2577 (match_operand:VNx2_WIDE 4 "register_operand"))]
2578 UNSPEC_ST1_SCATTER))]
2579 "TARGET_SVE && TARGET_NON_STREAMING"
2580 {@ [ cons: 0 , 1 , 4 , 5 ]
2581 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2582 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]
2584 "&& !rtx_equal_p (operands[5], operands[6])"
2586 operands[6] = copy_rtx (operands[5]);
2590 ;; Likewise, but with the offset being zero-extended from 32 bits.
2591 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2592 [(set (mem:BLK (scratch))
2594 [(match_operand:VNx2BI 5 "register_operand")
2595 (match_operand:DI 0 "aarch64_reg_or_zero")
2597 (match_operand:VNx2DI 1 "register_operand")
2598 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2599 (match_operand:DI 2 "const_int_operand")
2600 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2601 (truncate:VNx2_NARROW
2602 (match_operand:VNx2_WIDE 4 "register_operand"))]
2603 UNSPEC_ST1_SCATTER))]
2604 "TARGET_SVE && TARGET_NON_STREAMING"
2605 {@ [ cons: 0 , 1 , 4 , 5 ]
2606 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2607 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]
2611 ;; =========================================================================
2612 ;; == Vector creation
2613 ;; =========================================================================
2615 ;; -------------------------------------------------------------------------
2616 ;; ---- [INT,FP] Duplicate element
2617 ;; -------------------------------------------------------------------------
2633 ;; -------------------------------------------------------------------------
2635 (define_expand "vec_duplicate<mode>"
2637 [(set (match_operand:SVE_ALL 0 "register_operand")
2638 (vec_duplicate:SVE_ALL
2639 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2640 (clobber (scratch:VNx16BI))])]
2643 if (MEM_P (operands[1]))
2645 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2646 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2647 CONST0_RTX (<MODE>mode)));
2653 ;; Accept memory operands for the benefit of combine, and also in case
2654 ;; the scalar input gets spilled to memory during RA. We want to split
2655 ;; the load at the first opportunity in order to allow the PTRUE to be
2656 ;; optimized with surrounding code.
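;; Added illustration (not from the GCC sources): broadcasting a value that
;; lives in memory, as in the loop below, is the case the following
;; define_insn_and_split handles; accepting the memory operand lets the
;; broadcast become a single LD1RW under a PTRUE rather than a scalar load
;; followed by a DUP:
;;
;;   void
;;   broadcast (float *restrict dst, const float *restrict src, int n)
;;   {
;;     float c = *src;
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = c;
;;   }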
2657 (define_insn_and_split "*vec_duplicate<mode>_reg"
2658 [(set (match_operand:SVE_ALL 0 "register_operand")
2659 (vec_duplicate:SVE_ALL
2660 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2661 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2663 {@ [ cons: =0 , 1 ; attrs: length ]
2664 [ w , r ; 4 ] mov\t%0.<Vetype>, %<vwcore>1
2665 [ w , w ; 4 ] mov\t%0.<Vetype>, %<Vetype>1
2668 "&& MEM_P (operands[1])"
2671 if (GET_CODE (operands[2]) == SCRATCH)
2672 operands[2] = gen_reg_rtx (VNx16BImode);
2673 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2674 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2675 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2676 CONST0_RTX (<MODE>mode)));
2681 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
2683 ;; The addressing mode range of LD1RQ does not match the addressing mode
2684 ;; range of LDR Qn. If the predicate enforced the LD1RQ range, we would
2685 ;; not be able to combine LDR Qns outside that range. The predicate
2686 ;; therefore accepts all memory operands, with only the constraints
2687 ;; enforcing the actual restrictions. If the instruction is split
2688 ;; before RA, we need to load invalid addresses into a temporary.
2690 (define_insn_and_split "@aarch64_vec_duplicate_vq<mode>_le"
2691 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w")
2692 (vec_duplicate:SVE_FULL
2693 (match_operand:<V128> 1 "nonimmediate_operand" "w, UtQ")))
2694 (clobber (match_scratch:VNx16BI 2 "=X, Upl"))]
2695 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2697 switch (which_alternative)
2700 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2701 return "dup\t%0.q, %1.q[0]";
2708 "&& MEM_P (operands[1])"
2711 if (can_create_pseudo_p ()
2712 && !aarch64_sve_ld1rq_operand (operands[1], <V128>mode))
2713 operands[1] = force_reload_address (operands[1]);
2714 if (GET_CODE (operands[2]) == SCRATCH)
2715 operands[2] = gen_reg_rtx (VNx16BImode);
2716 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2717 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2718 emit_insn (gen_aarch64_sve_ld1rq<mode> (operands[0], operands[1], gp));
2723 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2724 ;; The SVE register layout puts memory lane N into (architectural)
2725 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2726 ;; lsb into the register lsb. We therefore have to describe this in rtl
2727 ;; terms as a reverse of the V128 vector followed by a duplicate.
2728 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2729 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2730 (vec_duplicate:SVE_FULL
2732 (match_operand:<V128> 1 "register_operand" "w")
2733 (match_operand 2 "descending_int_parallel"))))]
2736 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2737 GET_MODE_NUNITS (<V128>mode) - 1)"
2739 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2740 return "dup\t%0.q, %1.q[0]";
2744 ;; This is used for vec_duplicate<mode>s from memory, but can also
2745 ;; be used by combine to optimize selects of a vec_duplicate<mode>
2746 ;; with zero.
2747 (define_insn "sve_ld1r<mode>"
2748 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2750 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2751 (vec_duplicate:SVE_ALL
2752 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2753 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2756 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2759 ;; Load 128 bits from memory under predicate control and duplicate to
2760 ;; all vector elements.
2761 (define_insn "@aarch64_sve_ld1rq<mode>"
2762 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2764 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2765 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2769 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2770 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2774 (define_insn "@aarch64_sve_ld1ro<mode>"
2775 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2777 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2778 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2781 "TARGET_SVE_F64MM && TARGET_NON_STREAMING"
2783 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2784 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2788 ;; -------------------------------------------------------------------------
2789 ;; ---- [INT,FP] Initialize from individual elements
2790 ;; -------------------------------------------------------------------------
2793 ;; -------------------------------------------------------------------------
2795 (define_expand "vec_init<mode><Vel>"
2796 [(match_operand:SVE_FULL 0 "register_operand")
2797 (match_operand 1 "")]
2800 aarch64_sve_expand_vector_init (operands[0], operands[1]);
2805 ;; Shift an SVE vector left and insert a scalar into element 0.
2806 (define_insn "vec_shl_insert_<mode>"
2807 [(set (match_operand:SVE_FULL 0 "register_operand")
2809 [(match_operand:SVE_FULL 1 "register_operand")
2810 (match_operand:<VEL> 2 "aarch64_reg_or_zero")]
2813 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
2814 [ ?w , 0 , rZ ; * ] insr\t%0.<Vetype>, %<vwcore>2
2815 [ w , 0 , w ; * ] insr\t%0.<Vetype>, %<Vetype>2
2816 [ ??&w , w , rZ ; yes ] movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2817 [ ?&w , w , w ; yes ] movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2
2821 ;; -------------------------------------------------------------------------
2822 ;; ---- [INT] Linear series
2823 ;; -------------------------------------------------------------------------
2826 ;; -------------------------------------------------------------------------
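;; Added illustration (not from the GCC sources): a loop that computes a
;; linear series, such as the one below, can be vectorized using the INDEX
;; patterns that follow:
;;
;;   void
;;   iota (int *restrict dst, int base, int step, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = base + i * step;
;;   }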
2828 (define_insn "vec_series<mode>"
2829 [(set (match_operand:SVE_I 0 "register_operand")
2831 (match_operand:<VEL> 1 "aarch64_sve_index_operand")
2832 (match_operand:<VEL> 2 "aarch64_sve_index_operand")))]
2834 {@ [ cons: =0 , 1 , 2 ]
2835 [ w , Usi , r ] index\t%0.<Vctype>, #%1, %<vccore>2
2836 [ w , r , Usi ] index\t%0.<Vctype>, %<vccore>1, #%2
2837 [ w , r , r ] index\t%0.<Vctype>, %<vccore>1, %<vccore>2
2841 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2842 ;; of an INDEX instruction.
2843 (define_insn "*vec_series<mode>_plus"
2844 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2846 (vec_duplicate:SVE_I
2847 (match_operand:<VEL> 1 "register_operand" "r"))
2848 (match_operand:SVE_I 2 "immediate_operand")))]
2849 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2851 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2852 return "index\t%0.<Vctype>, %<vccore>1, #%2";
2856 ;; -------------------------------------------------------------------------
2857 ;; ---- [PRED] Duplicate element
2858 ;; -------------------------------------------------------------------------
2859 ;; The patterns in this section are synthetic.
2860 ;; -------------------------------------------------------------------------
2862 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2863 ;; input into the top bit and using a WHILELO. An alternative would be to
2864 ;; duplicate the input and do a compare with zero.
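;; Added commentary (not from the GCC sources), as a worked example of the
;; trick: if the scalar input is 1, the shift produces 0x8000000000000000 and
;; WHILELO (0, 0x8000000000000000) is true for every element, giving an
;; all-true predicate; if the input is 0, the shift produces 0 and
;; WHILELO (0, 0) is false everywhere, giving an all-false predicate.  Only
;; bit 0 of the input matters, which matches the bool semantics.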
2865 (define_expand "vec_duplicate<mode>"
2866 [(set (match_operand:PRED_ALL 0 "register_operand")
2867 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2870 rtx tmp = gen_reg_rtx (DImode);
2871 rtx op1 = gen_lowpart (DImode, operands[1]);
2872 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2873 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2878 ;; =========================================================================
2879 ;; == Vector decomposition
2880 ;; =========================================================================
2882 ;; -------------------------------------------------------------------------
2883 ;; ---- [INT,FP] Extract index
2884 ;; -------------------------------------------------------------------------
2886 ;; - DUP (Advanced SIMD)
2889 ;; - ST1 (Advanced SIMD)
2890 ;; - UMOV (Advanced SIMD)
2891 ;; -------------------------------------------------------------------------
2893 (define_expand "vec_extract<mode><Vel>"
2894 [(set (match_operand:<VEL> 0 "register_operand")
2896 (match_operand:SVE_FULL 1 "register_operand")
2897 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2901 if (poly_int_rtx_p (operands[2], &val)
2902 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2904 /* The last element can be extracted with a LASTB and a false
2905    predicate.  */
2906 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2907 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2910 if (!CONST_INT_P (operands[2]))
2912 /* Create an index with operand[2] as the base and -1 as the step.
2913 It will then be zero for the element we care about. */
2914 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2915 index = force_reg (<VEL_INT>mode, index);
2916 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2917 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2919 /* Get a predicate that is true for only that element. */
2920 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2921 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2922 rtx sel = gen_reg_rtx (<VPRED>mode);
2923 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2925 /* Select the element using LASTB. */
2926 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
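    /* Worked example (added commentary, not from the sources): to extract
       element 2 of an 8-element vector, the series above is
       {2, 1, 0, -1, -2, ...}, the compare with zero gives a predicate that
       is true only in lane 2, and LASTB then returns that lane.  */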
2932 ;; Extract element zero. This is a special case because we want to force
2933 ;; the registers to be the same for the second alternative, and then
2934 ;; split the instruction into nothing after RA.
2935 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2936 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2938 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2939 (parallel [(const_int 0)])))]
2942 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2943 switch (which_alternative)
2946 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2950 return "st1\\t{%1.<Vetype>}[0], %0";
2955 "&& reload_completed
2956 && REG_P (operands[0])
2957 && REGNO (operands[0]) == REGNO (operands[1])"
2960 emit_note (NOTE_INSN_DELETED);
2963 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2966 ;; Extract an element from the Advanced SIMD portion of the register.
2967 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2968 ;; want any change in lane number on big-endian targets.
2969 (define_insn "*vec_extract<mode><Vel>_v128"
2970 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2972 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2973 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2975 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2977 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2978 switch (which_alternative)
2981 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2983 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2985 return "st1\\t{%1.<Vetype>}[%2], %0";
2990 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2993 ;; Extract an element in the range of DUP. This pattern allows the
2994 ;; source and destination to be different.
2995 (define_insn "*vec_extract<mode><Vel>_dup"
2996 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2998 (match_operand:SVE_FULL 1 "register_operand" "w")
2999 (parallel [(match_operand:SI 2 "const_int_operand")])))]
3001 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
3003 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
3004 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
3008 ;; Extract an element outside the range of DUP. This pattern requires the
3009 ;; source and destination to be the same.
3010 (define_insn "*vec_extract<mode><Vel>_ext"
3011 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
3013 (match_operand:SVE_FULL 1 "register_operand" "0, w")
3014 (parallel [(match_operand:SI 2 "const_int_operand")])))]
3015 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
3017 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
3018 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
3019 return (which_alternative == 0
3020 ? "ext\t%0.b, %0.b, %0.b, #%2"
3021 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
3023 [(set_attr "movprfx" "*,yes")]
3026 ;; -------------------------------------------------------------------------
3027 ;; ---- [INT,FP] Extract active element
3028 ;; -------------------------------------------------------------------------
3032 ;; -------------------------------------------------------------------------
3034 ;; Extract the last active element of operand 1 into operand 0.
3035 ;; If no elements are active, extract the last inactive element instead.
3036 (define_insn "@extract_<last_op>_<mode>"
3037 [(set (match_operand:<VEL> 0 "register_operand")
3039 [(match_operand:<VPRED> 1 "register_operand")
3040 (match_operand:SVE_FULL 2 "register_operand")]
3043 {@ [ cons: =0 , 1 , 2 ]
3044 [ ?r , Upl , w ] last<ab>\t%<vwcore>0, %1, %2.<Vetype>
3045 [ w , Upl , w ] last<ab>\t%<Vetype>0, %1, %2.<Vetype>
3049 ;; -------------------------------------------------------------------------
3050 ;; ---- [PRED] Extract index
3051 ;; -------------------------------------------------------------------------
3052 ;; The patterns in this section are synthetic.
3053 ;; -------------------------------------------------------------------------
3055 ;; Handle extractions from a predicate by converting to an integer vector
3056 ;; and extracting from there.
3057 (define_expand "vec_extract<vpred><Vel>"
3058 [(match_operand:<VEL> 0 "register_operand")
3059 (match_operand:<VPRED> 1 "register_operand")
3060 (match_operand:SI 2 "nonmemory_operand")
3061 ;; Dummy operand to which we can attach the iterator.
3062 (reg:SVE_FULL_I V0_REGNUM)]
3065 rtx tmp = gen_reg_rtx (<MODE>mode);
3066 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
3067 CONST1_RTX (<MODE>mode),
3068 CONST0_RTX (<MODE>mode)));
3069 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
3074 ;; =========================================================================
3075 ;; == Unary arithmetic
3076 ;; =========================================================================
3078 ;; -------------------------------------------------------------------------
3079 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
3080 ;; -------------------------------------------------------------------------
3085 ;; - CNT (= popcount)
3086 ;; - RBIT (= bitreverse)
3089 ;; -------------------------------------------------------------------------
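;; Added illustration (not from the GCC sources): element-wise unary
;; operations such as the ones below can be vectorized to the predicated
;; forms in this section (NEG, NOT and CNT respectively):
;;
;;   void
;;   unary_ops (int *restrict a, int *restrict b, int *restrict c,
;;              const int *restrict x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       {
;;         a[i] = -x[i];
;;         b[i] = ~x[i];
;;         c[i] = __builtin_popcount (x[i]);
;;       }
;;   }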
3091 ;; Unpredicated integer unary arithmetic.
3092 (define_expand "<optab><mode>2"
3093 [(set (match_operand:SVE_I 0 "register_operand")
3096 (SVE_INT_UNARY:SVE_I
3097 (match_operand:SVE_I 1 "register_operand"))]
3101 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3105 ;; Integer unary arithmetic predicated with a PTRUE.
3106 (define_insn "@aarch64_pred_<optab><mode>"
3107 [(set (match_operand:SVE_VDQ_I 0 "register_operand")
3109 [(match_operand:<VPRED> 1 "register_operand")
3110 (SVE_INT_UNARY:SVE_VDQ_I
3111 (match_operand:SVE_VDQ_I 2 "register_operand"))]
3114 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3115 [ w , Upl , 0 ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
3116 [ ?&w , Upl , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
3120 ;; Predicated integer unary arithmetic with merging.
3121 (define_expand "@cond_<optab><mode>"
3122 [(set (match_operand:SVE_I 0 "register_operand")
3124 [(match_operand:<VPRED> 1 "register_operand")
3125 (SVE_INT_UNARY:SVE_I
3126 (match_operand:SVE_I 2 "register_operand"))
3127 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
3132 ;; Predicated integer unary arithmetic, merging with the first input.
3133 (define_insn "*cond_<optab><mode>_2"
3134 [(set (match_operand:SVE_I 0 "register_operand")
3136 [(match_operand:<VPRED> 1 "register_operand")
3137 (SVE_INT_UNARY:SVE_I
3138 (match_operand:SVE_I 2 "register_operand"))
3142 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3143 [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3144 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3148 ;; Predicated integer unary arithmetic, merging with an independent value.
3150 ;; The earlyclobber isn't needed for the first alternative, but omitting
3151 ;; it would only help the case in which operands 2 and 3 are the same,
3152 ;; which is handled above rather than here. Marking all the alternatives
3153 ;; as earlyclobber helps to make the instruction more regular to the
3154 ;; register allocator.
3155 (define_insn "*cond_<optab><mode>_any"
3156 [(set (match_operand:SVE_I 0 "register_operand")
3158 [(match_operand:<VPRED> 1 "register_operand")
3159 (SVE_INT_UNARY:SVE_I
3160 (match_operand:SVE_I 2 "register_operand"))
3161 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
3163 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3164 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3165 [ &w , Upl , w , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3166 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3167 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3172 ;; -------------------------------------------------------------------------
3173 ;; ---- [INT] General unary arithmetic corresponding to unspecs
3174 ;; -------------------------------------------------------------------------
3179 ;; -------------------------------------------------------------------------
3181 ;; Predicated integer unary operations.
3182 (define_insn "@aarch64_pred_<optab><mode>"
3183 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3185 [(match_operand:<VPRED> 1 "register_operand")
3187 [(match_operand:SVE_FULL_I 2 "register_operand")]
3190 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3191 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3192 [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3193 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3197 ;; Another way of expressing the REVB, REVH and REVW patterns, with this
3198 ;; form being easier for permutes. The predicate mode determines the number
3199 ;; of lanes and the data mode decides the granularity of the reversal within
3200 ;; each lane.
3201 (define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
3202 [(set (match_operand:SVE_ALL 0 "register_operand")
3204 [(match_operand:PRED_HSD 1 "register_operand")
3206 [(match_operand:SVE_ALL 2 "register_operand")]
3209 "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
3210 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3211 [ w , Upl , 0 ; * ] rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3212 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3216 ;; Predicated integer unary operations with merging.
3217 (define_insn "@cond_<optab><mode>"
3218 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3220 [(match_operand:<VPRED> 1 "register_operand")
3222 [(match_operand:SVE_FULL_I 2 "register_operand")]
3224 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3226 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3227 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3228 [ w , Upl , w , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3229 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3230 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3234 ;; -------------------------------------------------------------------------
3235 ;; ---- [INT] Sign and zero extension
3236 ;; -------------------------------------------------------------------------
3244 ;; -------------------------------------------------------------------------
3246 ;; Unpredicated sign and zero extension from a narrower mode.
3247 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3248 [(set (match_operand:SVE_HSDI 0 "register_operand")
3251 (ANY_EXTEND:SVE_HSDI
3252 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
3254 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3256 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
3260 ;; Predicated sign and zero extension from a narrower mode.
3261 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3262 [(set (match_operand:SVE_HSDI 0 "register_operand")
3264 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand")
3265 (ANY_EXTEND:SVE_HSDI
3266 (match_operand:SVE_PARTIAL_I 2 "register_operand"))]
3268 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3269 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3270 [ w , Upl , 0 ; * ] <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3271 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3275 ;; Predicated truncate-and-sign-extend operations.
3276 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3277 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
3278 (unspec:SVE_FULL_HSDI
3279 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
3280 (sign_extend:SVE_FULL_HSDI
3281 (truncate:SVE_PARTIAL_I
3282 (match_operand:SVE_FULL_HSDI 2 "register_operand")))]
3285 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3286 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3287 [ w , Upl , 0 ; * ] sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3288 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3292 ;; Predicated truncate-and-sign-extend operations with merging.
3293 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3294 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
3295 (unspec:SVE_FULL_HSDI
3296 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
3297 (sign_extend:SVE_FULL_HSDI
3298 (truncate:SVE_PARTIAL_I
3299 (match_operand:SVE_FULL_HSDI 2 "register_operand")))
3300 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
3303 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3304 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3305 [ w , Upl , w , 0 ; * ] sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3306 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3307 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3311 ;; Predicated truncate-and-zero-extend operations, merging with the first input.
3314 ;; The canonical form of this operation is an AND of a constant rather
3315 ;; than (zero_extend (truncate ...)).
3316 (define_insn "*cond_uxt<mode>_2"
3317 [(set (match_operand:SVE_I 0 "register_operand")
3319 [(match_operand:<VPRED> 1 "register_operand")
3321 (match_operand:SVE_I 2 "register_operand")
3322 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3326 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3327 [ w , Upl , 0 ; * ] uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3328 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
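;; For example (illustrative source), keeping only the low byte of each
;; selected 32-bit element:
;;
;;   #include <stdint.h>
;;   void f (uint32_t *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       if (x[i] & 1)
;;         x[i] &= 0xff;
;;   }
;;
;; can use the merging form above, since (x & 0xff) is the canonical way of
;; writing the zero-extension and UXTB here merges with the first input.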
3332 ;; Predicated truncate-and-zero-extend operations, merging with an
3333 ;; independent value.
3335 ;; The earlyclobber isn't needed for the first alternative, but omitting
3336 ;; it would only help the case in which operands 2 and 4 are the same,
3337 ;; which is handled above rather than here. Marking all the alternatives
3338 ;; as earlyclobber helps to make the instruction more regular to the
3339 ;; register allocator.
3340 (define_insn "*cond_uxt<mode>_any"
3341 [(set (match_operand:SVE_I 0 "register_operand")
3343 [(match_operand:<VPRED> 1 "register_operand")
3345 (match_operand:SVE_I 2 "register_operand")
3346 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3347 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3349 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3350 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
3351 [ &w , Upl , w , 0 ; * ] uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3352 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3353 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3357 ;; -------------------------------------------------------------------------
3358 ;; ---- [INT] Truncation
3359 ;; -------------------------------------------------------------------------
3360 ;; The patterns in this section are synthetic.
3361 ;; -------------------------------------------------------------------------
3363 ;; Truncate to a partial SVE vector from either a full vector or a
3364 ;; wider partial vector. This is a no-op, because we can just ignore
3365 ;; the unused upper bits of the source.
3366 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3367 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3368 (truncate:SVE_PARTIAL_I
3369 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3370 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3372 "&& reload_completed"
3373 [(set (match_dup 0) (match_dup 1))]
3375 operands[1] = aarch64_replace_reg_mode (operands[1],
3376 <SVE_PARTIAL_I:MODE>mode);
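;; For example (illustrative source), narrowing 64-bit elements to 32-bit
;; elements:
;;
;;   #include <stdint.h>
;;   void f (int32_t *dst, const int64_t *src, int n) {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = (int32_t) src[i];
;;   }
;;
;; If the vectoriser picks a partial VNx2SI result here, only the low
;; 32 bits of each 64-bit container are needed, so after reload the pattern
;; above splits into a plain move of the same register viewed in the
;; narrower mode.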
3380 ;; -------------------------------------------------------------------------
3381 ;; ---- [INT] Logical inverse
3382 ;; -------------------------------------------------------------------------
3385 ;; -------------------------------------------------------------------------
3387 ;; Logical inverse, predicated with a ptrue.
3388 (define_expand "@aarch64_ptrue_cnot<mode>"
3389 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3392 [(match_operand:<VPRED> 1 "register_operand")
3393 (const_int SVE_KNOWN_PTRUE)
3395 (match_operand:SVE_FULL_I 2 "register_operand")
3403 operands[3] = CONST0_RTX (<MODE>mode);
3404 operands[4] = CONST1_RTX (<MODE>mode);
3408 (define_insn "*cnot<mode>"
3409 [(set (match_operand:SVE_I 0 "register_operand")
3412 [(match_operand:<VPRED> 1 "register_operand")
3413 (const_int SVE_KNOWN_PTRUE)
3415 (match_operand:SVE_I 2 "register_operand")
3416 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3418 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3422 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3423 [ w , Upl , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3424 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
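;; For example, a logical-NOT loop (illustrative source):
;;
;;   void f (int *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = !x[i];
;;   }
;;
;; selects 1 where the element is zero and 0 elsewhere, which is the
;; vcond_mask form above and typically assembles to
;; "cnot z0.s, p0/m, z1.s".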
3428 ;; Predicated logical inverse with merging.
3429 (define_expand "@cond_cnot<mode>"
3430 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3432 [(match_operand:<VPRED> 1 "register_operand")
3436 (const_int SVE_KNOWN_PTRUE)
3438 (match_operand:SVE_FULL_I 2 "register_operand")
3444 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3448 operands[4] = CONSTM1_RTX (<VPRED>mode);
3449 operands[5] = CONST0_RTX (<MODE>mode);
3450 operands[6] = CONST1_RTX (<MODE>mode);
3454 ;; Predicated logical inverse, merging with the first input.
3455 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3456 [(set (match_operand:SVE_I 0 "register_operand")
3458 [(match_operand:<VPRED> 1 "register_operand")
3459 ;; Logical inverse of operand 2 (as above).
3463 (const_int SVE_KNOWN_PTRUE)
3465 (match_operand:SVE_I 2 "register_operand")
3466 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3468 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3474 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3475 [ w , Upl , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3476 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3478 "&& !CONSTANT_P (operands[5])"
3480 operands[5] = CONSTM1_RTX (<VPRED>mode);
3484 ;; Predicated logical inverse, merging with an independent value.
3486 ;; The earlyclobber isn't needed for the first alternative, but omitting
3487 ;; it would only help the case in which operands 2 and 6 are the same,
3488 ;; which is handled above rather than here. Marking all the alternatives
3489 ;; as earlyclobber helps to make the instruction more regular to the
3490 ;; register allocator.
3491 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3492 [(set (match_operand:SVE_I 0 "register_operand")
3494 [(match_operand:<VPRED> 1 "register_operand")
3495 ;; Logical inverse of operand 2 (as above).
3499 (const_int SVE_KNOWN_PTRUE)
3501 (match_operand:SVE_I 2 "register_operand")
3502 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3504 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3507 (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero")]
3509 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3510 {@ [ cons: =0 , 1 , 2 , 6 ; attrs: movprfx ]
3511 [ &w , Upl , w , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3512 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3513 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3515 "&& !CONSTANT_P (operands[5])"
3517 operands[5] = CONSTM1_RTX (<VPRED>mode);
3521 ;; -------------------------------------------------------------------------
3522 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3523 ;; -------------------------------------------------------------------------
3526 ;; -------------------------------------------------------------------------
3528 ;; Unpredicated unary operations that take an integer and return a float.
3529 (define_insn "@aarch64_sve_<optab><mode>"
3530 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3532 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3535 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3538 ;; -------------------------------------------------------------------------
3539 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3540 ;; -------------------------------------------------------------------------
3555 ;; -------------------------------------------------------------------------
3557 ;; Unpredicated floating-point unary operations.
3558 (define_insn "@aarch64_sve_<optab><mode>"
3559 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3561 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3564 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3567 ;; Unpredicated floating-point unary operations.
3568 (define_expand "<optab><mode>2"
3569 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3572 (const_int SVE_RELAXED_GP)
3573 (match_operand:SVE_FULL_F 1 "register_operand")]
3574 SVE_COND_FP_UNARY_OPTAB))]
3577 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
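;; For example (illustrative source):
;;
;;   void f (float *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = __builtin_fabsf (x[i]);
;;   }
;;
;; goes through this expander with an all-true predicate and typically
;; assembles to "fabs z0.s, p0/m, z0.s" (FABS being one of the operations
;; covered by these unspecs).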
3581 ;; Predicated floating-point unary operations.
3582 (define_insn "@aarch64_pred_<optab><mode>"
3583 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3585 [(match_operand:<VPRED> 1 "register_operand")
3586 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3587 (match_operand:SVE_FULL_F 2 "register_operand")]
3588 SVE_COND_FP_UNARY))]
3590 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3591 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3592 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3596 ;; Predicated floating-point unary arithmetic with merging.
3597 (define_expand "@cond_<optab><mode>"
3598 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3600 [(match_operand:<VPRED> 1 "register_operand")
3603 (const_int SVE_STRICT_GP)
3604 (match_operand:SVE_FULL_F 2 "register_operand")]
3606 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3611 ;; Predicated floating-point unary arithmetic, merging with the first input.
3612 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
3613 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3615 [(match_operand:<VPRED> 1 "register_operand")
3618 (const_int SVE_RELAXED_GP)
3619 (match_operand:SVE_FULL_F 2 "register_operand")]
3624 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3625 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3626 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3628 "&& !rtx_equal_p (operands[1], operands[3])"
3630 operands[3] = copy_rtx (operands[1]);
3634 (define_insn "*cond_<optab><mode>_2_strict"
3635 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3637 [(match_operand:<VPRED> 1 "register_operand")
3640 (const_int SVE_STRICT_GP)
3641 (match_operand:SVE_FULL_F 2 "register_operand")]
3646 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3647 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3648 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3652 ;; Predicated floating-point unary arithmetic, merging with an independent value.
3655 ;; The earlyclobber isn't needed for the first alternative, but omitting
3656 ;; it would only help the case in which operands 2 and 3 are the same,
3657 ;; which is handled above rather than here. Marking all the alternatives
3658 ;; as earlyclobber helps to make the instruction more regular to the
3659 ;; register allocator.
3660 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
3661 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3663 [(match_operand:<VPRED> 1 "register_operand")
3666 (const_int SVE_RELAXED_GP)
3667 (match_operand:SVE_FULL_F 2 "register_operand")]
3669 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3671 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3672 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3673 [ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3674 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3675 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3677 "&& !rtx_equal_p (operands[1], operands[4])"
3679 operands[4] = copy_rtx (operands[1]);
3683 (define_insn "*cond_<optab><mode>_any_strict"
3684 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3686 [(match_operand:<VPRED> 1 "register_operand")
3689 (const_int SVE_STRICT_GP)
3690 (match_operand:SVE_FULL_F 2 "register_operand")]
3692 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3694 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3695 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3696 [ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3697 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3698 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3702 ;; -------------------------------------------------------------------------
3703 ;; ---- [FP] Square root
3704 ;; -------------------------------------------------------------------------
3706 (define_expand "sqrt<mode>2"
3707 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3710 (const_int SVE_RELAXED_GP)
3711 (match_operand:SVE_FULL_F 1 "register_operand")]
3712 UNSPEC_COND_FSQRT))]
3715 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3717 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
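;; For example (illustrative source):
;;
;;   void f (float *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = __builtin_sqrtf (x[i]);
;;   }
;;
;; normally emits the predicated FSQRT through this expander; with options
;; that permit low-precision square roots (e.g. -ffast-math together with
;; -mlow-precision-sqrt) aarch64_emit_approx_sqrt may instead expand an
;; FRSQRTE/FRSQRTS Newton-Raphson sequence.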
3720 ;; -------------------------------------------------------------------------
3721 ;; ---- [FP] Reciprocal square root
3722 ;; -------------------------------------------------------------------------
3724 (define_expand "rsqrt<mode>2"
3725 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3726 (unspec:SVE_FULL_SDF
3727 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3731 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
3735 (define_expand "@aarch64_rsqrte<mode>"
3736 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3737 (unspec:SVE_FULL_SDF
3738 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3743 (define_expand "@aarch64_rsqrts<mode>"
3744 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3745 (unspec:SVE_FULL_SDF
3746 [(match_operand:SVE_FULL_SDF 1 "register_operand")
3747 (match_operand:SVE_FULL_SDF 2 "register_operand")]
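;; Roughly, the approximation built from these patterns works as follows:
;; FRSQRTE gives an initial estimate x0 of 1/sqrt(d), and FRSQRTS computes
;; (3 - a*b)/2, so one refinement step is x1 = x0 * (3 - d*x0*x0)/2, the
;; standard Newton-Raphson iteration for the reciprocal square root.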
3752 ;; -------------------------------------------------------------------------
3753 ;; ---- [PRED] Inverse
3754 ;; -------------------------------------------------------------------------
3757 ;; -------------------------------------------------------------------------
3759 ;; Unpredicated predicate inverse.
3760 (define_expand "one_cmpl<mode>2"
3761 [(set (match_operand:PRED_ALL 0 "register_operand")
3763 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3767 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3771 ;; Predicated predicate inverse.
3772 (define_insn "*one_cmpl<mode>3"
3773 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3775 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3776 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3778 "not\t%0.b, %1/z, %2.b"
3781 ;; =========================================================================
3782 ;; == Binary arithmetic
3783 ;; =========================================================================
3785 ;; -------------------------------------------------------------------------
3786 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3787 ;; -------------------------------------------------------------------------
3789 ;; - ADD (merging form only)
3790 ;; - AND (merging form only)
3791 ;; - ASR (merging form only)
3792 ;; - EOR (merging form only)
3793 ;; - LSL (merging form only)
3794 ;; - LSR (merging form only)
3796 ;; - ORR (merging form only)
3799 ;; - SQADD (SVE2 merging form only)
3800 ;; - SQSUB (SVE2 merging form only)
3801 ;; - SUB (merging form only)
3804 ;; - UQADD (SVE2 merging form only)
3805 ;; - UQSUB (SVE2 merging form only)
3806 ;; -------------------------------------------------------------------------
3808 ;; Unpredicated integer binary operations that have an immediate form.
3809 (define_expand "<optab><mode>3"
3810 [(set (match_operand:SVE_I 0 "register_operand")
3813 (SVE_INT_BINARY_MULTI:SVE_I
3814 (match_operand:SVE_I 1 "register_operand")
3815 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3819 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3823 ;; Unpredicated integer binary operations that have an immediate form.
3824 ;; Advanced SIMD does not support vector DImode MUL, but SVE does.
3825 ;; Make use of the overlap between Z and V registers to implement the V2DI
3826 ;; optab for TARGET_SVE. The mulvnx2di3 expander can
3827 ;; handle the TARGET_SVE2 case transparently.
3828 (define_expand "mul<mode>3"
3829 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
3830 (unspec:SVE_I_SIMD_DI
3833 (match_operand:SVE_I_SIMD_DI 1 "register_operand")
3834 (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand"))]
3838 /* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple
3839 pattern for it here rather than splitting off the MULT expander separately.  */
3843 emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
3844 operands[1], operands[2]));
3847 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
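;; For example, a fixed-length 64-bit multiply (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (uint64_t *x, const uint64_t *y) {
;;     for (int i = 0; i < 2; ++i)
;;       x[i] *= y[i];
;;   }
;;
;; has no Advanced SIMD multiply instruction, but when SVE is enabled the
;; V2DI value can live in the low 128 bits of a Z register, using the
;; predicated SVE MUL on TARGET_SVE and the unpredicated form on
;; TARGET_SVE2.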
3851 ;; Integer binary operations that have an immediate form, predicated
3852 ;; with a PTRUE. We don't actually need the predicate for the first
3853 ;; and third alternatives, but using Upa or X isn't likely to gain much
3854 ;; and would make the instruction seem less uniform to the register allocator.
3856 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3857 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
3858 (unspec:SVE_I_SIMD_DI
3859 [(match_operand:<VPRED> 1 "register_operand")
3860 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
3861 (match_operand:SVE_I_SIMD_DI 2 "register_operand")
3862 (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
3865 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3866 [ w , Upl , %0 , <sve_imm_con> ; * ] #
3867 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
3868 [ ?&w , Upl , w , <sve_imm_con> ; yes ] #
3869 [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
3871 ; Split the unpredicated form after reload, so that we don't have
3872 ; the unnecessary PTRUE.
3873 "&& reload_completed
3874 && !register_operand (operands[3], <MODE>mode)"
3876 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI (match_dup 2) (match_dup 3)))]
3880 ;; Unpredicated binary operations with a constant (post-RA only).
3881 ;; These are generated by splitting a predicated instruction whose
3882 ;; predicate is unused.
3883 (define_insn "*post_ra_<optab><mode>3"
3884 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand" "=w, ?&w")
3885 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
3886 (match_operand:SVE_I_SIMD_DI 1 "register_operand" "0, w")
3887 (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3888 "TARGET_SVE && reload_completed"
3890 <sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2
3891 movprfx\t%Z0, %Z1\;<sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2"
3892 [(set_attr "movprfx" "*,yes")]
3895 ;; Predicated integer operations with merging.
3896 (define_expand "@cond_<optab><mode>"
3897 [(set (match_operand:SVE_I 0 "register_operand")
3899 [(match_operand:<VPRED> 1 "register_operand")
3900 (SVE_INT_BINARY:SVE_I
3901 (match_operand:SVE_I 2 "register_operand")
3902 (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
3903 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3908 ;; Predicated integer operations, merging with the first input.
3909 (define_insn "*cond_<optab><mode>_2"
3910 [(set (match_operand:SVE_I 0 "register_operand")
3912 [(match_operand:<VPRED> 1 "register_operand")
3913 (SVE_INT_BINARY:SVE_I
3914 (match_operand:SVE_I 2 "register_operand")
3915 (match_operand:SVE_I 3 "register_operand"))
3919 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3920 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3921 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3925 ;; Predicated integer operations, merging with the second input.
3926 (define_insn "*cond_<optab><mode>_3"
3927 [(set (match_operand:SVE_I 0 "register_operand")
3929 [(match_operand:<VPRED> 1 "register_operand")
3930 (SVE_INT_BINARY:SVE_I
3931 (match_operand:SVE_I 2 "register_operand")
3932 (match_operand:SVE_I 3 "register_operand"))
3936 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3937 [ w , Upl , w , 0 ; * ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3938 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3942 ;; Predicated integer operations, merging with an independent value.
3943 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3944 [(set (match_operand:SVE_I 0 "register_operand")
3946 [(match_operand:<VPRED> 1 "register_operand")
3947 (SVE_INT_BINARY:SVE_I
3948 (match_operand:SVE_I 2 "register_operand")
3949 (match_operand:SVE_I 3 "register_operand"))
3950 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3953 && !rtx_equal_p (operands[2], operands[4])
3954 && !rtx_equal_p (operands[3], operands[4])"
3955 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
3956 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3957 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3958 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3959 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3960 [ ?&w , Upl , w , w , w ] #
3962 "&& reload_completed
3963 && register_operand (operands[4], <MODE>mode)
3964 && !rtx_equal_p (operands[0], operands[4])"
3966 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3967 operands[4], operands[1]));
3968 operands[4] = operands[2] = operands[0];
3970 [(set_attr "movprfx" "yes")]
3973 ;; -------------------------------------------------------------------------
3974 ;; ---- [INT] Addition
3975 ;; -------------------------------------------------------------------------
3987 ;; -------------------------------------------------------------------------
3989 (define_insn "add<mode>3"
3990 [(set (match_operand:SVE_I 0 "register_operand")
3992 (match_operand:SVE_I 1 "register_operand")
3993 (match_operand:SVE_I 2 "aarch64_sve_add_operand")))]
3995 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3996 [ w , %0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
3997 [ w , 0 , vsn ; * ] sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3998 [ w , 0 , vsi ; * ] << aarch64_output_sve_vector_inc_dec ("%0.<Vetype>", operands[2]);
3999 [ ?w , w , vsa ; yes ] movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
4000 [ ?w , w , vsn ; yes ] movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
4001 [ w , w , w ; * ] add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
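;; As a rough illustration (names are only illustrative):
;;
;;   #include <arm_sve.h>
;;   svuint32_t f (svuint32_t x) {
;;     return svadd_n_u32_x (svptrue_b32 (), x, 7);
;;   }
;;
;; can be emitted through the vsa alternative as "add z0.s, z0.s, #7";
;; a negative constant in range instead uses the vsn alternative
;; ("sub ... #%N2"), and VL-dependent constants go through
;; aarch64_output_sve_vector_inc_dec and use the vector INC/DEC forms.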
4005 ;; Merging forms are handled through SVE_INT_BINARY.
4007 ;; -------------------------------------------------------------------------
4008 ;; ---- [INT] Subtraction
4009 ;; -------------------------------------------------------------------------
4013 ;; -------------------------------------------------------------------------
4015 (define_insn "sub<mode>3"
4016 [(set (match_operand:SVE_I 0 "register_operand")
4018 (match_operand:SVE_I 1 "aarch64_sve_arith_operand")
4019 (match_operand:SVE_I 2 "register_operand")))]
4021 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4022 [ w , w , w ; * ] sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4023 [ w , vsa , 0 ; * ] subr\t%0.<Vetype>, %0.<Vetype>, #%D1
4024 [ ?&w , vsa , w ; yes ] movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1
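;; For example, reversed subtraction from a constant (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (uint32_t *x, int n) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = 100 - x[i];
;;   }
;;
;; matches the vsa alternative and can be emitted as
;; "subr z0.s, z0.s, #100".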
4028 ;; Merging forms are handled through SVE_INT_BINARY.
4030 ;; -------------------------------------------------------------------------
4031 ;; ---- [INT] Take address
4032 ;; -------------------------------------------------------------------------
4035 ;; -------------------------------------------------------------------------
4037 ;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD,
4038 ;; but the svadrb intrinsics should preserve the user's choice.
4039 (define_insn "@aarch64_adr<mode>"
4040 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
4041 (unspec:SVE_FULL_SDI
4042 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
4043 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
4045 "TARGET_SVE && TARGET_NON_STREAMING"
4046 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
4049 ;; Same, but with the offset being sign-extended from the low 32 bits.
4050 (define_insn_and_rewrite "*aarch64_adr_sxtw"
4051 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4053 [(match_operand:VNx2DI 1 "register_operand" "w")
4058 (match_operand:VNx2DI 2 "register_operand" "w")))]
4061 "TARGET_SVE && TARGET_NON_STREAMING"
4062 "adr\t%0.d, [%1.d, %2.d, sxtw]"
4063 "&& !CONSTANT_P (operands[3])"
4065 operands[3] = CONSTM1_RTX (VNx2BImode);
4069 ;; Same, but with the offset being zero-extended from the low 32 bits.
4070 (define_insn "*aarch64_adr_uxtw_unspec"
4071 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4073 [(match_operand:VNx2DI 1 "register_operand" "w")
4075 (match_operand:VNx2DI 2 "register_operand" "w")
4076 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
4078 "TARGET_SVE && TARGET_NON_STREAMING"
4079 "adr\t%0.d, [%1.d, %2.d, uxtw]"
4082 ;; Same, matching as a PLUS rather than unspec.
4083 (define_insn "*aarch64_adr_uxtw_and"
4084 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4087 (match_operand:VNx2DI 2 "register_operand" "w")
4088 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
4089 (match_operand:VNx2DI 1 "register_operand" "w")))]
4090 "TARGET_SVE && TARGET_NON_STREAMING"
4091 "adr\t%0.d, [%1.d, %2.d, uxtw]"
4094 ;; ADR with a nonzero shift.
4095 (define_expand "@aarch64_adr<mode>_shift"
4096 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4098 (unspec:SVE_FULL_SDI
4100 (ashift:SVE_FULL_SDI
4101 (match_operand:SVE_FULL_SDI 2 "register_operand")
4102 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
4104 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
4105 "TARGET_SVE && TARGET_NON_STREAMING"
4107 operands[4] = CONSTM1_RTX (<VPRED>mode);
4111 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
4112 [(set (match_operand:SVE_24I 0 "register_operand" "=w")
4117 (match_operand:SVE_24I 2 "register_operand" "w")
4118 (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
4120 (match_operand:SVE_24I 1 "register_operand" "w")))]
4121 "TARGET_SVE && TARGET_NON_STREAMING"
4122 "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
4123 "&& !CONSTANT_P (operands[4])"
4125 operands[4] = CONSTM1_RTX (<VPRED>mode);
4129 ;; Same, but with the index being sign-extended from the low 32 bits.
4130 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
4131 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4140 (match_operand:VNx2DI 2 "register_operand" "w")))]
4142 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
4144 (match_operand:VNx2DI 1 "register_operand" "w")))]
4145 "TARGET_SVE && TARGET_NON_STREAMING"
4146 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
4147 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4149 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
4153 ;; Same, but with the index being zero-extended from the low 32 bits.
4154 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
4155 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4161 (match_operand:VNx2DI 2 "register_operand" "w")
4162 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
4163 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
4165 (match_operand:VNx2DI 1 "register_operand" "w")))]
4166 "TARGET_SVE && TARGET_NON_STREAMING"
4167 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
4168 "&& !CONSTANT_P (operands[5])"
4170 operands[5] = CONSTM1_RTX (VNx2BImode);
4174 ;; -------------------------------------------------------------------------
4175 ;; ---- [INT] Absolute difference
4176 ;; -------------------------------------------------------------------------
4180 ;; -------------------------------------------------------------------------
4182 ;; Unpredicated integer absolute difference.
4183 (define_expand "<su>abd<mode>3"
4184 [(use (match_operand:SVE_I 0 "register_operand"))
4186 (match_operand:SVE_I 1 "register_operand")
4187 (match_operand:SVE_I 2 "register_operand"))]
4190 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
4191 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
4197 ;; Predicated integer absolute difference.
4198 (define_insn "@aarch64_pred_<su>abd<mode>"
4199 [(set (match_operand:SVE_I 0 "register_operand")
4202 [(match_operand:<VPRED> 1 "register_operand")
4204 (match_operand:SVE_I 2 "register_operand")
4205 (match_operand:SVE_I 3 "register_operand"))]
4214 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4215 [ w , Upl , %0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4216 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
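;; For example (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (uint8_t *d, const uint8_t *a, const uint8_t *b, int n) {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
;;   }
;;
;; is recognised as max(a,b) - min(a,b), which is the form above, and
;; typically assembles to "uabd z0.b, p0/m, z0.b, z1.b".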
4220 (define_expand "@aarch64_cond_<su>abd<mode>"
4221 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4223 [(match_operand:<VPRED> 1 "register_operand")
4228 (match_operand:SVE_FULL_I 2 "register_operand")
4229 (match_operand:SVE_FULL_I 3 "register_operand"))]
4233 (<max_opp>:SVE_FULL_I
4237 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4241 if (rtx_equal_p (operands[3], operands[4]))
4242 std::swap (operands[2], operands[3]);
4245 ;; Predicated integer absolute difference, merging with the first input.
4246 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
4247 [(set (match_operand:SVE_I 0 "register_operand")
4249 [(match_operand:<VPRED> 1 "register_operand")
4254 (match_operand:SVE_I 2 "register_operand")
4255 (match_operand:SVE_I 3 "register_operand"))]
4266 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4267 [ w , Upl , 0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4268 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4270 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4272 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4276 ;; Predicated integer absolute difference, merging with the second input.
4277 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3"
4278 [(set (match_operand:SVE_I 0 "register_operand")
4280 [(match_operand:<VPRED> 1 "register_operand")
4285 (match_operand:SVE_I 2 "register_operand")
4286 (match_operand:SVE_I 3 "register_operand"))]
4297 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4298 [ w , Upl , w , 0 ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4299 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4301 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4303 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4307 ;; Predicated integer absolute difference, merging with an independent value.
4308 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
4309 [(set (match_operand:SVE_I 0 "register_operand")
4311 [(match_operand:<VPRED> 1 "register_operand")
4316 (match_operand:SVE_I 2 "register_operand")
4317 (match_operand:SVE_I 3 "register_operand"))]
4325 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4328 && !rtx_equal_p (operands[2], operands[4])
4329 && !rtx_equal_p (operands[3], operands[4])"
4330 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4331 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4332 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4333 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4334 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4335 [ ?&w , Upl , w , w , w ] #
4339 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
4340 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
4341 else if (reload_completed
4342 && register_operand (operands[4], <MODE>mode)
4343 && !rtx_equal_p (operands[0], operands[4]))
4345 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4346 operands[4], operands[1]));
4347 operands[4] = operands[2] = operands[0];
4352 [(set_attr "movprfx" "yes")]
4355 ;; -------------------------------------------------------------------------
4356 ;; ---- [INT] Saturating addition and subtraction
4357 ;; -------------------------------------------------------------------------
4362 ;; -------------------------------------------------------------------------
4364 ;; Unpredicated saturating signed addition and subtraction.
4365 (define_insn "@aarch64_sve_<optab><mode>"
4366 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4367 (SBINQOPS:SVE_FULL_I
4368 (match_operand:SVE_FULL_I 1 "register_operand")
4369 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand")))]
4371 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4372 [ w , 0 , vsQ ; * ] <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4373 [ w , 0 , vsS ; * ] <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4374 [ ?&w , w , vsQ ; yes ] movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4375 [ ?&w , w , vsS ; yes ] movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4376 [ w , w , w ; * ] <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4380 ;; Unpredicated saturating unsigned addition and subtraction.
4381 (define_insn "@aarch64_sve_<optab><mode>"
4382 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4383 (UBINQOPS:SVE_FULL_I
4384 (match_operand:SVE_FULL_I 1 "register_operand")
4385 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand")))]
4387 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4388 [ w , 0 , vsa ; * ] <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4389 [ ?&w , w , vsa ; yes ] movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4390 [ w , w , w ; * ] <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
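;; For example, via ACLE (illustrative source):
;;
;;   #include <arm_sve.h>
;;   svint32_t f (svint32_t x) {
;;     return svqadd_n_s32 (x, 50);
;;   }
;;
;; can use the signed immediate alternative above
;; ("sqadd z0.s, z0.s, #50"); a negative constant is instead emitted
;; through the reversed operation ("sqsub ... #%N2").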
4394 ;; -------------------------------------------------------------------------
4395 ;; ---- [INT] Highpart multiplication
4396 ;; -------------------------------------------------------------------------
4400 ;; -------------------------------------------------------------------------
4402 ;; Unpredicated highpart multiplication.
4403 (define_expand "<su>mul<mode>3_highpart"
4404 [(set (match_operand:SVE_I 0 "register_operand")
4408 [(match_operand:SVE_I 1 "register_operand")
4409 (match_operand:SVE_I 2 "register_operand")]
4414 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4418 ;; Predicated highpart multiplication.
4419 (define_insn "@aarch64_pred_<optab><mode>"
4420 [(set (match_operand:SVE_I 0 "register_operand")
4422 [(match_operand:<VPRED> 1 "register_operand")
4424 [(match_operand:SVE_I 2 "register_operand")
4425 (match_operand:SVE_I 3 "register_operand")]
4429 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4430 [ w , Upl , %0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4431 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
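;; For example (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (int32_t *d, const int32_t *a, const int32_t *b, int n) {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = (int32_t) (((int64_t) a[i] * b[i]) >> 32);
;;   }
;;
;; is a signed highpart multiplication and typically assembles to
;; "smulh z0.s, p0/m, z0.s, z1.s".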
4435 ;; Predicated highpart multiplications with merging.
4436 (define_expand "@cond_<optab><mode>"
4437 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4439 [(match_operand:<VPRED> 1 "register_operand")
4441 [(match_operand:SVE_FULL_I 2 "register_operand")
4442 (match_operand:SVE_FULL_I 3 "register_operand")]
4444 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4448 /* Only target code is aware of these operations, so we don't need
4449 to handle the fully-general case. */
4450 gcc_assert (rtx_equal_p (operands[2], operands[4])
4451 || CONSTANT_P (operands[4]));
4454 ;; Predicated highpart multiplications, merging with the first input.
4455 (define_insn "*cond_<optab><mode>_2"
4456 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4458 [(match_operand:<VPRED> 1 "register_operand")
4460 [(match_operand:SVE_FULL_I 2 "register_operand")
4461 (match_operand:SVE_FULL_I 3 "register_operand")]
4466 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4467 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4468 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4472 ;; Predicated highpart multiplications, merging with zero.
4473 (define_insn "*cond_<optab><mode>_z"
4474 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4476 [(match_operand:<VPRED> 1 "register_operand")
4478 [(match_operand:SVE_FULL_I 2 "register_operand")
4479 (match_operand:SVE_FULL_I 3 "register_operand")]
4481 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4484 {@ [ cons: =0 , 1 , 2 , 3 ]
4485 [ &w , Upl , %0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4486 [ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4488 [(set_attr "movprfx" "yes")])
4490 ;; -------------------------------------------------------------------------
4491 ;; ---- [INT] Division
4492 ;; -------------------------------------------------------------------------
4498 ;; -------------------------------------------------------------------------
4500 ;; Unpredicated integer division.
4501 ;; SVE has vector integer divisions, unlike Advanced SIMD.
4502 ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
4503 ;; optabs to the midend.
4504 (define_expand "<optab><mode>3"
4505 [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
4506 (unspec:SVE_FULL_SDI_SIMD
4508 (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
4509 (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
4510 (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
4514 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4518 ;; Integer division predicated with a PTRUE.
4519 (define_insn "@aarch64_pred_<optab><mode>"
4520 [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
4521 (unspec:SVE_FULL_SDI_SIMD
4522 [(match_operand:<VPRED> 1 "register_operand")
4523 (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
4524 (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
4525 (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
4528 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4529 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
4530 [ w , Upl , w , 0 ; * ] <sve_int_op>r\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z2.<Vetype>
4531 [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
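;; For example (illustrative source):
;;
;;   void f (int *d, const int *a, const int *b, int n) {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] / b[i];
;;   }
;;
;; has no Advanced SIMD equivalent but can vectorise here to
;; "sdiv z0.s, p0/m, z0.s, z1.s"; when the destination is tied to the
;; second input, the reversed form "sdivr" avoids an extra move.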
4535 ;; Predicated integer division with merging.
4536 (define_expand "@cond_<optab><mode>"
4537 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4538 (unspec:SVE_FULL_SDI
4539 [(match_operand:<VPRED> 1 "register_operand")
4540 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4541 (match_operand:SVE_FULL_SDI 2 "register_operand")
4542 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4543 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4548 ;; Predicated integer division, merging with the first input.
4549 (define_insn "*cond_<optab><mode>_2"
4550 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4551 (unspec:SVE_FULL_SDI
4552 [(match_operand:<VPRED> 1 "register_operand")
4553 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4554 (match_operand:SVE_FULL_SDI 2 "register_operand")
4555 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4559 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4560 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4561 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4565 ;; Predicated integer division, merging with the second input.
4566 (define_insn "*cond_<optab><mode>_3"
4567 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4568 (unspec:SVE_FULL_SDI
4569 [(match_operand:<VPRED> 1 "register_operand")
4570 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4571 (match_operand:SVE_FULL_SDI 2 "register_operand")
4572 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4576 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4577 [ w , Upl , w , 0 ; * ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4578 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4582 ;; Predicated integer division, merging with an independent value.
4583 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4584 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4585 (unspec:SVE_FULL_SDI
4586 [(match_operand:<VPRED> 1 "register_operand")
4587 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4588 (match_operand:SVE_FULL_SDI 2 "register_operand")
4589 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4590 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4593 && !rtx_equal_p (operands[2], operands[4])
4594 && !rtx_equal_p (operands[3], operands[4])"
4595 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4596 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4597 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4598 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4599 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4600 [ ?&w , Upl , w , w , w ] #
4602 "&& reload_completed
4603 && register_operand (operands[4], <MODE>mode)
4604 && !rtx_equal_p (operands[0], operands[4])"
4606 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4607 operands[4], operands[1]));
4608 operands[4] = operands[2] = operands[0];
4610 [(set_attr "movprfx" "yes")]
4613 ;; -------------------------------------------------------------------------
4614 ;; ---- [INT] Binary logical operations
4615 ;; -------------------------------------------------------------------------
4620 ;; -------------------------------------------------------------------------
4622 ;; Unpredicated integer binary logical operations.
4623 (define_insn "<optab><mode>3"
4624 [(set (match_operand:SVE_I 0 "register_operand")
4626 (match_operand:SVE_I 1 "register_operand")
4627 (match_operand:SVE_I 2 "aarch64_sve_logical_operand")))]
4629 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4630 [ w , %0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4631 [ ?w , w , vsl ; yes ] movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4632 [ w , w , w ; * ] <logical>\t%0.d, %1.d, %2.d
4636 ;; Merging forms are handled through SVE_INT_BINARY.
4638 ;; -------------------------------------------------------------------------
4639 ;; ---- [INT] Binary logical operations (inverted second input)
4640 ;; -------------------------------------------------------------------------
4643 ;; -------------------------------------------------------------------------
4645 ;; Unpredicated BIC; andn named pattern.
4646 (define_expand "andn<mode>3"
4647 [(set (match_operand:SVE_I 0 "register_operand")
4651 (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4653 (match_operand:SVE_I 1 "register_operand")))]
4656 operands[3] = CONSTM1_RTX (<VPRED>mode);
4661 (define_insn_and_rewrite "*bic<mode>3"
4662 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4667 (match_operand:SVE_I 2 "register_operand" "w"))]
4669 (match_operand:SVE_I 1 "register_operand" "w")))]
4671 "bic\t%0.d, %1.d, %2.d"
4672 "&& !CONSTANT_P (operands[3])"
4674 operands[3] = CONSTM1_RTX (<VPRED>mode);
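;; For example (illustrative source):
;;
;;   #include <stdint.h>
;;   void f (uint64_t *d, const uint64_t *a, const uint64_t *b, int n) {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] & ~b[i];
;;   }
;;
;; matches this pattern and typically assembles to "bic z0.d, z1.d, z2.d";
;; the all-true predicate exists only because the RTL wraps the NOT in a
;; predicated unspec, hence the rewrite above when it is not yet constant.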
4678 ;; Predicated BIC with merging.
4679 (define_expand "@cond_bic<mode>"
4680 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4682 [(match_operand:<VPRED> 1 "register_operand")
4684 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4685 (match_operand:SVE_FULL_I 2 "register_operand"))
4686 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4691 ;; Predicated integer BIC, merging with the first input.
4692 (define_insn "*cond_bic<mode>_2"
4693 [(set (match_operand:SVE_I 0 "register_operand")
4695 [(match_operand:<VPRED> 1 "register_operand")
4698 (match_operand:SVE_I 3 "register_operand"))
4699 (match_operand:SVE_I 2 "register_operand"))
4703 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4704 [ w , Upl , 0 , w ; * ] bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4705 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4709 ;; Predicated integer BIC, merging with an independent value.
4710 (define_insn_and_rewrite "*cond_bic<mode>_any"
4711 [(set (match_operand:SVE_I 0 "register_operand")
4713 [(match_operand:<VPRED> 1 "register_operand")
4716 (match_operand:SVE_I 3 "register_operand"))
4717 (match_operand:SVE_I 2 "register_operand"))
4718 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4720 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4721 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4722 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4723 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4724 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4725 [ ?&w , Upl , w , w , w ] #
4727 "&& reload_completed
4728 && register_operand (operands[4], <MODE>mode)
4729 && !rtx_equal_p (operands[0], operands[4])"
4731 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4732 operands[4], operands[1]));
4733 operands[4] = operands[2] = operands[0];
4735 [(set_attr "movprfx" "yes")]
4738 ;; -------------------------------------------------------------------------
4739 ;; ---- [INT] Shifts (rounding towards -Inf)
4740 ;; -------------------------------------------------------------------------
4748 ;; -------------------------------------------------------------------------
4750 ;; Unpredicated shift by a scalar, which expands into one of the vector shifts below.
4752 (define_expand "<ASHIFT:optab><mode>3"
4753 [(set (match_operand:SVE_I 0 "register_operand")
4755 (match_operand:SVE_I 1 "register_operand")
4756 (match_operand:<VEL> 2 "general_operand")))]
4760 if (CONST_INT_P (operands[2]))
4762 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4763 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4764 amount = force_reg (<MODE>mode, amount);
4768 amount = convert_to_mode (<VEL>mode, operands[2], 0);
4769 amount = expand_vector_broadcast (<MODE>mode, amount);
4771 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
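;; For example, a shift by a variable amount (illustrative source):
;;
;;   void f (unsigned *x, int n, unsigned amt) {
;;     for (int i = 0; i < n; ++i)
;;       x[i] <<= amt;
;;   }
;;
;; broadcasts AMT into a vector and uses the vector-shift pattern below
;; (typically "lsl z0.s, p0/m, z0.s, z1.s"), while a constant amount stays
;; in immediate form.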
4776 ;; Unpredicated shift by a vector.
4777 (define_expand "v<optab><mode>3"
4778 [(set (match_operand:SVE_I 0 "register_operand")
4782 (match_operand:SVE_I 1 "register_operand")
4783 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
4787 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4791 ;; Shift by a vector, predicated with a PTRUE. We don't actually need
4792 ;; the predicate for the first alternative, but using Upa or X isn't
4793 ;; likely to gain much and would make the instruction seem less uniform
4794 ;; to the register allocator.
4795 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4796 [(set (match_operand:SVE_I 0 "register_operand")
4798 [(match_operand:<VPRED> 1 "register_operand")
4800 (match_operand:SVE_I 2 "register_operand")
4801 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand"))]
4804 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4805 [ w , Upl , w , D<lr> ; * ] #
4806 [ w , Upl , 0 , w ; * ] <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4807 [ w , Upl , w , 0 ; * ] <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4808 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4810 "&& reload_completed
4811 && !register_operand (operands[3], <MODE>mode)"
4812 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
4816 ;; Unpredicated shift operations by a constant (post-RA only).
4817 ;; These are generated by splitting a predicated instruction whose
4818 ;; predicate is unused.
4819 (define_insn "*post_ra_v_ashl<mode>3"
4820 [(set (match_operand:SVE_I 0 "register_operand")
4822 (match_operand:SVE_I 1 "register_operand")
4823 (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
4824 "TARGET_SVE && reload_completed"
4825 {@ [ cons: =0 , 1 , 2 ]
4826 [ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
4827 [ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
4831 (define_insn "*post_ra_v_<optab><mode>3"
4832 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4834 (match_operand:SVE_I 1 "register_operand" "w")
4835 (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
4836 "TARGET_SVE && reload_completed"
4837 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4840 ;; Predicated integer shift, merging with the first input.
4841 (define_insn "*cond_<optab><mode>_2_const"
4842 [(set (match_operand:SVE_I 0 "register_operand")
4844 [(match_operand:<VPRED> 1 "register_operand")
4846 (match_operand:SVE_I 2 "register_operand")
4847 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4851 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4852 [ w , Upl , 0 ; * ] <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4853 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4857 ;; Predicated integer shift, merging with an independent value.
4858 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4859 [(set (match_operand:SVE_I 0 "register_operand")
4861 [(match_operand:<VPRED> 1 "register_operand")
4863 (match_operand:SVE_I 2 "register_operand")
4864 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4865 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4867 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4868 {@ [ cons: =0 , 1 , 2 , 4 ]
4869 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4870 [ &w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4871 [ ?&w , Upl , w , w ] #
4873 "&& reload_completed
4874 && register_operand (operands[4], <MODE>mode)
4875 && !rtx_equal_p (operands[0], operands[4])"
4877 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4878 operands[4], operands[1]));
4879 operands[4] = operands[2] = operands[0];
4881 [(set_attr "movprfx" "yes")]
4884 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
4885 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4886 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4887 (unspec:SVE_FULL_BHSI
4888 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4889 (match_operand:VNx2DI 2 "register_operand" "w")]
4892 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4895 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4896 (define_expand "@cond_<sve_int_op><mode>"
4897 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4898 (unspec:SVE_FULL_BHSI
4899 [(match_operand:<VPRED> 1 "register_operand")
4900 (unspec:SVE_FULL_BHSI
4901 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4902 (match_operand:VNx2DI 3 "register_operand")]
4904 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4909 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with the first input.
4911 (define_insn "*cond_<sve_int_op><mode>_m"
4912 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4913 (unspec:SVE_FULL_BHSI
4914 [(match_operand:<VPRED> 1 "register_operand")
4915 (unspec:SVE_FULL_BHSI
4916 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4917 (match_operand:VNx2DI 3 "register_operand")]
4922 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4923 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4924 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4928 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
4929 (define_insn "*cond_<sve_int_op><mode>_z"
4930 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4931 (unspec:SVE_FULL_BHSI
4932 [(match_operand:<VPRED> 1 "register_operand")
4933 (unspec:SVE_FULL_BHSI
4934 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4935 (match_operand:VNx2DI 3 "register_operand")]
4937 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4940 {@ [ cons: =0 , 1 , 2 , 3 ]
4941 [ &w , Upl , 0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4942 [ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4944 [(set_attr "movprfx" "yes")])
4946 ;; -------------------------------------------------------------------------
4947 ;; ---- [INT] Shifts (rounding towards 0)
4948 ;; -------------------------------------------------------------------------
4954 ;; -------------------------------------------------------------------------
4956 ;; Unpredicated ASRD.
4957 (define_expand "sdiv_pow2<mode>3"
4958 [(set (match_operand:SVE_I 0 "register_operand")
4962 [(match_operand:SVE_I 1 "register_operand")
4963 (match_operand 2 "aarch64_simd_rshift_imm")]
4968 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4973 (define_insn "*sdiv_pow2<mode>3"
4974 [(set (match_operand:SVE_I 0 "register_operand")
4976 [(match_operand:<VPRED> 1 "register_operand")
4978 [(match_operand:SVE_I 2 "register_operand")
4979 (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
4983 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4984 [ w , Upl , 0 ; * ] asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4985 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
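;; For example, "x[i] / 4" with signed x must round towards zero: for
;; x = -7 a plain arithmetic shift gives -7 >> 2 = -2, whereas C requires
;; -1, so the vectoriser uses "asrd z0.s, p0/m, z0.s, #2", which applies
;; the rounding correction before shifting.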
4989 ;; Predicated shift with merging.
4990 (define_expand "@cond_<sve_int_op><mode>"
4991 [(set (match_operand:SVE_I 0 "register_operand")
4993 [(match_operand:<VPRED> 1 "register_operand")
4997 [(match_operand:SVE_I 2 "register_operand")
4998 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5001 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
5005 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
5009 ;; Predicated shift, merging with the first input.
5010 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
5011 [(set (match_operand:SVE_I 0 "register_operand")
5013 [(match_operand:<VPRED> 1 "register_operand")
5017 [(match_operand:SVE_I 2 "register_operand")
5018 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5024 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
5025 [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5026 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5028 "&& !CONSTANT_P (operands[4])"
5030 operands[4] = CONSTM1_RTX (<VPRED>mode);
5034 ;; Predicated shift, merging with an independent value.
5035 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
5036 [(set (match_operand:SVE_I 0 "register_operand")
5038 [(match_operand:<VPRED> 1 "register_operand")
5042 [(match_operand:SVE_I 2 "register_operand")
5043 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5046 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
5048 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5049 {@ [ cons: =0 , 1 , 2 , 4 ]
5050 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5051 [ &w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5052 [ ?&w , Upl , w , w ] #
5054 "&& reload_completed
5055 && register_operand (operands[4], <MODE>mode)
5056 && !rtx_equal_p (operands[0], operands[4])"
5058 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5059 operands[4], operands[1]));
5060 operands[4] = operands[2] = operands[0];
5062 [(set_attr "movprfx" "yes")]
5065 ;; -------------------------------------------------------------------------
5066 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
5067 ;; -------------------------------------------------------------------------
5072 ;; -------------------------------------------------------------------------
5074 ;; Unpredicated floating-point binary operations that take an integer as
5075 ;; their second operand.
5076 (define_insn "@aarch64_sve_<optab><mode>"
5077 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5079 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5080 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
5081 SVE_FP_BINARY_INT))]
5083 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5086 ;; Predicated floating-point binary operations that take an integer
5087 ;; as their second operand.
5088 (define_insn "@aarch64_pred_<optab><mode>"
5089 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5091 [(match_operand:<VPRED> 1 "register_operand")
5092 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5093 (match_operand:SVE_FULL_F 2 "register_operand")
5094 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5095 SVE_COND_FP_BINARY_INT))]
5097 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5098 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5099 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5103 ;; Predicated floating-point binary operations with merging, taking an
5104 ;; integer as their second operand.
5105 (define_expand "@cond_<optab><mode>"
5106 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5108 [(match_operand:<VPRED> 1 "register_operand")
5111 (const_int SVE_STRICT_GP)
5112 (match_operand:SVE_FULL_F 2 "register_operand")
5113 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5114 SVE_COND_FP_BINARY_INT)
5115 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5120 ;; Predicated floating-point binary operations that take an integer as their
5121 ;; second operand, with inactive lanes coming from the first operand.
5122 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5123 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5125 [(match_operand:<VPRED> 1 "register_operand")
5128 (const_int SVE_RELAXED_GP)
5129 (match_operand:SVE_FULL_F 2 "register_operand")
5130 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5131 SVE_COND_FP_BINARY_INT)
5135 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5136 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5137 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5139 "&& !rtx_equal_p (operands[1], operands[4])"
5141 operands[4] = copy_rtx (operands[1]);
5145 (define_insn "*cond_<optab><mode>_2_strict"
5146 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5148 [(match_operand:<VPRED> 1 "register_operand")
5151 (const_int SVE_STRICT_GP)
5152 (match_operand:SVE_FULL_F 2 "register_operand")
5153 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5154 SVE_COND_FP_BINARY_INT)
5158 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5159 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5160 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5164 ;; Predicated floating-point binary operations that take an integer as
5165 ;; their second operand, with the values of inactive lanes being distinct
5166 ;; from the other inputs.
5167 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5168 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5170 [(match_operand:<VPRED> 1 "register_operand")
5173 (const_int SVE_RELAXED_GP)
5174 (match_operand:SVE_FULL_F 2 "register_operand")
5175 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5176 SVE_COND_FP_BINARY_INT)
5177 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5179 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5180 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5181 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5182 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5183 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5184 [ ?&w , Upl , w , w , w ] #
5188 if (reload_completed
5189 && register_operand (operands[4], <MODE>mode)
5190 && !rtx_equal_p (operands[0], operands[4]))
5192 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5193 operands[4], operands[1]));
5194 operands[4] = operands[2] = operands[0];
5196 else if (!rtx_equal_p (operands[1], operands[5]))
5197 operands[5] = copy_rtx (operands[1]);
5201 [(set_attr "movprfx" "yes")]
5204 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5205 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5207 [(match_operand:<VPRED> 1 "register_operand")
5210 (const_int SVE_STRICT_GP)
5211 (match_operand:SVE_FULL_F 2 "register_operand")
5212 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5213 SVE_COND_FP_BINARY_INT)
5214 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5216 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5217 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5218 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5219 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5220 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5221 [ ?&w , Upl , w , w , w ] #
5223 "&& reload_completed
5224 && register_operand (operands[4], <MODE>mode)
5225 && !rtx_equal_p (operands[0], operands[4])"
5227 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5228 operands[4], operands[1]));
5229 operands[4] = operands[2] = operands[0];
5231 [(set_attr "movprfx" "yes")]
5234 ;; -------------------------------------------------------------------------
5235 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
5236 ;; -------------------------------------------------------------------------
5237 ;; Includes post-RA forms of:
;; - FADD
;; - FMUL
;; - FSUB
5241 ;; -------------------------------------------------------------------------
5243 ;; Unpredicated floating-point binary operations (post-RA only).
5244 ;; These are generated by splitting a predicated instruction whose
5245 ;; predicate is unused.
5246 (define_insn "*post_ra_<sve_fp_op><mode>3"
5247 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5248 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
5249 (match_operand:SVE_FULL_F 1 "register_operand" "w")
5250 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
5251 "TARGET_SVE && reload_completed"
5252 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
5254 ;; -------------------------------------------------------------------------
5255 ;; ---- [FP] General binary arithmetic corresponding to unspecs
5256 ;; -------------------------------------------------------------------------
5257 ;; Includes merging forms of:
5258 ;; - FADD (constant forms handled in the "Addition" section)
5262 ;; - FMAXNM (including #0.0 and #1.0)
5264 ;; - FMINNM (including #0.0 and #1.0)
5265 ;; - FMUL (including #0.5 and #2.0)
5269 ;; - FSUB (constant forms handled in the "Addition" section)
5270 ;; - FSUBR (constant forms handled in the "Subtraction" section)
5271 ;; -------------------------------------------------------------------------
5273 ;; Unpredicated floating-point binary operations.
5274 (define_insn "@aarch64_sve_<optab><mode>"
5275 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5277 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5278 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5281 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5284 ;; Unpredicated floating-point binary operations that need to be predicated for SVE.
5286 (define_expand "<optab><mode>3"
5287 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5290 (const_int SVE_RELAXED_GP)
5291 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
5292 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
5293 SVE_COND_FP_BINARY_OPTAB))]
5296 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5300 ;; Predicated floating-point binary operations that have no immediate forms.
5301 (define_insn "@aarch64_pred_<optab><mode>"
5302 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5304 [(match_operand:<VPRED> 1 "register_operand")
5305 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5306 (match_operand:SVE_FULL_F 2 "register_operand")
5307 (match_operand:SVE_FULL_F 3 "register_operand")]
5308 SVE_COND_FP_BINARY_REG))]
5310 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5311 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5312 [ w , Upl , w , 0 ; * ] <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5313 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5317 ;; Predicated floating-point operations with merging.
5318 (define_expand "@cond_<optab><mode>"
5319 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5321 [(match_operand:<VPRED> 1 "register_operand")
5324 (const_int SVE_STRICT_GP)
5325 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
5326 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
5328 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5333 ;; Predicated floating-point operations, merging with the first input.
5334 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5335 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5337 [(match_operand:<VPRED> 1 "register_operand")
5340 (const_int SVE_RELAXED_GP)
5341 (match_operand:SVE_FULL_F 2 "register_operand")
5342 (match_operand:SVE_FULL_F 3 "register_operand")]
5347 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5348 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5349 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5351 "&& !rtx_equal_p (operands[1], operands[4])"
5353 operands[4] = copy_rtx (operands[1]);
5357 (define_insn "*cond_<optab><mode>_2_strict"
5358 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5360 [(match_operand:<VPRED> 1 "register_operand")
5363 (const_int SVE_STRICT_GP)
5364 (match_operand:SVE_FULL_F 2 "register_operand")
5365 (match_operand:SVE_FULL_F 3 "register_operand")]
5370 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5371 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5372 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5376 ;; Same for operations that take a 1-bit constant.
5377 (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
5378 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5380 [(match_operand:<VPRED> 1 "register_operand")
5383 (const_int SVE_RELAXED_GP)
5384 (match_operand:SVE_FULL_F 2 "register_operand")
5385 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5386 SVE_COND_FP_BINARY_I1)
5390 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
5391 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5392 [ ?w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5394 "&& !rtx_equal_p (operands[1], operands[4])"
5396 operands[4] = copy_rtx (operands[1]);
5400 (define_insn "*cond_<optab><mode>_2_const_strict"
5401 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5403 [(match_operand:<VPRED> 1 "register_operand")
5406 (const_int SVE_STRICT_GP)
5407 (match_operand:SVE_FULL_F 2 "register_operand")
5408 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5409 SVE_COND_FP_BINARY_I1)
5413 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
5414 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5415 [ ?w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5419 ;; Predicated floating-point operations, merging with the second input.
5420 (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
5421 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5423 [(match_operand:<VPRED> 1 "register_operand")
5426 (const_int SVE_RELAXED_GP)
5427 (match_operand:SVE_FULL_F 2 "register_operand")
5428 (match_operand:SVE_FULL_F 3 "register_operand")]
5433 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5434 [ w , Upl , w , 0 ; * ] <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5435 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5437 "&& !rtx_equal_p (operands[1], operands[4])"
5439 operands[4] = copy_rtx (operands[1]);
5443 (define_insn "*cond_<optab><mode>_3_strict"
5444 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5446 [(match_operand:<VPRED> 1 "register_operand")
5449 (const_int SVE_STRICT_GP)
5450 (match_operand:SVE_FULL_F 2 "register_operand")
5451 (match_operand:SVE_FULL_F 3 "register_operand")]
5456 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5457 [ w , Upl , w , 0 ; * ] <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5458 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5462 ;; Predicated floating-point operations, merging with an independent value.
5463 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5464 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5466 [(match_operand:<VPRED> 1 "register_operand")
5469 (const_int SVE_RELAXED_GP)
5470 (match_operand:SVE_FULL_F 2 "register_operand")
5471 (match_operand:SVE_FULL_F 3 "register_operand")]
5473 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5476 && !rtx_equal_p (operands[2], operands[4])
5477 && !rtx_equal_p (operands[3], operands[4])"
5478 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5479 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5480 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5481 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5482 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5483 [ ?&w , Upl , w , w , w ] #
5487 if (reload_completed
5488 && register_operand (operands[4], <MODE>mode)
5489 && !rtx_equal_p (operands[0], operands[4]))
5491 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5492 operands[4], operands[1]));
5493 operands[4] = operands[2] = operands[0];
5495 else if (!rtx_equal_p (operands[1], operands[5]))
5496 operands[5] = copy_rtx (operands[1]);
5500 [(set_attr "movprfx" "yes")]
5503 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5504 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5506 [(match_operand:<VPRED> 1 "register_operand")
5509 (const_int SVE_STRICT_GP)
5510 (match_operand:SVE_FULL_F 2 "register_operand")
5511 (match_operand:SVE_FULL_F 3 "register_operand")]
5513 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5516 && !rtx_equal_p (operands[2], operands[4])
5517 && !rtx_equal_p (operands[3], operands[4])"
5518 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5519 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5520 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5521 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5522 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5523 [ ?&w , Upl , w , w , w ] #
5525 "&& reload_completed
5526 && register_operand (operands[4], <MODE>mode)
5527 && !rtx_equal_p (operands[0], operands[4])"
5529 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5530 operands[4], operands[1]));
5531 operands[4] = operands[2] = operands[0];
5533 [(set_attr "movprfx" "yes")]
5536 ;; Same for operations that take a 1-bit constant.
5537 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
5538 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5540 [(match_operand:<VPRED> 1 "register_operand")
5543 (const_int SVE_RELAXED_GP)
5544 (match_operand:SVE_FULL_F 2 "register_operand")
5545 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5546 SVE_COND_FP_BINARY_I1)
5547 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5549 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5550 {@ [ cons: =0 , 1 , 2 , 4 ]
5551 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5552 [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5553 [ ?w , Upl , w , w ] #
5557 if (reload_completed
5558 && register_operand (operands[4], <MODE>mode)
5559 && !rtx_equal_p (operands[0], operands[4]))
5561 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5562 operands[4], operands[1]));
5563 operands[4] = operands[2] = operands[0];
5565 else if (!rtx_equal_p (operands[1], operands[5]))
5566 operands[5] = copy_rtx (operands[1]);
5570 [(set_attr "movprfx" "yes")]
5573 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
5574 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5576 [(match_operand:<VPRED> 1 "register_operand")
5579 (const_int SVE_STRICT_GP)
5580 (match_operand:SVE_FULL_F 2 "register_operand")
5581 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5582 SVE_COND_FP_BINARY_I1)
5583 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5585 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5586 {@ [ cons: =0 , 1 , 2 , 4 ]
5587 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5588 [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5589 [ ?w , Upl , w , w ] #
5591 "&& reload_completed
5592 && register_operand (operands[4], <MODE>mode)
5593 && !rtx_equal_p (operands[0], operands[4])"
5595 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5596 operands[4], operands[1]));
5597 operands[4] = operands[2] = operands[0];
5599 [(set_attr "movprfx" "yes")]
5602 ;; -------------------------------------------------------------------------
5603 ;; ---- [FP] Addition
5604 ;; -------------------------------------------------------------------------
5608 ;; -------------------------------------------------------------------------
5610 ;; Predicated floating-point addition.
5611 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5612 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5614 [(match_operand:<VPRED> 1 "register_operand")
5615 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5616 (match_operand:SVE_FULL_F 2 "register_operand")
5617 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
5620 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
5621 [ w , Upl , %0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5622 [ w , Upl , 0 , vsN , i ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5623 [ w , Upl , w , w , Z ; * ] #
5624 [ w , Upl , 0 , w , Ui1 ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5625 [ ?&w , Upl , w , vsA , i ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5626 [ ?&w , Upl , w , vsN , i ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5627 [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5629 ; Split the unpredicated form after reload, so that we don't have
5630 ; the unnecessary PTRUE.
5631 "&& reload_completed
5632 && register_operand (operands[3], <MODE>mode)
5633 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5634 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
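;; As an illustrative aside (C, not RTL): the vsA/vsN alternatives above
;; cover the FADD/FSUB immediate forms (FADD's immediate is restricted to
;; #0.5 and #1.0).  A rough sketch, assuming GCC with SVE enabled:
;;
;;   #include <arm_sve.h>
;;
;;   svfloat32_t
;;   add_one (svbool_t pg, svfloat32_t x)
;;   {
;;     return svadd_n_f32_m (pg, x, 1.0f);
;;   }
;;
;; which is expected to assemble to
;;   fadd    z0.s, p0/m, z0.s, #1.0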
5638 ;; Predicated floating-point addition of a constant, merging with the first input.
5640 (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
5641 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5643 [(match_operand:<VPRED> 1 "register_operand")
5646 (const_int SVE_RELAXED_GP)
5647 (match_operand:SVE_FULL_F 2 "register_operand")
5648 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5653 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5654 [ w , Upl , 0 , vsA ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5655 [ w , Upl , 0 , vsN ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5656 [ ?w , Upl , w , vsA ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5657 [ ?w , Upl , w , vsN ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5659 "&& !rtx_equal_p (operands[1], operands[4])"
5661 operands[4] = copy_rtx (operands[1]);
5665 (define_insn "*cond_add<mode>_2_const_strict"
5666 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5668 [(match_operand:<VPRED> 1 "register_operand")
5671 (const_int SVE_STRICT_GP)
5672 (match_operand:SVE_FULL_F 2 "register_operand")
5673 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5678 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5679 [ w , Upl , 0 , vsA ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5680 [ w , Upl , 0 , vsN ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5681 [ ?w , Upl , w , vsA ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5682 [ ?w , Upl , w , vsN ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5686 ;; Predicated floating-point addition of a constant, merging with an
5687 ;; independent value.
5688 (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
5689 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5691 [(match_operand:<VPRED> 1 "register_operand")
5694 (const_int SVE_RELAXED_GP)
5695 (match_operand:SVE_FULL_F 2 "register_operand")
5696 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5698 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5700 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5701 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5702 [ w , Upl , w , vsA , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5703 [ w , Upl , w , vsN , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5704 [ w , Upl , w , vsA , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5705 [ w , Upl , w , vsN , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5706 [ ?w , Upl , w , vsA , w ] #
5707 [ ?w , Upl , w , vsN , w ] #
5711 if (reload_completed
5712 && register_operand (operands[4], <MODE>mode)
5713 && !rtx_equal_p (operands[0], operands[4]))
5715 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5716 operands[4], operands[1]));
5717 operands[4] = operands[2] = operands[0];
5719 else if (!rtx_equal_p (operands[1], operands[5]))
5720 operands[5] = copy_rtx (operands[1]);
5724 [(set_attr "movprfx" "yes")]
5727 (define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
5728 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5730 [(match_operand:<VPRED> 1 "register_operand")
5733 (const_int SVE_STRICT_GP)
5734 (match_operand:SVE_FULL_F 2 "register_operand")
5735 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5737 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5739 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5740 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5741 [ w , Upl , w , vsA , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5742 [ w , Upl , w , vsN , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5743 [ w , Upl , w , vsA , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5744 [ w , Upl , w , vsN , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5745 [ ?w , Upl , w , vsA , w ] #
5746 [ ?w , Upl , w , vsN , w ] #
5748 "&& reload_completed
5749 && register_operand (operands[4], <MODE>mode)
5750 && !rtx_equal_p (operands[0], operands[4])"
5752 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5753 operands[4], operands[1]));
5754 operands[4] = operands[2] = operands[0];
5756 [(set_attr "movprfx" "yes")]
5759 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5761 ;; -------------------------------------------------------------------------
5762 ;; ---- [FP] Complex addition
5763 ;; -------------------------------------------------------------------------
5766 ;; -------------------------------------------------------------------------
5768 ;; Predicated FCADD.
5769 (define_insn "@aarch64_pred_<optab><mode>"
5770 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5772 [(match_operand:<VPRED> 1 "register_operand")
5773 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5774 (match_operand:SVE_FULL_F 2 "register_operand")
5775 (match_operand:SVE_FULL_F 3 "register_operand")]
5778 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5779 [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5780 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5784 ;; Predicated FCADD with merging.
5785 (define_expand "@cond_<optab><mode>"
5786 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5788 [(match_operand:<VPRED> 1 "register_operand")
5791 (const_int SVE_STRICT_GP)
5792 (match_operand:SVE_FULL_F 2 "register_operand")
5793 (match_operand:SVE_FULL_F 3 "register_operand")]
5795 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5800 ;; Predicated FCADD with a ptrue predicate, providing the unpredicated optab used by the auto-vectorizer.
5801 (define_expand "@cadd<rot><mode>3"
5802 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5805 (const_int SVE_RELAXED_GP)
5806 (match_operand:SVE_FULL_F 1 "register_operand")
5807 (match_operand:SVE_FULL_F 2 "register_operand")]
5811 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
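;; As an illustrative aside (C, not RTL): FCADD is also directly
;; accessible from C through the svcadd intrinsics, which use the
;; predicated patterns in this section.  A rough sketch, assuming GCC with
;; SVE enabled:
;;
;;   #include <arm_sve.h>
;;
;;   svfloat32_t
;;   cadd90 (svbool_t pg, svfloat32_t x, svfloat32_t y)
;;   {
;;     /* Complex addition of Y rotated by 90 degrees to X.  */
;;     return svcadd_f32_x (pg, x, y, 90);
;;   }
;;
;; which is expected to map onto
;;   fcadd   z0.s, p0/m, z0.s, z1.s, #90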
5814 ;; Predicated FCADD, merging with the first input.
5815 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5816 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5818 [(match_operand:<VPRED> 1 "register_operand")
5821 (const_int SVE_RELAXED_GP)
5822 (match_operand:SVE_FULL_F 2 "register_operand")
5823 (match_operand:SVE_FULL_F 3 "register_operand")]
5828 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5829 [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5830 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5832 "&& !rtx_equal_p (operands[1], operands[4])"
5834 operands[4] = copy_rtx (operands[1]);
5838 (define_insn "*cond_<optab><mode>_2_strict"
5839 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5841 [(match_operand:<VPRED> 1 "register_operand")
5844 (const_int SVE_STRICT_GP)
5845 (match_operand:SVE_FULL_F 2 "register_operand")
5846 (match_operand:SVE_FULL_F 3 "register_operand")]
5851 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
5852 [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5853 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5857 ;; Predicated FCADD, merging with an independent value.
5858 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5859 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5861 [(match_operand:<VPRED> 1 "register_operand")
5864 (const_int SVE_RELAXED_GP)
5865 (match_operand:SVE_FULL_F 2 "register_operand")
5866 (match_operand:SVE_FULL_F 3 "register_operand")]
5868 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5870 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5871 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5872 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5873 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5874 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5875 [ ?&w , Upl , w , w , w ] #
5879 if (reload_completed
5880 && register_operand (operands[4], <MODE>mode)
5881 && !rtx_equal_p (operands[0], operands[4]))
5883 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5884 operands[4], operands[1]));
5885 operands[4] = operands[2] = operands[0];
5887 else if (!rtx_equal_p (operands[1], operands[5]))
5888 operands[5] = copy_rtx (operands[1]);
5892 [(set_attr "movprfx" "yes")]
5895 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5896 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5898 [(match_operand:<VPRED> 1 "register_operand")
5901 (const_int SVE_STRICT_GP)
5902 (match_operand:SVE_FULL_F 2 "register_operand")
5903 (match_operand:SVE_FULL_F 3 "register_operand")]
5905 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5907 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5908 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
5909 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5910 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5911 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5912 [ ?&w , Upl , w , w , w ] #
5914 "&& reload_completed
5915 && register_operand (operands[4], <MODE>mode)
5916 && !rtx_equal_p (operands[0], operands[4])"
5918 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5919 operands[4], operands[1]));
5920 operands[4] = operands[2] = operands[0];
5922 [(set_attr "movprfx" "yes")]
5925 ;; -------------------------------------------------------------------------
5926 ;; ---- [FP] Subtraction
5927 ;; -------------------------------------------------------------------------
5931 ;; -------------------------------------------------------------------------
5933 ;; Predicated floating-point subtraction.
5934 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5935 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5937 [(match_operand:<VPRED> 1 "register_operand")
5938 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5939 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
5940 (match_operand:SVE_FULL_F 3 "register_operand")]
5943 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
5944 [ w , Upl , vsA , 0 , i ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5945 [ w , Upl , w , w , Z ; * ] #
5946 [ w , Upl , 0 , w , Ui1 ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5947 [ w , Upl , w , 0 , Ui1 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5948 [ ?&w , Upl , vsA , w , i ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5949 [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5951 ; Split the unpredicated form after reload, so that we don't have
5952 ; the unnecessary PTRUE.
5953 "&& reload_completed
5954 && register_operand (operands[2], <MODE>mode)
5955 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5956 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5960 ;; Predicated floating-point subtraction from a constant, merging with the second input.
5962 (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
5963 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5965 [(match_operand:<VPRED> 1 "register_operand")
5968 (const_int SVE_RELAXED_GP)
5969 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5970 (match_operand:SVE_FULL_F 3 "register_operand")]
5975 {@ [ cons: =0 , 1 , 3 ; attrs: movprfx ]
5976 [ w , Upl , 0 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5977 [ ?w , Upl , w ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5979 "&& !rtx_equal_p (operands[1], operands[4])"
5981 operands[4] = copy_rtx (operands[1]);
5985 (define_insn "*cond_sub<mode>_3_const_strict"
5986 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5988 [(match_operand:<VPRED> 1 "register_operand")
5991 (const_int SVE_STRICT_GP)
5992 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5993 (match_operand:SVE_FULL_F 3 "register_operand")]
5998 {@ [ cons: =0 , 1 , 3 ; attrs: movprfx ]
5999 [ w , Upl , 0 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6000 [ ?w , Upl , w ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6004 ;; Predicated floating-point subtraction from a constant, merging with an
6005 ;; independent value.
6006 (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
6007 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6009 [(match_operand:<VPRED> 1 "register_operand")
6012 (const_int SVE_RELAXED_GP)
6013 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
6014 (match_operand:SVE_FULL_F 3 "register_operand")]
6016 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6018 "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
6019 {@ [ cons: =0 , 1 , 3 , 4 ]
6020 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6021 [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6022 [ ?w , Upl , w , w ] #
6026 if (reload_completed
6027 && register_operand (operands[4], <MODE>mode)
6028 && !rtx_equal_p (operands[0], operands[4]))
6030 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6031 operands[4], operands[1]));
6032 operands[4] = operands[3] = operands[0];
6034 else if (!rtx_equal_p (operands[1], operands[5]))
6035 operands[5] = copy_rtx (operands[1]);
6039 [(set_attr "movprfx" "yes")]
6042 (define_insn_and_rewrite "*cond_sub<mode>_const_strict"
6043 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6045 [(match_operand:<VPRED> 1 "register_operand")
6048 (const_int SVE_STRICT_GP)
6049 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
6050 (match_operand:SVE_FULL_F 3 "register_operand")]
6052 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6054 "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
6055 {@ [ cons: =0 , 1 , 3 , 4 ]
6056 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6057 [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6058 [ ?w , Upl , w , w ] #
6060 "&& reload_completed
6061 && register_operand (operands[4], <MODE>mode)
6062 && !rtx_equal_p (operands[0], operands[4])"
6064 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6065 operands[4], operands[1]));
6066 operands[4] = operands[3] = operands[0];
6068 [(set_attr "movprfx" "yes")]
6070 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
6072 ;; -------------------------------------------------------------------------
6073 ;; ---- [FP] Absolute difference
6074 ;; -------------------------------------------------------------------------
6077 ;; -------------------------------------------------------------------------
6079 ;; Predicated floating-point absolute difference.
6080 (define_expand "@aarch64_pred_abd<mode>"
6081 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6083 [(match_operand:<VPRED> 1 "register_operand")
6084 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6088 (match_operand:SVE_FULL_F 2 "register_operand")
6089 (match_operand:SVE_FULL_F 3 "register_operand")]
6095 ;; Predicated floating-point absolute difference.
6096 (define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
6097 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6099 [(match_operand:<VPRED> 1 "register_operand")
6100 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6103 (const_int SVE_RELAXED_GP)
6104 (match_operand:SVE_FULL_F 2 "register_operand")
6105 (match_operand:SVE_FULL_F 3 "register_operand")]
6109 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6110 [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6111 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6113 "&& !rtx_equal_p (operands[1], operands[5])"
6115 operands[5] = copy_rtx (operands[1]);
6119 (define_insn "*aarch64_pred_abd<mode>_strict"
6120 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6122 [(match_operand:<VPRED> 1 "register_operand")
6123 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6126 (const_int SVE_STRICT_GP)
6127 (match_operand:SVE_FULL_F 2 "register_operand")
6128 (match_operand:SVE_FULL_F 3 "register_operand")]
6132 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6133 [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6134 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6138 (define_expand "@aarch64_cond_abd<mode>"
6139 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6141 [(match_operand:<VPRED> 1 "register_operand")
6144 (const_int SVE_STRICT_GP)
6147 (const_int SVE_STRICT_GP)
6148 (match_operand:SVE_FULL_F 2 "register_operand")
6149 (match_operand:SVE_FULL_F 3 "register_operand")]
6152 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6156 if (rtx_equal_p (operands[3], operands[4]))
6157 std::swap (operands[2], operands[3]);
6160 ;; Predicated floating-point absolute difference, merging with the first input.
6162 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
6163 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6165 [(match_operand:<VPRED> 1 "register_operand")
6168 (const_int SVE_RELAXED_GP)
6171 (const_int SVE_RELAXED_GP)
6172 (match_operand:SVE_FULL_F 2 "register_operand")
6173 (match_operand:SVE_FULL_F 3 "register_operand")]
6179 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6180 [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6181 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6183 "&& (!rtx_equal_p (operands[1], operands[4])
6184 || !rtx_equal_p (operands[1], operands[5]))"
6186 operands[4] = copy_rtx (operands[1]);
6187 operands[5] = copy_rtx (operands[1]);
6191 (define_insn "*aarch64_cond_abd<mode>_2_strict"
6192 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6194 [(match_operand:<VPRED> 1 "register_operand")
6197 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6200 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6201 (match_operand:SVE_FULL_F 2 "register_operand")
6202 (match_operand:SVE_FULL_F 3 "register_operand")]
6208 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6209 [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6210 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6214 ;; Predicated floating-point absolute difference, merging with the second input.
6216 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
6217 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6219 [(match_operand:<VPRED> 1 "register_operand")
6222 (const_int SVE_RELAXED_GP)
6225 (const_int SVE_RELAXED_GP)
6226 (match_operand:SVE_FULL_F 2 "register_operand")
6227 (match_operand:SVE_FULL_F 3 "register_operand")]
6233 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6234 [ w , Upl , w , 0 ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6235 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6237 "&& (!rtx_equal_p (operands[1], operands[4])
6238 || !rtx_equal_p (operands[1], operands[5]))"
6240 operands[4] = copy_rtx (operands[1]);
6241 operands[5] = copy_rtx (operands[1]);
6245 (define_insn "*aarch64_cond_abd<mode>_3_strict"
6246 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6248 [(match_operand:<VPRED> 1 "register_operand")
6251 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6254 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6255 (match_operand:SVE_FULL_F 2 "register_operand")
6256 (match_operand:SVE_FULL_F 3 "register_operand")]
6262 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6263 [ w , Upl , w , 0 ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6264 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6268 ;; Predicated floating-point absolute difference, merging with an
6269 ;; independent value.
6270 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
6271 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6273 [(match_operand:<VPRED> 1 "register_operand")
6276 (const_int SVE_RELAXED_GP)
6279 (const_int SVE_RELAXED_GP)
6280 (match_operand:SVE_FULL_F 2 "register_operand")
6281 (match_operand:SVE_FULL_F 3 "register_operand")]
6284 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6287 && !rtx_equal_p (operands[2], operands[4])
6288 && !rtx_equal_p (operands[3], operands[4])"
6289 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
6290 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6291 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6292 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6293 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6294 [ ?&w , Upl , w , w , w ] #
6298 if (reload_completed
6299 && register_operand (operands[4], <MODE>mode)
6300 && !rtx_equal_p (operands[0], operands[4]))
6302 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6303 operands[4], operands[1]));
6304 operands[4] = operands[3] = operands[0];
6306 else if (!rtx_equal_p (operands[1], operands[5])
6307 || !rtx_equal_p (operands[1], operands[6]))
6309 operands[5] = copy_rtx (operands[1]);
6310 operands[6] = copy_rtx (operands[1]);
6315 [(set_attr "movprfx" "yes")]
6318 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
6319 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6321 [(match_operand:<VPRED> 1 "register_operand")
6324 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6327 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6328 (match_operand:SVE_FULL_F 2 "register_operand")
6329 (match_operand:SVE_FULL_F 3 "register_operand")]
6332 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6335 && !rtx_equal_p (operands[2], operands[4])
6336 && !rtx_equal_p (operands[3], operands[4])"
6337 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
6338 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6339 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6340 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6341 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6342 [ ?&w , Upl , w , w , w ] #
6344 "&& reload_completed
6345 && register_operand (operands[4], <MODE>mode)
6346 && !rtx_equal_p (operands[0], operands[4])"
6348 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6349 operands[4], operands[1]));
6350 operands[4] = operands[3] = operands[0];
6352 [(set_attr "movprfx" "yes")]
6355 ;; -------------------------------------------------------------------------
6356 ;; ---- [FP] Multiplication
6357 ;; -------------------------------------------------------------------------
6360 ;; -------------------------------------------------------------------------
6362 ;; Predicated floating-point multiplication.
6363 (define_insn_and_split "@aarch64_pred_<optab><mode>"
6364 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6366 [(match_operand:<VPRED> 1 "register_operand")
6367 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6368 (match_operand:SVE_FULL_F 2 "register_operand")
6369 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
6372 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
6373 [ w , Upl , %0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6374 [ w , Upl , w , w , Z ; * ] #
6375 [ w , Upl , 0 , w , Ui1 ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6376 [ ?&w , Upl , w , vsM , i ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6377 [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6379 ; Split the unpredicated form after reload, so that we don't have
6380 ; the unnecessary PTRUE.
6381 "&& reload_completed
6382 && register_operand (operands[3], <MODE>mode)
6383 && INTVAL (operands[4]) == SVE_RELAXED_GP"
6384 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
6388 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6389 ;; SVE_COND_FP_BINARY_I1.
6391 ;; Unpredicated multiplication by selected lanes.
6392 (define_insn "@aarch64_mul_lane_<mode>"
6393 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6396 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
6397 (match_operand:SI 3 "const_int_operand")]
6398 UNSPEC_SVE_LANE_SELECT)
6399 (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
6401 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6404 ;; -------------------------------------------------------------------------
6405 ;; ---- [FP] Division
6406 ;; -------------------------------------------------------------------------
6407 ;; The patterns in this section are synthetic.
6408 ;; -------------------------------------------------------------------------
6410 (define_expand "div<mode>3"
6411 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6414 (const_int SVE_RELAXED_GP)
6415 (match_operand:SVE_FULL_F 1 "nonmemory_operand")
6416 (match_operand:SVE_FULL_F 2 "register_operand")]
6420 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
6423 operands[1] = force_reg (<MODE>mode, operands[1]);
6424 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
6428 (define_expand "@aarch64_frecpe<mode>"
6429 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6431 [(match_operand:SVE_FULL_F 1 "register_operand")]
6436 (define_expand "@aarch64_frecps<mode>"
6437 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6439 [(match_operand:SVE_FULL_F 1 "register_operand")
6440 (match_operand:SVE_FULL_F 2 "register_operand")]
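;; As an illustrative aside (C, not RTL): the frecpe/frecps expanders
;; above provide building blocks that the approximate-division path
;; (aarch64_emit_approx_div) can use.  A rough sketch of one
;; Newton-Raphson refinement step for 1/b, assuming GCC with SVE enabled;
;; the real sequence and number of steps depend on the mode and on the
;; precision/fast-math options in force:
;;
;;   #include <arm_sve.h>
;;
;;   svfloat32_t
;;   recip_step (svfloat32_t b)
;;   {
;;     svbool_t pg = svptrue_b32 ();
;;     svfloat32_t est = svrecpe_f32 (b);   /* FRECPE estimate.  */
;;     /* FRECPS computes (2 - b * est); multiplying by it refines EST.  */
;;     return svmul_f32_x (pg, est, svrecps_f32 (b, est));
;;   }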
6445 ;; -------------------------------------------------------------------------
6446 ;; ---- [FP] Binary logical operations
6447 ;; -------------------------------------------------------------------------
6452 ;; -------------------------------------------------------------------------
6454 ;; Binary logical operations on floating-point modes. We avoid subregs
6455 ;; by providing this, but we need to use UNSPECs since rtx logical ops
6456 ;; aren't defined for floating-point modes.
6457 (define_insn "*<optab><mode>3"
6458 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6460 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
6461 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
6464 "<logicalf_op>\t%0.d, %1.d, %2.d"
6467 ;; -------------------------------------------------------------------------
6468 ;; ---- [FP] Sign copying
6469 ;; -------------------------------------------------------------------------
6470 ;; The patterns in this section are synthetic.
6471 ;; -------------------------------------------------------------------------
6473 (define_expand "copysign<mode>3"
6474 [(match_operand:SVE_FULL_F 0 "register_operand")
6475 (match_operand:SVE_FULL_F 1 "register_operand")
6476 (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
6479 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6480 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6481 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6482 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6484 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6485 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6488 = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6489 HOST_WIDE_INT_M1U << bits);
6491 /* copysign (x, -1) should instead be expanded as orr with the sign bit.  */
6493 if (!REG_P (operands[2]))
6495 rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
6496 if (GET_CODE (op2_elt) == CONST_DOUBLE
6497 && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
6499 emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
6500 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6505 operands[2] = force_reg (<MODE>mode, operands[2]);
6506 emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
6507 emit_insn (gen_and<v_int_equiv>3
6509 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6512 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
6513 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
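;; As an illustrative aside (C, not RTL): the expander above implements
;; copysign on the integer view of the vectors, ANDing each operand with a
;; sign or mantissa mask and ORRing the results.  A rough sketch, assuming
;; -O3 and an SVE target:
;;
;;   #include <math.h>
;;
;;   void
;;   copy_signs (float *restrict x, const float *restrict y, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = copysignf (x[i], y[i]);
;;   }
;;
;; The vectorized loop is expected to use the bitwise sequence built
;; above rather than a dedicated copysign instruction.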
6518 (define_expand "cond_copysign<mode>"
6519 [(match_operand:SVE_FULL_F 0 "register_operand")
6520 (match_operand:<VPRED> 1 "register_operand")
6521 (match_operand:SVE_FULL_F 2 "register_operand")
6522 (match_operand:SVE_FULL_F 3 "nonmemory_operand")
6523 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6526 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6527 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6528 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6529 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6531 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6532 rtx arg3 = lowpart_subreg (<V_INT_EQUIV>mode, operands[3], <MODE>mode);
6533 rtx arg4 = lowpart_subreg (<V_INT_EQUIV>mode, operands[4], <MODE>mode);
6536 = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6537 HOST_WIDE_INT_M1U << bits);
6539 /* copysign (x, -1) should instead be expanded as orr with the sign bit.  */
6541 if (!REG_P (operands[3]))
6543 rtx op2_elt = unwrap_const_vec_duplicate (operands[3]);
6544 if (GET_CODE (op2_elt) == CONST_DOUBLE
6545 && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
6547 arg3 = force_reg (<V_INT_EQUIV>mode, v_sign_bitmask);
6548 emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], arg2,
6550 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6555 operands[2] = force_reg (<MODE>mode, operands[3]);
6556 emit_insn (gen_and<v_int_equiv>3 (sign, arg3, v_sign_bitmask));
6557 emit_insn (gen_and<v_int_equiv>3
6559 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6562 emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], sign, mant,
6564 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6569 (define_expand "xorsign<mode>3"
6570 [(match_operand:SVE_FULL_F 0 "register_operand")
6571 (match_operand:SVE_FULL_F 1 "register_operand")
6572 (match_operand:SVE_FULL_F 2 "register_operand")]
6575 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6576 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6577 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6579 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6580 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6582 emit_insn (gen_and<v_int_equiv>3
6584 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6587 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
6588 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
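;; As an illustrative aside (C, not RTL): xorsign is the form the middle
;; end uses for expressions such as a * copysign (1.0, b).  A rough
;; sketch, assuming -O3 and an SVE target:
;;
;;   #include <math.h>
;;
;;   void
;;   apply_signs (float *restrict x, const float *restrict y, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       x[i] = x[i] * copysignf (1.0f, y[i]);
;;   }
;;
;; which is expected to expand as above: AND out the sign bits of y and
;; EOR them into x, avoiding a full copysign followed by a multiply.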
6593 ;; -------------------------------------------------------------------------
6594 ;; ---- [FP] Maximum and minimum
6595 ;; -------------------------------------------------------------------------
6601 ;; -------------------------------------------------------------------------
6603 ;; Predicated floating-point maximum/minimum.
6604 (define_insn "@aarch64_pred_<optab><mode>"
6605 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6607 [(match_operand:<VPRED> 1 "register_operand")
6608 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6609 (match_operand:SVE_FULL_F 2 "register_operand")
6610 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
6611 SVE_COND_FP_MAXMIN))]
6613 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6614 [ w , Upl , %0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6615 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6616 [ ?&w , Upl , w , vsB ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6617 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6621 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6622 ;; SVE_COND_FP_BINARY_I1.
6624 ;; -------------------------------------------------------------------------
6625 ;; ---- [PRED] Binary logical operations
6626 ;; -------------------------------------------------------------------------
6634 ;; -------------------------------------------------------------------------
6636 ;; Predicate AND. We can reuse one of the inputs as the GP.
6637 ;; Doubling the second operand is the preferred implementation
6638 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
6639 (define_insn "and<mode>3"
6640 [(set (match_operand:PRED_ALL 0 "register_operand")
6641 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
6642 (match_operand:PRED_ALL 2 "register_operand")))]
6644 {@ [ cons: =0, 1 , 2 ; attrs: pred_clobber ]
6645 [ &Upa , Upa , Upa ; yes ] and\t%0.b, %1/z, %2.b, %2.b
6646 [ ?Upa , 0Upa, 0Upa; yes ] ^
6647 [ Upa , Upa , Upa ; no ] ^
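;; As an illustrative aside (C, not RTL): the predicate logical patterns
;; are also reachable through the ACLE predicate intrinsics.  A rough
;; sketch, assuming GCC with SVE enabled:
;;
;;   #include <arm_sve.h>
;;
;;   svbool_t
;;   pred_and (svbool_t pg, svbool_t a, svbool_t b)
;;   {
;;     /* Zero the lanes where PG is false, AND the remaining lanes.  */
;;     return svand_z (pg, a, b);
;;   }
;;
;; which is expected to map onto something like
;;   and     p0.b, p0/z, p1.b, p2.b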
6651 ;; Unpredicated predicate EOR and ORR.
6652 (define_expand "<optab><mode>3"
6653 [(set (match_operand:PRED_ALL 0 "register_operand")
6655 (LOGICAL_OR:PRED_ALL
6656 (match_operand:PRED_ALL 1 "register_operand")
6657 (match_operand:PRED_ALL 2 "register_operand"))
6661 operands[3] = aarch64_ptrue_reg (<MODE>mode);
6665 ;; Predicated predicate AND, EOR and ORR.
6666 (define_insn "@aarch64_pred_<optab><mode>_z"
6667 [(set (match_operand:PRED_ALL 0 "register_operand")
6670 (match_operand:PRED_ALL 2 "register_operand")
6671 (match_operand:PRED_ALL 3 "register_operand"))
6672 (match_operand:PRED_ALL 1 "register_operand")))]
6674 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6675 [ &Upa , Upa , Upa , Upa ; yes ] <logical>\t%0.b, %1/z, %2.b, %3.b
6676 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6677 [ Upa , Upa , Upa , Upa ; no ] ^
6681 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
6682 ;; the GP. Store the result in operand 0 and set the flags in the same
6683 ;; way as for PTEST.
6684 (define_insn "*<optab><mode>3_cc"
6685 [(set (reg:CC_NZC CC_REGNUM)
6687 [(match_operand:VNx16BI 1 "register_operand")
6689 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6692 (match_operand:PRED_ALL 2 "register_operand")
6693 (match_operand:PRED_ALL 3 "register_operand"))
6696 (set (match_operand:PRED_ALL 0 "register_operand")
6697 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
6700 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6701 [ &Upa , Upa , Upa , Upa ; yes ] <logical>s\t%0.b, %1/z, %2.b, %3.b
6702 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6703 [ Upa , Upa , Upa , Upa ; no ] ^
6707 ;; Same with just the flags result.
6708 (define_insn "*<optab><mode>3_ptest"
6709 [(set (reg:CC_NZC CC_REGNUM)
6711 [(match_operand:VNx16BI 1 "register_operand")
6713 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6716 (match_operand:PRED_ALL 2 "register_operand")
6717 (match_operand:PRED_ALL 3 "register_operand"))
6720 (clobber (match_scratch:VNx16BI 0))]
6722 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6723 [ &Upa , Upa , Upa , Upa ; yes ] <logical>s\t%0.b, %1/z, %2.b, %3.b
6724 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6725 [ Upa , Upa , Upa , Upa ; no ] ^
6729 ;; -------------------------------------------------------------------------
6730 ;; ---- [PRED] Binary logical operations (inverted second input)
6731 ;; -------------------------------------------------------------------------
6735 ;; -------------------------------------------------------------------------
6737 ;; Predicated predicate BIC and ORN.
6738 (define_insn "aarch64_pred_<nlogical><mode>_z"
6739 [(set (match_operand:PRED_ALL 0 "register_operand")
6742 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand"))
6743 (match_operand:PRED_ALL 2 "register_operand"))
6744 (match_operand:PRED_ALL 1 "register_operand")))]
6746 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6747 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>\t%0.b, %1/z, %2.b, %3.b
6748 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6749 [ Upa , Upa , Upa , Upa ; no ] ^
6753 ;; Same, but set the flags as a side-effect.
6754 (define_insn "*<nlogical><mode>3_cc"
6755 [(set (reg:CC_NZC CC_REGNUM)
6757 [(match_operand:VNx16BI 1 "register_operand")
6759 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6763 (match_operand:PRED_ALL 3 "register_operand"))
6764 (match_operand:PRED_ALL 2 "register_operand"))
6767 (set (match_operand:PRED_ALL 0 "register_operand")
6768 (and:PRED_ALL (NLOGICAL:PRED_ALL
6769 (not:PRED_ALL (match_dup 3))
6773 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6774 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>s\t%0.b, %1/z, %2.b, %3.b
6775 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6776 [ Upa , Upa , Upa , Upa ; no ] ^
6780 ;; Same with just the flags result.
6781 (define_insn "*<nlogical><mode>3_ptest"
6782 [(set (reg:CC_NZC CC_REGNUM)
6784 [(match_operand:VNx16BI 1 "register_operand")
6786 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6790 (match_operand:PRED_ALL 3 "register_operand"))
6791 (match_operand:PRED_ALL 2 "register_operand"))
6794 (clobber (match_scratch:VNx16BI 0))]
6796 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6797 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>s\t%0.b, %1/z, %2.b, %3.b
6798 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6799 [ Upa , Upa , Upa , Upa ; no ] ^
6803 ;; -------------------------------------------------------------------------
6804 ;; ---- [PRED] Binary logical operations (inverted result)
6805 ;; -------------------------------------------------------------------------
6809 ;; -------------------------------------------------------------------------
6811 ;; Predicated predicate NAND and NOR.
6812 (define_insn "aarch64_pred_<logical_nn><mode>_z"
6813 [(set (match_operand:PRED_ALL 0 "register_operand")
6816 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand"))
6817 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand")))
6818 (match_operand:PRED_ALL 1 "register_operand")))]
6820 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6821 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>\t%0.b, %1/z, %2.b, %3.b
6822 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6823 [ Upa , Upa , Upa , Upa ; no ] ^
6827 ;; Same, but set the flags as a side-effect.
6828 (define_insn "*<logical_nn><mode>3_cc"
6829 [(set (reg:CC_NZC CC_REGNUM)
6831 [(match_operand:VNx16BI 1 "register_operand")
6833 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6837 (match_operand:PRED_ALL 2 "register_operand"))
6839 (match_operand:PRED_ALL 3 "register_operand")))
6842 (set (match_operand:PRED_ALL 0 "register_operand")
6843 (and:PRED_ALL (NLOGICAL:PRED_ALL
6844 (not:PRED_ALL (match_dup 2))
6845 (not:PRED_ALL (match_dup 3)))
6848 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6849 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>s\t%0.b, %1/z, %2.b, %3.b
6850 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6851 [ Upa , Upa , Upa , Upa ; no ] ^
6855 ;; Same with just the flags result.
6856 (define_insn "*<logical_nn><mode>3_ptest"
6857 [(set (reg:CC_NZC CC_REGNUM)
6859 [(match_operand:VNx16BI 1 "register_operand")
6861 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6865 (match_operand:PRED_ALL 2 "register_operand"))
6867 (match_operand:PRED_ALL 3 "register_operand")))
6870 (clobber (match_scratch:VNx16BI 0))]
6872 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6873 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>s\t%0.b, %1/z, %2.b, %3.b
6874 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6875 [ Upa , Upa , Upa , Upa ; no ] ^
6879 ;; =========================================================================
6880 ;; == Ternary arithmetic
6881 ;; =========================================================================
6883 ;; -------------------------------------------------------------------------
6884 ;; ---- [INT] MLA and MAD
6885 ;; -------------------------------------------------------------------------
6889 ;; -------------------------------------------------------------------------
6891 ;; Unpredicated integer addition of product.
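;; That is, each element of the result is (operand 1 * operand 2) + operand 3.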
6892 (define_expand "fma<mode>4"
6893 [(set (match_operand:SVE_I 0 "register_operand")
6898 (match_operand:SVE_I 1 "register_operand")
6899 (match_operand:SVE_I 2 "nonmemory_operand"))]
6901 (match_operand:SVE_I 3 "register_operand")))]
6904 if (aarch64_prepare_sve_int_fma (operands, PLUS))
6906 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6910 ;; Predicated integer addition of product.
6911 (define_insn "@aarch64_pred_fma<mode>"
6912 [(set (match_operand:SVE_I 0 "register_operand")
6915 [(match_operand:<VPRED> 1 "register_operand")
6917 (match_operand:SVE_I 2 "register_operand")
6918 (match_operand:SVE_I 3 "register_operand"))]
6920 (match_operand:SVE_I 4 "register_operand")))]
6922 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
6923 [ w , Upl , %0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6924 [ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6925 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6929 ;; Predicated integer addition of product with merging.
6930 (define_expand "cond_fma<mode>"
6931 [(set (match_operand:SVE_I 0 "register_operand")
6933 [(match_operand:<VPRED> 1 "register_operand")
6936 (match_operand:SVE_I 2 "register_operand")
6937 (match_operand:SVE_I 3 "general_operand"))
6938 (match_operand:SVE_I 4 "register_operand"))
6939 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
6943 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
6945 /* Swap the multiplication operands if the fallback value is the
6946 second of the two. */
6947 if (rtx_equal_p (operands[3], operands[5]))
6948 std::swap (operands[2], operands[3]);
6952 ;; Predicated integer addition of product, merging with the first input.
6953 (define_insn "*cond_fma<mode>_2"
6954 [(set (match_operand:SVE_I 0 "register_operand")
6956 [(match_operand:<VPRED> 1 "register_operand")
6959 (match_operand:SVE_I 2 "register_operand")
6960 (match_operand:SVE_I 3 "register_operand"))
6961 (match_operand:SVE_I 4 "register_operand"))
6965 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
6966 [ w , Upl , 0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6967 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6971 ;; Predicated integer addition of product, merging with the third input.
6972 (define_insn "*cond_fma<mode>_4"
6973 [(set (match_operand:SVE_I 0 "register_operand")
6975 [(match_operand:<VPRED> 1 "register_operand")
6978 (match_operand:SVE_I 2 "register_operand")
6979 (match_operand:SVE_I 3 "register_operand"))
6980 (match_operand:SVE_I 4 "register_operand"))
6984 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
6985 [ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6986 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6990 ;; Predicated integer addition of product, merging with an independent value.
6991 (define_insn_and_rewrite "*cond_fma<mode>_any"
6992 [(set (match_operand:SVE_I 0 "register_operand")
6994 [(match_operand:<VPRED> 1 "register_operand")
6997 (match_operand:SVE_I 2 "register_operand")
6998 (match_operand:SVE_I 3 "register_operand"))
6999 (match_operand:SVE_I 4 "register_operand"))
7000 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7003 && !rtx_equal_p (operands[2], operands[5])
7004 && !rtx_equal_p (operands[3], operands[5])
7005 && !rtx_equal_p (operands[4], operands[5])"
7006 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7007 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7008 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7009 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7010 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7011 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7012 [ ?&w , Upl , w , w , w , w ] #
7014 "&& reload_completed
7015 && register_operand (operands[5], <MODE>mode)
7016 && !rtx_equal_p (operands[0], operands[5])"
7018 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7019 operands[5], operands[1]));
7020 operands[5] = operands[4] = operands[0];
7022 [(set_attr "movprfx" "yes")]
7025 ;; -------------------------------------------------------------------------
7026 ;; ---- [INT] MLS and MSB
7027 ;; -------------------------------------------------------------------------
7031 ;; -------------------------------------------------------------------------
7033 ;; Unpredicated integer subtraction of product.
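;; That is, each element of the result is operand 3 - (operand 1 * operand 2),
;; which maps onto the MLS/MSB forms below.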
7034 (define_expand "fnma<mode>4"
7035 [(set (match_operand:SVE_I 0 "register_operand")
7037 (match_operand:SVE_I 3 "register_operand")
7041 (match_operand:SVE_I 1 "register_operand")
7042 (match_operand:SVE_I 2 "general_operand"))]
7046 if (aarch64_prepare_sve_int_fma (operands, MINUS))
7048 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7052 ;; Predicated integer subtraction of product.
7053 (define_insn "@aarch64_pred_fnma<mode>"
7054 [(set (match_operand:SVE_I 0 "register_operand")
7056 (match_operand:SVE_I 4 "register_operand")
7058 [(match_operand:<VPRED> 1 "register_operand")
7060 (match_operand:SVE_I 2 "register_operand")
7061 (match_operand:SVE_I 3 "register_operand"))]
7064 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7065 [ w , Upl , %0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7066 [ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7067 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7071 ;; Predicated integer subtraction of product with merging.
7072 (define_expand "cond_fnma<mode>"
7073 [(set (match_operand:SVE_I 0 "register_operand")
7075 [(match_operand:<VPRED> 1 "register_operand")
7077 (match_operand:SVE_I 4 "register_operand")
7079 (match_operand:SVE_I 2 "register_operand")
7080 (match_operand:SVE_I 3 "general_operand")))
7081 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7085 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
7087 /* Swap the multiplication operands if the fallback value is the
7088 second of the two. */
7089 if (rtx_equal_p (operands[3], operands[5]))
7090 std::swap (operands[2], operands[3]);
7094 ;; Predicated integer subtraction of product, merging with the first input.
7095 (define_insn "*cond_fnma<mode>_2"
7096 [(set (match_operand:SVE_I 0 "register_operand")
7098 [(match_operand:<VPRED> 1 "register_operand")
7100 (match_operand:SVE_I 4 "register_operand")
7102 (match_operand:SVE_I 2 "register_operand")
7103 (match_operand:SVE_I 3 "register_operand")))
7107 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7108 [ w , Upl , 0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7109 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7113 ;; Predicated integer subtraction of product, merging with the third input.
7114 (define_insn "*cond_fnma<mode>_4"
7115 [(set (match_operand:SVE_I 0 "register_operand")
7117 [(match_operand:<VPRED> 1 "register_operand")
7119 (match_operand:SVE_I 4 "register_operand")
7121 (match_operand:SVE_I 2 "register_operand")
7122 (match_operand:SVE_I 3 "register_operand")))
7126 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7127 [ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7128 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7132 ;; Predicated integer subtraction of product, merging with an
7133 ;; independent value.
7134 (define_insn_and_rewrite "*cond_fnma<mode>_any"
7135 [(set (match_operand:SVE_I 0 "register_operand")
7137 [(match_operand:<VPRED> 1 "register_operand")
7139 (match_operand:SVE_I 4 "register_operand")
7141 (match_operand:SVE_I 2 "register_operand")
7142 (match_operand:SVE_I 3 "register_operand")))
7143 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7146 && !rtx_equal_p (operands[2], operands[5])
7147 && !rtx_equal_p (operands[3], operands[5])
7148 && !rtx_equal_p (operands[4], operands[5])"
7149 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7150 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7151 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7152 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7153 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7154 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7155 [ ?&w , Upl , w , w , w , w ] #
7157 "&& reload_completed
7158 && register_operand (operands[5], <MODE>mode)
7159 && !rtx_equal_p (operands[0], operands[5])"
7161 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7162 operands[5], operands[1]));
7163 operands[5] = operands[4] = operands[0];
7165 [(set_attr "movprfx" "yes")]
7168 ;; -------------------------------------------------------------------------
7169 ;; ---- [INT] Dot product
7170 ;; -------------------------------------------------------------------------
7176 ;; -------------------------------------------------------------------------
7178 ;; Four-element integer dot-product with accumulation.
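;; Roughly, for each wide element I of the result:
;;
;;   op0[I] = op3[I] + (op1[4*I + 0] * op2[4*I + 0]
;;                      + ... + op1[4*I + 3] * op2[4*I + 3])
;;
;; where operands 1 and 2 have elements a quarter of the width of operand 0
;; and the products are sign- or zero-extended as appropriate.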
7179 (define_insn "<sur>dot_prod<mode><vsi2qi>"
7180 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
7182 (unspec:SVE_FULL_SDI
7183 [(match_operand:<VSI2QI> 1 "register_operand")
7184 (match_operand:<VSI2QI> 2 "register_operand")]
7186 (match_operand:SVE_FULL_SDI 3 "register_operand")))]
7188 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7189 [ w , w , w , 0 ; * ] <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
7190 [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
7194 ;; Four-element integer dot-product by selected lanes with accumulation.
7195 (define_insn "@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>"
7196 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
7198 (unspec:SVE_FULL_SDI
7199 [(match_operand:SVE_FULL_BHI 1 "register_operand")
7200 (unspec:SVE_FULL_BHI
7201 [(match_operand:SVE_FULL_BHI 2 "register_operand")
7202 (match_operand:SI 3 "const_int_operand")]
7203 UNSPEC_SVE_LANE_SELECT)]
7205 (match_operand:SVE_FULL_SDI 4 "register_operand")))]
7207 && (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4
7208 || (TARGET_STREAMING_SME2
7209 && <SVE_FULL_SDI:elem_bits> == 32
7210 && <SVE_FULL_BHI:elem_bits> == 16))"
7211 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7212 [ w , w , <SVE_FULL_SDI:sve_lane_con> , 0 ; * ] <sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
7213 [ ?&w , w , <SVE_FULL_SDI:sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
7217 (define_insn "@<sur>dot_prod<mode><vsi2qi>"
7218 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7221 [(match_operand:<VSI2QI> 1 "register_operand")
7222 (match_operand:<VSI2QI> 2 "register_operand")]
7224 (match_operand:VNx4SI_ONLY 3 "register_operand")))]
7226 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7227 [ w , w , w , 0 ; * ] <sur>dot\t%0.s, %1.b, %2.b
7228 [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.b, %2.b
7232 (define_insn "@aarch64_<sur>dot_prod_lane<VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
7233 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7236 [(match_operand:VNx16QI_ONLY 1 "register_operand")
7237 (unspec:VNx16QI_ONLY
7238 [(match_operand:VNx16QI_ONLY 2 "register_operand")
7239 (match_operand:SI 3 "const_int_operand")]
7240 UNSPEC_SVE_LANE_SELECT)]
7242 (match_operand:VNx4SI_ONLY 4 "register_operand")))]
7244 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7245 [ w , w , y , 0 ; * ] <sur>dot\t%0.s, %1.b, %2.b[%3]
7246 [ ?&w , w , y , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.s, %1.b, %2.b[%3]
7250 ;; -------------------------------------------------------------------------
7251 ;; ---- [INT] Sum of absolute differences
7252 ;; -------------------------------------------------------------------------
7253 ;; The patterns in this section are synthetic.
7254 ;; -------------------------------------------------------------------------
7256 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
7257 ;; operands 1 and 2. The sequence also has to perform a widening reduction of
7258 ;; the difference into a vector and accumulate that into operand 3 before
7259 ;; copying that into the result operand 0.
7260 ;; Perform that with a sequence of:
7262 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
7263 ;; MOVPRFX op0, op3 // If necessary
7264 ;; UDOT op0.s, diff.b, ones.b
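;;
;; As a rough scalar model of the result (with four narrow elements per
;; wide element of operand 0):
;;
;;   op0[I] = op3[I] + |op1[4*I + 0] - op2[4*I + 0]|
;;                   + ... + |op1[4*I + 3] - op2[4*I + 3]|
;;
;; The absolute differences are never negative, so accumulating them with
;; UDOT and an all-ones multiplicand works for both the signed and
;; unsigned variants.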
7265 (define_expand "<su>sad<vsi2qi>"
7266 [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
7267 (USMAX:<VSI2QI> (match_operand:<VSI2QI> 1 "register_operand")
7268 (match_operand:<VSI2QI> 2 "register_operand"))
7269 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
7272 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
7273 rtx diff = gen_reg_rtx (<VSI2QI>mode);
7274 emit_insn (gen_<su>abd<vsi2qi>3 (diff, operands[1], operands[2]));
7275 emit_insn (gen_udot_prod<mode><vsi2qi> (operands[0], diff, ones,
7281 ;; -------------------------------------------------------------------------
7282 ;; ---- [INT] Matrix multiply-accumulate
7283 ;; -------------------------------------------------------------------------
7288 ;; -------------------------------------------------------------------------
7290 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
7291 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7294 [(match_operand:<VSI2QI> 2 "register_operand")
7295 (match_operand:<VSI2QI> 3 "register_operand")]
7297 (match_operand:VNx4SI_ONLY 1 "register_operand")))]
7298 "TARGET_SVE_I8MM && TARGET_NON_STREAMING"
7299 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7300 [ w , 0 , w , w ; * ] <sur>mmla\t%0.s, %2.b, %3.b
7301 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sur>mmla\t%0.s, %2.b, %3.b
7305 ;; -------------------------------------------------------------------------
7306 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
7307 ;; -------------------------------------------------------------------------
7308 ;; Includes merging patterns for:
7317 ;; -------------------------------------------------------------------------
7319 ;; Unpredicated floating-point ternary operations.
7320 (define_expand "<optab><mode>4"
7321 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7324 (const_int SVE_RELAXED_GP)
7325 (match_operand:SVE_FULL_F 1 "register_operand")
7326 (match_operand:SVE_FULL_F 2 "register_operand")
7327 (match_operand:SVE_FULL_F 3 "register_operand")]
7328 SVE_COND_FP_TERNARY))]
7331 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7335 ;; Predicated floating-point ternary operations.
7336 (define_insn "@aarch64_pred_<optab><mode>"
7337 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7339 [(match_operand:<VPRED> 1 "register_operand")
7340 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7341 (match_operand:SVE_FULL_F 2 "register_operand")
7342 (match_operand:SVE_FULL_F 3 "register_operand")
7343 (match_operand:SVE_FULL_F 4 "register_operand")]
7344 SVE_COND_FP_TERNARY))]
7346 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7347 [ w , Upl , %w , w , 0 ; * ] <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7348 [ w , Upl , 0 , w , w ; * ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7349 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7353 ;; Predicated floating-point ternary operations with merging.
7354 (define_expand "@cond_<optab><mode>"
7355 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7357 [(match_operand:<VPRED> 1 "register_operand")
7360 (const_int SVE_STRICT_GP)
7361 (match_operand:SVE_FULL_F 2 "register_operand")
7362 (match_operand:SVE_FULL_F 3 "register_operand")
7363 (match_operand:SVE_FULL_F 4 "register_operand")]
7364 SVE_COND_FP_TERNARY)
7365 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7369 /* Swap the multiplication operands if the fallback value is the
7370 second of the two. */
7371 if (rtx_equal_p (operands[3], operands[5]))
7372 std::swap (operands[2], operands[3]);
7375 ;; Predicated floating-point ternary operations, merging with the first input.
7377 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
7378 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7380 [(match_operand:<VPRED> 1 "register_operand")
7383 (const_int SVE_RELAXED_GP)
7384 (match_operand:SVE_FULL_F 2 "register_operand")
7385 (match_operand:SVE_FULL_F 3 "register_operand")
7386 (match_operand:SVE_FULL_F 4 "register_operand")]
7387 SVE_COND_FP_TERNARY)
7391 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7392 [ w , Upl , 0 , w , w ; * ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7393 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7395 "&& !rtx_equal_p (operands[1], operands[5])"
7397 operands[5] = copy_rtx (operands[1]);
7401 (define_insn "*cond_<optab><mode>_2_strict"
7402 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7404 [(match_operand:<VPRED> 1 "register_operand")
7407 (const_int SVE_STRICT_GP)
7408 (match_operand:SVE_FULL_F 2 "register_operand")
7409 (match_operand:SVE_FULL_F 3 "register_operand")
7410 (match_operand:SVE_FULL_F 4 "register_operand")]
7411 SVE_COND_FP_TERNARY)
7415 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7416 [ w , Upl , 0 , w , w ; * ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7417 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7421 ;; Predicated floating-point ternary operations, merging with the third input.
7423 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7424 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7426 [(match_operand:<VPRED> 1 "register_operand")
7429 (const_int SVE_RELAXED_GP)
7430 (match_operand:SVE_FULL_F 2 "register_operand")
7431 (match_operand:SVE_FULL_F 3 "register_operand")
7432 (match_operand:SVE_FULL_F 4 "register_operand")]
7433 SVE_COND_FP_TERNARY)
7437 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7438 [ w , Upl , w , w , 0 ; * ] <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7439 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7441 "&& !rtx_equal_p (operands[1], operands[5])"
7443 operands[5] = copy_rtx (operands[1]);
7447 (define_insn "*cond_<optab><mode>_4_strict"
7448 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7450 [(match_operand:<VPRED> 1 "register_operand")
7453 (const_int SVE_STRICT_GP)
7454 (match_operand:SVE_FULL_F 2 "register_operand")
7455 (match_operand:SVE_FULL_F 3 "register_operand")
7456 (match_operand:SVE_FULL_F 4 "register_operand")]
7457 SVE_COND_FP_TERNARY)
7461 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7462 [ w , Upl , w , w , 0 ; * ] <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7463 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7467 ;; Predicated floating-point ternary operations, merging with an
7468 ;; independent value.
7469 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7470 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7472 [(match_operand:<VPRED> 1 "register_operand")
7475 (const_int SVE_RELAXED_GP)
7476 (match_operand:SVE_FULL_F 2 "register_operand")
7477 (match_operand:SVE_FULL_F 3 "register_operand")
7478 (match_operand:SVE_FULL_F 4 "register_operand")]
7479 SVE_COND_FP_TERNARY)
7480 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7483 && !rtx_equal_p (operands[2], operands[5])
7484 && !rtx_equal_p (operands[3], operands[5])
7485 && !rtx_equal_p (operands[4], operands[5])"
7486 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7487 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7488 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7489 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7490 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7491 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7492 [ ?&w , Upl , w , w , w , w ] #
7496 if (reload_completed
7497 && register_operand (operands[5], <MODE>mode)
7498 && !rtx_equal_p (operands[0], operands[5]))
7500 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7501 operands[5], operands[1]));
7502 operands[5] = operands[4] = operands[0];
7504 else if (!rtx_equal_p (operands[1], operands[6]))
7505 operands[6] = copy_rtx (operands[1]);
7509 [(set_attr "movprfx" "yes")]
7512 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7513 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7515 [(match_operand:<VPRED> 1 "register_operand")
7518 (const_int SVE_STRICT_GP)
7519 (match_operand:SVE_FULL_F 2 "register_operand")
7520 (match_operand:SVE_FULL_F 3 "register_operand")
7521 (match_operand:SVE_FULL_F 4 "register_operand")]
7522 SVE_COND_FP_TERNARY)
7523 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7526 && !rtx_equal_p (operands[2], operands[5])
7527 && !rtx_equal_p (operands[3], operands[5])
7528 && !rtx_equal_p (operands[4], operands[5])"
7529 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7530 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7531 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7532 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7533 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7534 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7535 [ ?&w , Upl , w , w , w , w ] #
7537 "&& reload_completed
7538 && register_operand (operands[5], <MODE>mode)
7539 && !rtx_equal_p (operands[0], operands[5])"
7541 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7542 operands[5], operands[1]));
7543 operands[5] = operands[4] = operands[0];
7545 [(set_attr "movprfx" "yes")]
7548 ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
7549 ;; (fma ...) since target-independent code won't understand the indexing.
7550 (define_insn "@aarch64_<optab>_lane_<mode>"
7551 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7553 [(match_operand:SVE_FULL_F 1 "register_operand")
7555 [(match_operand:SVE_FULL_F 2 "register_operand")
7556 (match_operand:SI 3 "const_int_operand")]
7557 UNSPEC_SVE_LANE_SELECT)
7558 (match_operand:SVE_FULL_F 4 "register_operand")]
7559 SVE_FP_TERNARY_LANE))]
7561 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7562 [ w , w , <sve_lane_con> , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7563 [ ?&w , w , <sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7567 ;; -------------------------------------------------------------------------
7568 ;; ---- [FP] Complex multiply-add
7569 ;; -------------------------------------------------------------------------
7570 ;; Includes merging patterns for:
7572 ;; -------------------------------------------------------------------------
7574 ;; Predicated FCMLA.
7575 (define_insn "@aarch64_pred_<optab><mode>"
7576 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7578 [(match_operand:<VPRED> 1 "register_operand")
7579 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7580 (match_operand:SVE_FULL_F 2 "register_operand")
7581 (match_operand:SVE_FULL_F 3 "register_operand")
7582 (match_operand:SVE_FULL_F 4 "register_operand")]
7585 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7586 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7587 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7591 ;; Unpredicated optab pattern for the auto-vectorizer.
7592 ;; The complex mla/mls operations always need to expand to two instructions.
7593 ;; The first operation does half the computation and the second does the
7594 ;; remainder. Because of this, expand early.
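;; For a non-conjugate multiply-accumulate, the usual decomposition is an
;; FCMLA with rotation #0 followed by an FCMLA with rotation #90 on the
;; same accumulator; the conjugate forms use different rotation pairs.
;; <sve_rot1> and <sve_rot2> below select the appropriate pair for each
;; variant.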
7595 (define_expand "cml<fcmac1><conj_op><mode>4"
7596 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7600 (match_operand:SVE_FULL_F 1 "register_operand")
7601 (match_operand:SVE_FULL_F 2 "register_operand")
7602 (match_operand:SVE_FULL_F 3 "register_operand")]
7606 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7607 operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode);
7608 rtx tmp = gen_reg_rtx (<MODE>mode);
7610 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4],
7611 operands[2], operands[1],
7612 operands[3], operands[5]));
7614 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4],
7615 operands[2], operands[1],
7620 ;; Unpredicated optab pattern for the auto-vectorizer.
7621 ;; The complex mul operations always need to expand to two instructions.
7622 ;; The first operation does half the computation and the second does the
7623 ;; remainder. Because of this, expand early.
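;; This reuses the two-FCMLA sequence above, but accumulates into a fresh
;; register that is initialized to zero, since a plain multiplication has
;; no addend.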
7624 (define_expand "cmul<conj_op><mode>3"
7625 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7627 [(match_operand:SVE_FULL_F 1 "register_operand")
7628 (match_operand:SVE_FULL_F 2 "register_operand")]
7632 rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode);
7633 rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode);
7634 rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
7635 rtx tmp = gen_reg_rtx (<MODE>mode);
7637 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg,
7638 operands[2], operands[1],
7641 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg,
7642 operands[2], operands[1],
7647 ;; Predicated FCMLA with merging.
7648 (define_expand "@cond_<optab><mode>"
7649 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7651 [(match_operand:<VPRED> 1 "register_operand")
7654 (const_int SVE_STRICT_GP)
7655 (match_operand:SVE_FULL_F 2 "register_operand")
7656 (match_operand:SVE_FULL_F 3 "register_operand")
7657 (match_operand:SVE_FULL_F 4 "register_operand")]
7659 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7664 ;; Predicated FCMLA, merging with the third input.
7665 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7666 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7668 [(match_operand:<VPRED> 1 "register_operand")
7671 (const_int SVE_RELAXED_GP)
7672 (match_operand:SVE_FULL_F 2 "register_operand")
7673 (match_operand:SVE_FULL_F 3 "register_operand")
7674 (match_operand:SVE_FULL_F 4 "register_operand")]
7679 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7680 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7681 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7683 "&& !rtx_equal_p (operands[1], operands[5])"
7685 operands[5] = copy_rtx (operands[1]);
7689 (define_insn "*cond_<optab><mode>_4_strict"
7690 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7692 [(match_operand:<VPRED> 1 "register_operand")
7695 (const_int SVE_STRICT_GP)
7696 (match_operand:SVE_FULL_F 2 "register_operand")
7697 (match_operand:SVE_FULL_F 3 "register_operand")
7698 (match_operand:SVE_FULL_F 4 "register_operand")]
7703 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7704 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7705 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7709 ;; Predicated FCMLA, merging with an independent value.
7710 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7711 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7713 [(match_operand:<VPRED> 1 "register_operand")
7716 (const_int SVE_RELAXED_GP)
7717 (match_operand:SVE_FULL_F 2 "register_operand")
7718 (match_operand:SVE_FULL_F 3 "register_operand")
7719 (match_operand:SVE_FULL_F 4 "register_operand")]
7721 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7723 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7724 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7725 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7726 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7727 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7728 [ ?&w , Upl , w , w , w , w ] #
7732 if (reload_completed
7733 && register_operand (operands[5], <MODE>mode)
7734 && !rtx_equal_p (operands[0], operands[5]))
7736 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7737 operands[5], operands[1]));
7738 operands[5] = operands[4] = operands[0];
7740 else if (!rtx_equal_p (operands[1], operands[6]))
7741 operands[6] = copy_rtx (operands[1]);
7745 [(set_attr "movprfx" "yes")]
7748 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7749 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7751 [(match_operand:<VPRED> 1 "register_operand")
7754 (const_int SVE_STRICT_GP)
7755 (match_operand:SVE_FULL_F 2 "register_operand")
7756 (match_operand:SVE_FULL_F 3 "register_operand")
7757 (match_operand:SVE_FULL_F 4 "register_operand")]
7759 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7761 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7762 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7763 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7764 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7765 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7766 [ ?&w , Upl , w , w , w , w ] #
7768 "&& reload_completed
7769 && register_operand (operands[5], <MODE>mode)
7770 && !rtx_equal_p (operands[0], operands[5])"
7772 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7773 operands[5], operands[1]));
7774 operands[5] = operands[4] = operands[0];
7776 [(set_attr "movprfx" "yes")]
7779 ;; Unpredicated FCMLA with indexing.
7780 (define_insn "@aarch64_<optab>_lane_<mode>"
7781 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
7782 (unspec:SVE_FULL_HSF
7783 [(match_operand:SVE_FULL_HSF 1 "register_operand")
7784 (unspec:SVE_FULL_HSF
7785 [(match_operand:SVE_FULL_HSF 2 "register_operand")
7786 (match_operand:SI 3 "const_int_operand")]
7787 UNSPEC_SVE_LANE_SELECT)
7788 (match_operand:SVE_FULL_HSF 4 "register_operand")]
7791 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7792 [ w , w , <sve_lane_pair_con> , 0 ; * ] fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7793 [ ?&w , w , <sve_lane_pair_con> , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7797 ;; -------------------------------------------------------------------------
7798 ;; ---- [FP] Trigonometric multiply-add
7799 ;; -------------------------------------------------------------------------
7802 ;; -------------------------------------------------------------------------
7804 (define_insn "@aarch64_sve_tmad<mode>"
7805 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7807 [(match_operand:SVE_FULL_F 1 "register_operand")
7808 (match_operand:SVE_FULL_F 2 "register_operand")
7809 (match_operand:DI 3 "const_int_operand")]
7812 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
7813 [ w , 0 , w ; * ] ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7814 [ ?&w , w , w ; yes ] movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7818 ;; -------------------------------------------------------------------------
7819 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
7820 ;; -------------------------------------------------------------------------
7828 ;; -------------------------------------------------------------------------
7830 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
7831 [(set (match_operand:VNx4SF 0 "register_operand")
7833 [(match_operand:VNx4SF 1 "register_operand")
7834 (match_operand:VNx8BF 2 "register_operand")
7835 (match_operand:VNx8BF 3 "register_operand")]
7836 SVE_BFLOAT_TERNARY_LONG))]
7838 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7839 [ w , 0 , w , w ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h
7840 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h
7844 ;; The immediate range is enforced before generating the instruction.
7845 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
7846 [(set (match_operand:VNx4SF 0 "register_operand")
7848 [(match_operand:VNx4SF 1 "register_operand")
7849 (match_operand:VNx8BF 2 "register_operand")
7850 (match_operand:VNx8BF 3 "register_operand")
7851 (match_operand:SI 4 "const_int_operand")]
7852 SVE_BFLOAT_TERNARY_LONG_LANE))]
7854 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7855 [ w , 0 , w , y ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7856 [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7860 ;; -------------------------------------------------------------------------
7861 ;; ---- [FP] Matrix multiply-accumulate
7862 ;; -------------------------------------------------------------------------
7864 ;; - FMMLA (F32MM,F64MM)
7865 ;; -------------------------------------------------------------------------
7867 ;; The mode iterator enforces the target requirements.
7868 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
7869 [(set (match_operand:SVE_MATMULF 0 "register_operand")
7871 [(match_operand:SVE_MATMULF 2 "register_operand")
7872 (match_operand:SVE_MATMULF 3 "register_operand")
7873 (match_operand:SVE_MATMULF 1 "register_operand")]
7875 "TARGET_SVE && TARGET_NON_STREAMING"
7876 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7877 [ w , 0 , w , w ; * ] <sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
7878 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
7882 ;; =========================================================================
7883 ;; == Comparisons and selects
7884 ;; =========================================================================
7886 ;; -------------------------------------------------------------------------
7887 ;; ---- [INT,FP] Select based on predicates
7888 ;; -------------------------------------------------------------------------
7889 ;; Includes merging patterns for:
7893 ;; -------------------------------------------------------------------------
7895 ;; vcond_mask operand order: true, false, mask
7896 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
7897 ;; SEL operand order: mask, true, false
7898 (define_expand "@vcond_mask_<mode><vpred>"
7899 [(set (match_operand:SVE_ALL 0 "register_operand")
7901 [(match_operand:<VPRED> 3 "register_operand")
7902 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
7903 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
7907 if (register_operand (operands[1], <MODE>mode))
7908 operands[2] = force_reg (<MODE>mode, operands[2]);
7914 ;; - a duplicated immediate and a register
7915 ;; - a duplicated immediate and zero
7917 ;; For unpacked vectors, it doesn't really matter whether SEL uses
7918 ;; the container size or the element size. If SEL used the container size,
7919 ;; it would ignore undefined bits of the predicate but would copy the
7920 ;; upper (undefined) bits of each container along with the defined bits.
7921 ;; If SEL used the element size, it would use undefined bits of the predicate
7922 ;; to select between undefined elements in each input vector. Thus the only
7923 ;; difference is whether the undefined bits in a container always come from
7924 ;; the same input as the defined bits, or whether the choice can vary
7925 ;; independently of the defined bits.
7927 ;; For the other instructions, using the element size is more natural,
7928 ;; so we do that for SEL as well.
7929 (define_insn "*vcond_mask_<mode><vpred>"
7930 [(set (match_operand:SVE_ALL 0 "register_operand")
7932 [(match_operand:<VPRED> 3 "register_operand")
7933 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
7934 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
7937 && (!register_operand (operands[1], <MODE>mode)
7938 || register_operand (operands[2], <MODE>mode))"
7939 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7940 [ w , w , w , Upa ; * ] sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
7941 [ w , vss , 0 , Upa ; * ] mov\t%0.<Vetype>, %3/m, #%I1
7942 [ w , vss , Dz , Upa ; * ] mov\t%0.<Vetype>, %3/z, #%I1
7943 [ w , Ufc , 0 , Upa ; * ] fmov\t%0.<Vetype>, %3/m, #%1
7944 [ ?w , Ufc , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
7945 [ ?&w , vss , w , Upa ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
7946 [ ?&w , Ufc , w , Upa ; yes ] movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1
7950 ;; Optimize selects between a duplicated scalar variable and another vector,
7951 ;; the latter of which can be a zero constant or a variable. Treat duplicates
7952 ;; of GPRs as being more expensive than duplicates of FPRs, since they
7953 ;; involve a cross-file move.
7954 (define_insn "@aarch64_sel_dup<mode>"
7955 [(set (match_operand:SVE_ALL 0 "register_operand")
7957 [(match_operand:<VPRED> 3 "register_operand")
7958 (vec_duplicate:SVE_ALL
7959 (match_operand:<VEL> 1 "register_operand"))
7960 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
7963 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7964 [ ?w , r , 0 , Upl ; * ] mov\t%0.<Vetype>, %3/m, %<vwcore>1
7965 [ w , w , 0 , Upl ; * ] mov\t%0.<Vetype>, %3/m, %<Vetype>1
7966 [ ??w , r , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7967 [ ?&w , w , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
7968 [ ??&w , r , w , Upl ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7969 [ ?&w , w , w , Upl ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
7973 ;; -------------------------------------------------------------------------
7974 ;; ---- [INT,FP] Compare and select
7975 ;; -------------------------------------------------------------------------
7976 ;; The patterns in this section are synthetic.
7977 ;; -------------------------------------------------------------------------
7979 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
7980 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
7981 (define_expand "vcond<SVE_ALL:mode><SVE_I:mode>"
7982 [(set (match_operand:SVE_ALL 0 "register_operand")
7983 (if_then_else:SVE_ALL
7984 (match_operator 3 "comparison_operator"
7985 [(match_operand:SVE_I 4 "register_operand")
7986 (match_operand:SVE_I 5 "nonmemory_operand")])
7987 (match_operand:SVE_ALL 1 "nonmemory_operand")
7988 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
7989 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
7991 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
7996 ;; Integer vcondu. Don't enforce an immediate range here, since it
7997 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
7998 (define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>"
7999 [(set (match_operand:SVE_ALL 0 "register_operand")
8000 (if_then_else:SVE_ALL
8001 (match_operator 3 "comparison_operator"
8002 [(match_operand:SVE_I 4 "register_operand")
8003 (match_operand:SVE_I 5 "nonmemory_operand")])
8004 (match_operand:SVE_ALL 1 "nonmemory_operand")
8005 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
8006 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
8008 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
8013 ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
8014 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
8015 (define_expand "vcond<mode><v_fp_equiv>"
8016 [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
8017 (if_then_else:SVE_FULL_HSD
8018 (match_operator 3 "comparison_operator"
8019 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
8020 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
8021 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
8022 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
8025 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
8030 ;; -------------------------------------------------------------------------
8031 ;; ---- [INT] Comparisons
8032 ;; -------------------------------------------------------------------------
8044 ;; -------------------------------------------------------------------------
8046 ;; Signed integer comparisons. Don't enforce an immediate range here, since
8047 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
8049 (define_expand "vec_cmp<mode><vpred>"
8051 [(set (match_operand:<VPRED> 0 "register_operand")
8052 (match_operator:<VPRED> 1 "comparison_operator"
8053 [(match_operand:SVE_I 2 "register_operand")
8054 (match_operand:SVE_I 3 "nonmemory_operand")]))
8055 (clobber (reg:CC_NZC CC_REGNUM))])]
8058 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
8059 operands[2], operands[3]);
8064 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
8065 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
8067 (define_expand "vec_cmpu<mode><vpred>"
8069 [(set (match_operand:<VPRED> 0 "register_operand")
8070 (match_operator:<VPRED> 1 "comparison_operator"
8071 [(match_operand:SVE_I 2 "register_operand")
8072 (match_operand:SVE_I 3 "nonmemory_operand")]))
8073 (clobber (reg:CC_NZC CC_REGNUM))])]
8076 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
8077 operands[2], operands[3]);
8082 ;; Predicated integer comparisons.
8084 ;; For unpacked vectors, only the lowpart element in each input container
8085 ;; has a defined value, and only the predicate bits associated with
8086 ;; those elements are defined. For example, when comparing two VNx2SIs:
8088 ;; - The VNx2SIs can be seen as VNx2DIs in which the low halves of each
8089 ;; DI container store an SI element. The upper bits of each DI container are undefined.
8092 ;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
8093 ;; even elements are defined and the odd elements are undefined.
8095 ;; - The associated predicate mode is VNx2BI. This means that only the
8096 ;; low bit in each predicate byte is defined (on input and on output).
8098 ;; - We use a .s comparison to compare VNx2SIs, under the control of a
8099 ;; VNx2BI governing predicate, to produce a VNx2BI result. If we view
8100 ;; the .s operation as operating on VNx4SIs then for odd lanes:
8102 ;; - the input governing predicate bit is undefined
8103 ;; - the SI elements being compared are undefined
8104 ;; - the predicate result bit is therefore undefined, but
8105 ;; - the predicate result bit is in the undefined part of a VNx2BI,
8106 ;; so its value doesn't matter anyway.
8107 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
8108 [(set (match_operand:<VPRED> 0 "register_operand")
8110 [(match_operand:<VPRED> 1 "register_operand")
8111 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8112 (SVE_INT_CMP:<VPRED>
8113 (match_operand:SVE_I 3 "register_operand")
8114 (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8116 (clobber (reg:CC_NZC CC_REGNUM))]
8118 {@ [ cons: =0 , 1 , 3 , 4 ; attrs: pred_clobber ]
8119 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
8120 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8121 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8122 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
8123 [ ?Upl , 0 , w , w ; yes ] ^
8124 [ Upa , Upl, w , w ; no ] ^
8128 ;; Predicated integer comparisons in which both the flag and predicate
8129 ;; results are interesting.
8130 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
8131 [(set (reg:CC_NZC CC_REGNUM)
8133 [(match_operand:VNx16BI 1 "register_operand")
8135 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8138 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8139 (SVE_INT_CMP:<VPRED>
8140 (match_operand:SVE_I 2 "register_operand")
8141 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8144 (set (match_operand:<VPRED> 0 "register_operand")
8148 (SVE_INT_CMP:<VPRED>
8153 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8154 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ]
8155 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
8156 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8157 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8158 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
8159 [ ?Upl , 0 , w , w ; yes ] ^
8160 [ Upa , Upl, w , w ; no ] ^
8162 "&& !rtx_equal_p (operands[4], operands[6])"
8164 operands[6] = copy_rtx (operands[4]);
8165 operands[7] = operands[5];
8169 ;; Predicated integer comparisons in which only the flags result is interesting.
8171 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
8172 [(set (reg:CC_NZC CC_REGNUM)
8174 [(match_operand:VNx16BI 1 "register_operand")
8176 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8179 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8180 (SVE_INT_CMP:<VPRED>
8181 (match_operand:SVE_I 2 "register_operand")
8182 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8185 (clobber (match_scratch:<VPRED> 0))]
8187 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8188 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
8189 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
8190 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8191 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8192 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
8193 [ ?Upl , 0 , w , w ; yes ] ^
8194 [ Upa , Upl, w , w ; no ] ^
8196 "&& !rtx_equal_p (operands[4], operands[6])"
8198 operands[6] = copy_rtx (operands[4]);
8199 operands[7] = operands[5];
8203 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
8204 ;; comparison with an AND. Split the instruction into its preferred form
8205 ;; at the earliest opportunity, in order to get rid of the redundant
8207 (define_insn_and_split "*cmp<cmp_op><mode>_and"
8208 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8212 (const_int SVE_KNOWN_PTRUE)
8213 (SVE_INT_CMP:<VPRED>
8214 (match_operand:SVE_I 2 "register_operand" "w, w")
8215 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
8217 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
8218 (clobber (reg:CC_NZC CC_REGNUM))]
8226 (const_int SVE_MAYBE_NOT_PTRUE)
8227 (SVE_INT_CMP:<VPRED>
8231 (clobber (reg:CC_NZC CC_REGNUM))])]
8234 ;; Predicated integer wide comparisons.
8235 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
8236 [(set (match_operand:<VPRED> 0 "register_operand")
8238 [(match_operand:VNx16BI 1 "register_operand")
8239 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8241 [(match_operand:SVE_FULL_BHSI 3 "register_operand")
8242 (match_operand:VNx2DI 4 "register_operand")]
8243 SVE_COND_INT_CMP_WIDE)]
8245 (clobber (reg:CC_NZC CC_REGNUM))]
8247 {@ [ cons: =0, 1 , 2, 3, 4; attrs: pred_clobber ]
8248 [ &Upa , Upl, , w, w; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d
8249 [ ?Upl , 0 , , w, w; yes ] ^
8250 [ Upa , Upl, , w, w; no ] ^
8254 ;; Predicated integer wide comparisons in which both the flag and
8255 ;; predicate results are interesting.
8256 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
8257 [(set (reg:CC_NZC CC_REGNUM)
8259 [(match_operand:VNx16BI 1 "register_operand")
8261 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8263 [(match_operand:VNx16BI 6 "register_operand")
8264 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8266 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
8267 (match_operand:VNx2DI 3 "register_operand")]
8268 SVE_COND_INT_CMP_WIDE)]
8271 (set (match_operand:<VPRED> 0 "register_operand")
8278 SVE_COND_INT_CMP_WIDE)]
8281 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8282 {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ]
8283 [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
8284 [ ?Upl , 0 , w, w, Upl; yes ] ^
8285 [ Upa , Upl, w, w, Upl; no ] ^
8289 ;; Predicated integer wide comparisons in which only the flags result is interesting.
8291 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
8292 [(set (reg:CC_NZC CC_REGNUM)
8294 [(match_operand:VNx16BI 1 "register_operand")
8296 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8298 [(match_operand:VNx16BI 6 "register_operand")
8299 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8301 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
8302 (match_operand:VNx2DI 3 "register_operand")]
8303 SVE_COND_INT_CMP_WIDE)]
8306 (clobber (match_scratch:<VPRED> 0))]
8308 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8309 {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ]
8310 [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
8311 [ ?Upl , 0 , w, w, Upl; yes ] ^
8312 [ Upa , Upl, w, w, Upl; no ] ^
8316 ;; -------------------------------------------------------------------------
8317 ;; ---- [INT] While tests
8318 ;; -------------------------------------------------------------------------
8330 ;; -------------------------------------------------------------------------
8338 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
8339 ;; true for all J in [0, I].
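;; For example, a WHILELO with operand 1 equal to 3 and operand 2 equal to 7
;; sets the first four elements of the result (J = 0..3 satisfy 3 + J < 7)
;; and clears the rest.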
8340 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
8341 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8342 (unspec:PRED_ALL [(const_int SVE_WHILE_B)
8343 (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8344 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8346 (clobber (reg:CC_NZC CC_REGNUM))]
8348 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8351 ;; The WHILE instructions set the flags in the same way as a PTEST with
8352 ;; a PTRUE GP. Handle the case in which both results are useful. The GP
8353 ;; operands to the PTEST aren't needed, so we allow them to be anything.
8354 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
8355 [(set (reg:CC_NZC CC_REGNUM)
8359 (const_int SVE_KNOWN_PTRUE)
8361 [(const_int SVE_WHILE_B)
8362 (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8363 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8366 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8367 (unspec:PRED_ALL [(const_int SVE_WHILE_B)
8372 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8373 ;; Force the compiler to drop the unused predicate operand, so that we
8374 ;; don't have an unnecessary PTRUE.
8375 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8377 operands[3] = CONSTM1_RTX (VNx16BImode);
8378 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8382 ;; Same, but handle the case in which only the flags result is useful.
8383 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
8384 [(set (reg:CC_NZC CC_REGNUM)
8388 (const_int SVE_KNOWN_PTRUE)
8390 [(const_int SVE_WHILE_B)
8391 (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8392 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8395 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8397 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8398 ;; Force the compiler to drop the unused predicate operand, so that we
8399 ;; don't have an unnecessary PTRUE.
8400 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8402 operands[3] = CONSTM1_RTX (VNx16BImode);
8403 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8407 ;; -------------------------------------------------------------------------
8408 ;; ---- [FP] Direct comparisons
8409 ;; -------------------------------------------------------------------------
8418 ;; -------------------------------------------------------------------------
8420 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
8421 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO comparison with zero.
8423 (define_expand "vec_cmp<mode><vpred>"
8424 [(set (match_operand:<VPRED> 0 "register_operand")
8425 (match_operator:<VPRED> 1 "comparison_operator"
8426 [(match_operand:SVE_FULL_F 2 "register_operand")
8427 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
8430 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
8431 operands[2], operands[3], false);
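;; A hedged C-level sketch (illustrative only; names are made up) of the
;; kind of code that reaches the vec_cmp expander above: a vectorised
;; floating-point comparison feeding a select.
;;
;;   void
;;   clamp_negative (double *restrict dst, const double *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[i] = src[i] > 0.0 ? src[i] : 0.0;
;;   }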
8436 ;; Predicated floating-point comparisons.
8437 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
8438 [(set (match_operand:<VPRED> 0 "register_operand")
8440 [(match_operand:<VPRED> 1 "register_operand")
8441 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8442 (match_operand:SVE_FULL_F 3 "register_operand")
8443 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
8444 SVE_COND_FP_CMP_I0))]
8446 {@ [ cons: =0 , 1 , 3 , 4 ]
8447 [ Upa , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
8448 [ Upa , Upl , w , w ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
8452 ;; Same for unordered comparisons.
8453 (define_insn "@aarch64_pred_fcmuo<mode>"
8454 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8456 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8457 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8458 (match_operand:SVE_FULL_F 3 "register_operand" "w")
8459 (match_operand:SVE_FULL_F 4 "register_operand" "w")]
8460 UNSPEC_COND_FCMUO))]
8462 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
8465 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
8466 ;; with another predicate P. This does not have the same trapping behavior
8467 ;; as predicating the comparison itself on P, but it's a legitimate fold,
8468 ;; since we can drop any potentially-trapping operations whose results are not needed.
8471 ;; Split the instruction into its preferred form (below) at the earliest
8472 ;; opportunity, in order to get rid of the redundant operand 1.
8473 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
8474 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8477 [(match_operand:<VPRED> 1)
8478 (const_int SVE_KNOWN_PTRUE)
8479 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
8480 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
8482 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
8489 (const_int SVE_MAYBE_NOT_PTRUE)
8492 SVE_COND_FP_CMP_I0))]
8495 ;; Same for unordered comparisons.
8496 (define_insn_and_split "*fcmuo<mode>_and_combine"
8497 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8500 [(match_operand:<VPRED> 1)
8501 (const_int SVE_KNOWN_PTRUE)
8502 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8503 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8505 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
8512 (const_int SVE_MAYBE_NOT_PTRUE)
8515 UNSPEC_COND_FCMUO))]
8518 ;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
8519 ;; In this case, we still need a separate NOT/BIC operation, but predicating
8520 ;; the comparison on the BIC operand removes the need for a PTRUE.
8521 (define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
8522 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8527 [(match_operand:<VPRED> 1)
8528 (const_int SVE_KNOWN_PTRUE)
8529 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8530 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8531 SVE_COND_FP_CMP_I0))
8532 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8533 (match_dup:<VPRED> 1)))
8534 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8541 (const_int SVE_MAYBE_NOT_PTRUE)
8544 SVE_COND_FP_CMP_I0))
8551 if (can_create_pseudo_p ())
8552 operands[5] = gen_reg_rtx (<VPRED>mode);
8556 ;; Make sure that we expand to a nor when operand 4 of
8557 ;; *fcm<cmp_op><mode>_bic_combine is a not.
8558 (define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
8559 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8564 [(match_operand:<VPRED> 1)
8565 (const_int SVE_KNOWN_PTRUE)
8566 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8567 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8568 SVE_COND_FP_CMP_I0))
8570 (match_operand:<VPRED> 4 "register_operand" "Upa")))
8571 (match_dup:<VPRED> 1)))
8572 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8579 (const_int SVE_KNOWN_PTRUE)
8582 SVE_COND_FP_CMP_I0))
8592 if (can_create_pseudo_p ())
8593 operands[5] = gen_reg_rtx (<VPRED>mode);
8597 (define_insn_and_split "*fcmuo<mode>_bic_combine"
8598 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8603 [(match_operand:<VPRED> 1)
8604 (const_int SVE_KNOWN_PTRUE)
8605 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8606 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8608 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8609 (match_dup:<VPRED> 1)))
8610 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8617 (const_int SVE_MAYBE_NOT_PTRUE)
8627 if (can_create_pseudo_p ())
8628 operands[5] = gen_reg_rtx (<VPRED>mode);
8632 ;; Same for unordered comparisons.
8633 (define_insn_and_split "*fcmuo<mode>_nor_combine"
8634 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8639 [(match_operand:<VPRED> 1)
8640 (const_int SVE_KNOWN_PTRUE)
8641 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8642 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8645 (match_operand:<VPRED> 4 "register_operand" "Upa")))
8646 (match_dup:<VPRED> 1)))
8647 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8654 (const_int SVE_KNOWN_PTRUE)
8667 if (can_create_pseudo_p ())
8668 operands[5] = gen_reg_rtx (<VPRED>mode);
8672 ;; -------------------------------------------------------------------------
8673 ;; ---- [FP] Absolute comparisons
8674 ;; -------------------------------------------------------------------------
8680 ;; -------------------------------------------------------------------------
8682 ;; Predicated floating-point absolute comparisons.
8683 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
8684 [(set (match_operand:<VPRED> 0 "register_operand")
8686 [(match_operand:<VPRED> 1 "register_operand")
8687 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8691 (match_operand:SVE_FULL_F 3 "register_operand")]
8696 (match_operand:SVE_FULL_F 4 "register_operand")]
8698 SVE_COND_FP_ABS_CMP))]
8702 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
8703 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8705 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8706 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8709 (const_int SVE_RELAXED_GP)
8710 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8714 (const_int SVE_RELAXED_GP)
8715 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8717 SVE_COND_FP_ABS_CMP))]
8719 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8720 "&& (!rtx_equal_p (operands[1], operands[5])
8721 || !rtx_equal_p (operands[1], operands[6]))"
8723 operands[5] = copy_rtx (operands[1]);
8724 operands[6] = copy_rtx (operands[1]);
8728 (define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
8729 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8731 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8732 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8735 (match_operand:SI 5 "aarch64_sve_gp_strictness")
8736 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8740 (match_operand:SI 6 "aarch64_sve_gp_strictness")
8741 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8743 SVE_COND_FP_ABS_CMP))]
8745 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8748 ;; -------------------------------------------------------------------------
8749 ;; ---- [PRED] Select
8750 ;; -------------------------------------------------------------------------
8753 ;; -------------------------------------------------------------------------
8755 (define_insn "@vcond_mask_<mode><mode>"
8756 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8759 (match_operand:PRED_ALL 3 "register_operand" "Upa")
8760 (match_operand:PRED_ALL 1 "register_operand" "Upa"))
8763 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
8765 "sel\t%0.b, %3, %1.b, %2.b"
8768 ;; -------------------------------------------------------------------------
8769 ;; ---- [PRED] Test bits
8770 ;; -------------------------------------------------------------------------
8773 ;; -------------------------------------------------------------------------
8775 ;; Branch based on predicate equality or inequality.
8776 (define_expand "cbranch<mode>4"
8779 (match_operator 0 "aarch64_equality_operator"
8780 [(match_operand:PRED_ALL 1 "register_operand")
8781 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
8782 (label_ref (match_operand 3 ""))
8786 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
8787 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
8788 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
8790 if (operands[2] == CONST0_RTX (<MODE>mode))
8794 pred = gen_reg_rtx (<MODE>mode);
8795 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
8798 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
8799 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
8800 operands[2] = const0_rtx;
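;; A hedged ACLE-level sketch of the expansion above (assuming
;; <arm_sve.h>; the function name is illustrative): two predicates are
;; equal iff their exclusive OR has no active elements under an all-true
;; governing predicate, which is what the XOR + PTEST sequence checks.
;;
;;   #include <arm_sve.h>
;;
;;   int
;;   preds_equal (svbool_t p1, svbool_t p2)
;;   {
;;     svbool_t all = svptrue_b8 ();
;;     return !svptest_any (all, sveor_z (all, p1, p2));
;;   }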
8804 ;; See "Description of UNSPEC_PTEST" above for details.
8805 (define_insn "aarch64_ptest<mode>"
8806 [(set (reg:CC_NZC CC_REGNUM)
8807 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
8809 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8810 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
8816 ;; =========================================================================
8818 ;; =========================================================================
8820 ;; -------------------------------------------------------------------------
8821 ;; ---- [INT,FP] Conditional reductions
8822 ;; -------------------------------------------------------------------------
8826 ;; -------------------------------------------------------------------------
8828 ;; Set operand 0 to the last active element in operand 3, or to tied
8829 ;; operand 1 if no elements are active.
8830 (define_insn "@fold_extract_<last_op>_<mode>"
8831 [(set (match_operand:<VEL> 0 "register_operand")
8833 [(match_operand:<VEL> 1 "register_operand")
8834 (match_operand:<VPRED> 2 "register_operand")
8835 (match_operand:SVE_FULL 3 "register_operand")]
8838 {@ [ cons: =0 , 1 , 2 , 3 ]
8839 [ ?r , 0 , Upl , w ] clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
8840 [ w , 0 , Upl , w ] clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>
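;; A hedged C-level sketch (illustrative only) of a conditional
;; reduction that can use fold_extract_last: the value from the last
;; iteration in which the condition held survives the loop.
;;
;;   int
;;   last_match (const int *restrict a, const int *restrict b, long n)
;;   {
;;     int last = -1;
;;     for (long i = 0; i < n; ++i)
;;       if (a[i] == b[i])
;;         last = i;
;;     return last;
;;   }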
8844 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
8845 [(set (match_operand:SVE_FULL 0 "register_operand")
8847 [(match_operand:SVE_FULL 1 "register_operand")
8848 (match_operand:<VPRED> 2 "register_operand")
8849 (match_operand:SVE_FULL 3 "register_operand")]
8852 {@ [ cons: =0 , 1 , 2 , 3 ]
8853 [ w , 0 , Upl , w ] clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8854 [ ?&w , w , Upl , w ] movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8858 ;; -------------------------------------------------------------------------
8859 ;; ---- [INT] Tree reductions
8860 ;; -------------------------------------------------------------------------
8871 ;; -------------------------------------------------------------------------
8873 ;; Unpredicated integer add reduction.
8874 (define_expand "reduc_plus_scal_<mode>"
8875 [(match_operand:<VEL> 0 "register_operand")
8876 (match_operand:SVE_FULL_I 1 "register_operand")]
8879 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
8880 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
8881 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
8882 if (tmp != operands[0])
8883 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
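;; A hedged C-level sketch (illustrative only) of a loop that reaches
;; reduc_plus_scal: a simple sum reduction.  SADDV/UADDV always produce
;; a 64-bit result, hence the DImode temporary in the expander above.
;;
;;   int
;;   sum (const int *restrict a, long n)
;;   {
;;     int s = 0;
;;     for (long i = 0; i < n; ++i)
;;       s += a[i];
;;     return s;
;;   }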
8888 ;; Predicated integer add reduction. The result is always 64-bits.
8889 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8890 [(set (match_operand:DI 0 "register_operand" "=w")
8891 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
8892 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8894 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8895 "<su>addv\t%d0, %1, %2.<Vetype>"
8898 ;; Unpredicated integer reductions.
8899 (define_expand "reduc_<optab>_scal_<mode>"
8900 [(set (match_operand:<VEL> 0 "register_operand")
8901 (unspec:<VEL> [(match_dup 2)
8902 (match_operand:SVE_FULL_I 1 "register_operand")]
8903 SVE_INT_REDUCTION))]
8906 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8910 ;; Predicated integer reductions.
8911 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8912 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8913 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8914 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8915 SVE_INT_REDUCTION))]
8917 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
8920 ;; -------------------------------------------------------------------------
8921 ;; ---- [FP] Tree reductions
8922 ;; -------------------------------------------------------------------------
8929 ;; -------------------------------------------------------------------------
8931 ;; Unpredicated floating-point tree reductions.
8932 (define_expand "reduc_<optab>_scal_<mode>"
8933 [(set (match_operand:<VEL> 0 "register_operand")
8934 (unspec:<VEL> [(match_dup 2)
8935 (match_operand:SVE_FULL_F 1 "register_operand")]
8939 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8943 (define_expand "reduc_<fmaxmin>_scal_<mode>"
8944 [(match_operand:<VEL> 0 "register_operand")
8945 (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
8949 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
8954 ;; Predicated floating-point tree reductions.
8955 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8956 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8957 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8958 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8961 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
8964 ;; -------------------------------------------------------------------------
8965 ;; ---- [FP] Left-to-right reductions
8966 ;; -------------------------------------------------------------------------
8969 ;; -------------------------------------------------------------------------
8971 ;; Unpredicated in-order FP reductions.
8972 (define_expand "fold_left_plus_<mode>"
8973 [(set (match_operand:<VEL> 0 "register_operand")
8974 (unspec:<VEL> [(match_dup 3)
8975 (match_operand:<VEL> 1 "register_operand")
8976 (match_operand:SVE_FULL_F 2 "register_operand")]
8978 "TARGET_SVE && TARGET_NON_STREAMING"
8980 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
8984 ;; Predicated in-order FP reductions.
8985 (define_insn "mask_fold_left_plus_<mode>"
8986 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8987 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
8988 (match_operand:<VEL> 1 "register_operand" "0")
8989 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8991 "TARGET_SVE && TARGET_NON_STREAMING"
8992 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
8995 ;; =========================================================================
8997 ;; =========================================================================
8999 ;; -------------------------------------------------------------------------
9000 ;; ---- [INT,FP] General permutes
9001 ;; -------------------------------------------------------------------------
9004 ;; -------------------------------------------------------------------------
9006 (define_expand "vec_perm<mode>"
9007 [(match_operand:SVE_FULL 0 "register_operand")
9008 (match_operand:SVE_FULL 1 "register_operand")
9009 (match_operand:SVE_FULL 2 "register_operand")
9010 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
9011 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
9013 aarch64_expand_sve_vec_perm (operands[0], operands[1],
9014 operands[2], operands[3]);
9019 (define_insn "@aarch64_sve_tbl<mode>"
9020 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9022 [(match_operand:SVE_FULL 1 "register_operand" "w")
9023 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
9026 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9029 ;; -------------------------------------------------------------------------
9030 ;; ---- [INT,FP] Special-purpose unary permutes
9031 ;; -------------------------------------------------------------------------
9036 ;; -------------------------------------------------------------------------
9038 ;; Compact active elements and pad with zeros.
9039 (define_insn "@aarch64_sve_compact<mode>"
9040 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
9042 [(match_operand:<VPRED> 1 "register_operand" "Upl")
9043 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
9044 UNSPEC_SVE_COMPACT))]
9045 "TARGET_SVE && TARGET_NON_STREAMING"
9046 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
9049 ;; Duplicate one element of a vector.
9050 (define_insn "@aarch64_sve_dup_lane<mode>"
9051 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9052 (vec_duplicate:SVE_ALL
9054 (match_operand:SVE_ALL 1 "register_operand" "w")
9055 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
9057 && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)"
9058 "dup\t%0.<Vctype>, %1.<Vctype>[%2]"
9061 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
9063 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
9064 ;; number op2 + N of op1. (We don't need to distinguish between memory
9065 ;; and architectural register lane numbering for op1 or op0, since the
9066 ;; two numbering schemes are the same for SVE.)
9068 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
9069 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
9070 ;; of op0. We therefore get the correct result for both endiannesses.
9072 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
9073 ;; is in the opposite order to architectural register lane numbering.
9074 ;; Thus if we were to do this operation via a V128 temporary register,
9075 ;; the vec_select and vec_duplicate would both involve a reverse operation
9076 ;; for big-endian targets.  In this fused pattern the two reverses cancel each other out.
9078 (define_insn "@aarch64_sve_dupq_lane<mode>"
9079 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9080 (vec_duplicate:SVE_FULL
9082 (match_operand:SVE_FULL 1 "register_operand" "w")
9083 (match_operand 2 "ascending_int_parallel"))))]
9085 && (INTVAL (XVECEXP (operands[2], 0, 0))
9086 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
9087 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
9088 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
9090 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
9091 * GET_MODE_SIZE (<VEL>mode));
9092 operands[2] = gen_int_mode (byte / 16, DImode);
9093 return "dup\t%0.q, %1.q[%2]";
9097 ;; Reverse the order of elements within a full vector.
9098 (define_insn "@aarch64_sve_rev<mode>"
9099 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9101 [(match_operand:SVE_ALL 1 "register_operand" "w")]
9104 "rev\t%0.<Vctype>, %1.<Vctype>")
9106 ;; -------------------------------------------------------------------------
9107 ;; ---- [INT,FP] Special-purpose binary permutes
9108 ;; -------------------------------------------------------------------------
9118 ;; -------------------------------------------------------------------------
9120 ;; Like EXT, but start at the first active element.
9121 (define_insn "@aarch64_sve_splice<mode>"
9122 [(set (match_operand:SVE_FULL 0 "register_operand")
9124 [(match_operand:<VPRED> 1 "register_operand")
9125 (match_operand:SVE_FULL 2 "register_operand")
9126 (match_operand:SVE_FULL 3 "register_operand")]
9127 UNSPEC_SVE_SPLICE))]
9129 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9130 [ w , Upl , 0 , w ; * ] splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
9131 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
9135 ;; Permutes that take half the elements from one vector and half the
9136 ;; elements from the other.
9137 (define_insn "@aarch64_sve_<perm_insn><mode>"
9138 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9140 [(match_operand:SVE_ALL 1 "register_operand" "w")
9141 (match_operand:SVE_ALL 2 "register_operand" "w")]
9144 "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
9147 ;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns
9148 ;; doesn't depend on the mode.
9149 (define_insn "@aarch64_sve_<optab><mode>"
9150 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9152 [(match_operand:SVE_FULL 1 "register_operand" "w")
9153 (match_operand:SVE_FULL 2 "register_operand" "w")]
9156 "<perm_insn>\t%0.q, %1.q, %2.q"
9159 ;; Concatenate two vectors and extract a subvector. Note that the
9160 ;; immediate (third) operand is the lane index not the byte index.
9161 (define_insn "@aarch64_sve_ext<mode>"
9162 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
9164 [(match_operand:SVE_ALL 1 "register_operand" "0, w")
9165 (match_operand:SVE_ALL 2 "register_operand" "w, w")
9166 (match_operand:SI 3 "const_int_operand")]
9169 && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)"
9171 operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8);
9172 return (which_alternative == 0
9173 ? "ext\\t%0.b, %0.b, %2.b, #%3"
9174 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
9176 [(set_attr "movprfx" "*,yes")]
9179 ;; -------------------------------------------------------------------------
9180 ;; ---- [PRED] Special-purpose unary permutes
9181 ;; -------------------------------------------------------------------------
9184 ;; -------------------------------------------------------------------------
9186 (define_insn "@aarch64_sve_rev<mode>"
9187 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9188 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
9191 "rev\t%0.<Vetype>, %1.<Vetype>")
9193 ;; -------------------------------------------------------------------------
9194 ;; ---- [PRED] Special-purpose binary permutes
9195 ;; -------------------------------------------------------------------------
9203 ;; -------------------------------------------------------------------------
9205 ;; Permutes that take half the elements from one vector and half the
9206 ;; elements from the other.
9207 (define_insn "@aarch64_sve_<perm_insn><mode>"
9208 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9209 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
9210 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9213 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9216 ;; Special purpose permute used by the predicate generation instructions.
9217 ;; Unlike the normal permute patterns, these instructions operate on VNx16BI
9218 ;; regardless of the element size, so that all input and output bits are
9219 ;; well-defined. Operand 3 then indicates the size of the permute.
9220 (define_insn "@aarch64_sve_trn1_conv<mode>"
9221 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
9222 (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
9223 (match_operand:VNx16BI 2 "register_operand" "Upa")
9224 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
9227 "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
9230 ;; =========================================================================
9232 ;; =========================================================================
9234 ;; -------------------------------------------------------------------------
9235 ;; ---- [INT<-INT] Packs
9236 ;; -------------------------------------------------------------------------
9239 ;; -------------------------------------------------------------------------
9241 ;; Integer pack. Use UZP1 on the narrower type, which discards
9242 ;; the high part of each wide element.
9243 (define_insn "vec_pack_trunc_<Vwide>"
9244 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
9245 (unspec:SVE_FULL_BHSI
9246 [(match_operand:<VWIDE> 1 "register_operand" "w")
9247 (match_operand:<VWIDE> 2 "register_operand" "w")]
9250 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9253 ;; -------------------------------------------------------------------------
9254 ;; ---- [INT<-INT] Unpacks
9255 ;; -------------------------------------------------------------------------
9261 ;; -------------------------------------------------------------------------
9263 ;; Unpack the low or high half of a vector, where "high" refers to
9264 ;; the low-numbered lanes for big-endian and the high-numbered lanes
9265 ;; for little-endian.
9266 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
9267 [(match_operand:<VWIDE> 0 "register_operand")
9269 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
9272 emit_insn ((<hi_lanes_optab>
9273 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
9274 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
9275 (operands[0], operands[1]));
9280 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
9281 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
9283 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
9286 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
9289 ;; -------------------------------------------------------------------------
9290 ;; ---- [INT<-FP] Conversions
9291 ;; -------------------------------------------------------------------------
9295 ;; -------------------------------------------------------------------------
9297 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
9298 ;; SF to SI or DF to DI).
9299 (define_expand "<optab><mode><v_int_equiv>2"
9300 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
9301 (unspec:<V_INT_EQUIV>
9303 (const_int SVE_RELAXED_GP)
9304 (match_operand:SVE_FULL_F 1 "register_operand")]
9308 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
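;; A hedged C-level sketch (illustrative only) of code that reaches the
;; expander above; the conversion truncates towards zero, matching
;; FCVTZS/FCVTZU.
;;
;;   void
;;   to_int (int *restrict dst, const float *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[i] = (int) src[i];
;;   }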
9312 ;; Predicated float-to-integer conversion, either to the same width or wider.
9313 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
9314 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9315 (unspec:SVE_FULL_HSDI
9316 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9317 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9318 (match_operand:SVE_FULL_F 2 "register_operand")]
9320 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9321 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9322 [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9323 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9327 ;; Predicated narrowing float-to-integer conversion.
9328 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9329 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9331 [(match_operand:VNx2BI 1 "register_operand")
9332 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9333 (match_operand:VNx2DF_ONLY 2 "register_operand")]
9336 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9337 [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9338 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9342 ;; Predicated float-to-integer conversion with merging, either to the same width or wider.
9344 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
9345 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9346 (unspec:SVE_FULL_HSDI
9347 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9348 (unspec:SVE_FULL_HSDI
9350 (const_int SVE_STRICT_GP)
9351 (match_operand:SVE_FULL_F 2 "register_operand")]
9353 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9355 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9358 ;; The first alternative doesn't need the earlyclobber, but the only case
9359 ;; it would help is the uninteresting one in which operands 2 and 3 are
9360 ;; the same register (despite having different modes). Making all the
9361 ;; alternatives earlyclobber makes things more consistent for the
9362 ;; register allocator.
9363 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
9364 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9365 (unspec:SVE_FULL_HSDI
9366 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9367 (unspec:SVE_FULL_HSDI
9369 (const_int SVE_RELAXED_GP)
9370 (match_operand:SVE_FULL_F 2 "register_operand")]
9372 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9374 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9375 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9376 [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9377 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9378 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9380 "&& !rtx_equal_p (operands[1], operands[4])"
9382 operands[4] = copy_rtx (operands[1]);
9386 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
9387 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9388 (unspec:SVE_FULL_HSDI
9389 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9390 (unspec:SVE_FULL_HSDI
9392 (const_int SVE_STRICT_GP)
9393 (match_operand:SVE_FULL_F 2 "register_operand")]
9395 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9397 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9398 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9399 [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9400 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9401 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9405 ;; Predicated narrowing float-to-integer conversion with merging.
9406 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9407 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9409 [(match_operand:VNx2BI 1 "register_operand")
9412 (const_int SVE_STRICT_GP)
9413 (match_operand:VNx2DF_ONLY 2 "register_operand")]
9415 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9420 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9421 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9423 [(match_operand:VNx2BI 1 "register_operand")
9426 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9427 (match_operand:VNx2DF_ONLY 2 "register_operand")]
9429 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9432 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9433 [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9434 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9435 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9439 ;; -------------------------------------------------------------------------
9440 ;; ---- [INT<-FP] Packs
9441 ;; -------------------------------------------------------------------------
9442 ;; The patterns in this section are synthetic.
9443 ;; -------------------------------------------------------------------------
9445 ;; Convert two vectors of DF to SI and pack the results into a single vector.
9446 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
9450 (const_int SVE_RELAXED_GP)
9451 (match_operand:VNx2DF 1 "register_operand")]
9456 (const_int SVE_RELAXED_GP)
9457 (match_operand:VNx2DF 2 "register_operand")]
9459 (set (match_operand:VNx4SI 0 "register_operand")
9460 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9463 operands[3] = aarch64_ptrue_reg (VNx2BImode);
9464 operands[4] = gen_reg_rtx (VNx4SImode);
9465 operands[5] = gen_reg_rtx (VNx4SImode);
9469 ;; -------------------------------------------------------------------------
9470 ;; ---- [INT<-FP] Unpacks
9471 ;; -------------------------------------------------------------------------
9472 ;; No patterns here yet!
9473 ;; -------------------------------------------------------------------------
9475 ;; -------------------------------------------------------------------------
9476 ;; ---- [FP<-INT] Conversions
9477 ;; -------------------------------------------------------------------------
9481 ;; -------------------------------------------------------------------------
9483 ;; Unpredicated conversion of integers to floats of the same size
9484 ;; (HI to HF, SI to SF or DI to DF).
9485 (define_expand "<optab><v_int_equiv><mode>2"
9486 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9489 (const_int SVE_RELAXED_GP)
9490 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
9494 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
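;; A hedged C-level sketch (illustrative only) of code that reaches the
;; expander above, mapping to SCVTF (or UCVTF for unsigned sources).
;;
;;   void
;;   to_float (float *restrict dst, const int *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[i] = (float) src[i];
;;   }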
9498 ;; Predicated integer-to-float conversion, either to the same width or narrower.
9500 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9501 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9503 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9504 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9505 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9507 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9508 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9509 [ w , Upl , 0 ; * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9510 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9514 ;; Predicated widening integer-to-float conversion.
9515 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9516 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9518 [(match_operand:VNx2BI 1 "register_operand")
9519 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9520 (match_operand:VNx4SI_ONLY 2 "register_operand")]
9523 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9524 [ w , Upl , 0 ; * ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9525 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9529 ;; Predicated integer-to-float conversion with merging, either to the same
9530 ;; width or narrower.
9531 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9532 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9534 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9537 (const_int SVE_STRICT_GP)
9538 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9540 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9542 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9545 ;; The first alternative doesn't need the earlyclobber, but the only case
9546 ;; it would help is the uninteresting one in which operands 2 and 3 are
9547 ;; the same register (despite having different modes). Making all the
9548 ;; alternatives earlyclobber makes things more consistent for the
9549 ;; register allocator.
9550 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
9551 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9553 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9556 (const_int SVE_RELAXED_GP)
9557 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9559 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9561 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9562 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9563 [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9564 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9565 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9567 "&& !rtx_equal_p (operands[1], operands[4])"
9569 operands[4] = copy_rtx (operands[1]);
9573 (define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
9574 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9576 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9579 (const_int SVE_STRICT_GP)
9580 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9582 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9584 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9585 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9586 [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9587 [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9588 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9592 ;; Predicated widening integer-to-float conversion with merging.
9593 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9594 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9596 [(match_operand:VNx2BI 1 "register_operand")
9599 (const_int SVE_STRICT_GP)
9600 (match_operand:VNx4SI_ONLY 2 "register_operand")]
9602 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9607 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9608 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9610 [(match_operand:VNx2BI 1 "register_operand")
9613 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9614 (match_operand:VNx4SI_ONLY 2 "register_operand")]
9616 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9619 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9620 [ w , Upl , w , 0 ; * ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9621 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9622 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9626 ;; -------------------------------------------------------------------------
9627 ;; ---- [FP<-INT] Packs
9628 ;; -------------------------------------------------------------------------
9629 ;; No patterns here yet!
9630 ;; -------------------------------------------------------------------------
9632 ;; -------------------------------------------------------------------------
9633 ;; ---- [FP<-INT] Unpacks
9634 ;; -------------------------------------------------------------------------
9635 ;; The patterns in this section are synthetic.
9636 ;; -------------------------------------------------------------------------
9638 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
9639 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
9640 ;; unpacked VNx4SI to VNx2DF.
9641 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
9642 [(match_operand:VNx2DF 0 "register_operand")
9644 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
9648 /* Use ZIP to do the unpack, since we don't care about the upper halves
9649 and since it has the nice property of not needing any subregs.
9650 If using UUNPK* turns out to be preferable, we could model it as
9651 a ZIP whose first operand is zero. */
9652 rtx temp = gen_reg_rtx (VNx4SImode);
9653 emit_insn ((<hi_lanes_optab>
9654 ? gen_aarch64_sve_zip2vnx4si
9655 : gen_aarch64_sve_zip1vnx4si)
9656 (temp, operands[1], operands[1]));
9657 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
9658 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9659 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
9660 (operands[0], ptrue, temp, strictness));
9665 ;; -------------------------------------------------------------------------
9666 ;; ---- [FP<-FP] Packs
9667 ;; -------------------------------------------------------------------------
9670 ;; -------------------------------------------------------------------------
9672 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
9673 ;; the results into a single vector.
9674 (define_expand "vec_pack_trunc_<Vwide>"
9676 (unspec:SVE_FULL_HSF
9678 (const_int SVE_RELAXED_GP)
9679 (match_operand:<VWIDE> 1 "register_operand")]
9682 (unspec:SVE_FULL_HSF
9684 (const_int SVE_RELAXED_GP)
9685 (match_operand:<VWIDE> 2 "register_operand")]
9687 (set (match_operand:SVE_FULL_HSF 0 "register_operand")
9688 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9691 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9692 operands[4] = gen_reg_rtx (<MODE>mode);
9693 operands[5] = gen_reg_rtx (<MODE>mode);
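;; A hedged C-level sketch (illustrative only) of a narrowing FP loop
;; that can use the pack expander above: each input vector of DF is
;; converted with FCVT and the two results are combined with UZP1.
;;
;;   void
;;   to_single (float *restrict dst, const double *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[i] = (float) src[i];
;;   }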
9697 ;; Predicated float-to-float truncation.
9698 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9699 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9700 (unspec:SVE_FULL_HSF
9701 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9702 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9703 (match_operand:SVE_FULL_SDF 2 "register_operand")]
9705 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9706 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9707 [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9708 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9712 ;; Predicated float-to-float truncation with merging.
9713 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9714 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9715 (unspec:SVE_FULL_HSF
9716 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9717 (unspec:SVE_FULL_HSF
9719 (const_int SVE_STRICT_GP)
9720 (match_operand:SVE_FULL_SDF 2 "register_operand")]
9722 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
9724 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9727 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9728 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9729 (unspec:SVE_FULL_HSF
9730 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9731 (unspec:SVE_FULL_HSF
9733 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9734 (match_operand:SVE_FULL_SDF 2 "register_operand")]
9736 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
9738 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9739 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9740 [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9741 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9742 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9746 ;; -------------------------------------------------------------------------
9747 ;; ---- [FP<-FP] Packs (bfloat16)
9748 ;; -------------------------------------------------------------------------
9752 ;; -------------------------------------------------------------------------
9754 ;; Predicated BFCVT.
9755 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9756 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9758 [(match_operand:VNx4BI 1 "register_operand")
9759 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9760 (match_operand:VNx4SF_ONLY 2 "register_operand")]
9763 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9764 [ w , Upl , 0 ; * ] bfcvt\t%0.h, %1/m, %2.s
9765 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s
9769 ;; Predicated BFCVT with merging.
9770 (define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9771 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9773 [(match_operand:VNx4BI 1 "register_operand")
9776 (const_int SVE_STRICT_GP)
9777 (match_operand:VNx4SF_ONLY 2 "register_operand")]
9779 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
9784 (define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9785 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9787 [(match_operand:VNx4BI 1 "register_operand")
9790 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9791 (match_operand:VNx4SF_ONLY 2 "register_operand")]
9793 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
9796 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9797 [ w , Upl , w , 0 ; * ] bfcvt\t%0.h, %1/m, %2.s
9798 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
9799 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s
9803 ;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
9804 ;; pair because the even elements always have to be supplied for active
9805 ;; elements, even if the inactive elements don't matter.
9807 ;; This instruction does not take MOVPRFX.
9808 (define_insn "@aarch64_sve_cvtnt<mode>"
9809 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
9811 [(match_operand:VNx4BI 2 "register_operand" "Upl")
9812 (const_int SVE_STRICT_GP)
9813 (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
9814 (match_operand:VNx4SF 3 "register_operand" "w")]
9815 UNSPEC_COND_FCVTNT))]
9817 "bfcvtnt\t%0.h, %2/m, %3.s"
9820 ;; -------------------------------------------------------------------------
9821 ;; ---- [FP<-FP] Unpacks
9822 ;; -------------------------------------------------------------------------
9825 ;; -------------------------------------------------------------------------
9827 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
9828 ;; First unpack the source without conversion, then float-convert the unpacked result.
9830 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
9831 [(match_operand:<VWIDE> 0 "register_operand")
9832 (unspec:SVE_FULL_HSF
9833 [(match_operand:SVE_FULL_HSF 1 "register_operand")]
9837 /* Use ZIP to do the unpack, since we don't care about the upper halves
9838 and since it has the nice property of not needing any subregs.
9839 If using UUNPK* turns out to be preferable, we could model it as
9840 a ZIP whose first operand is zero. */
9841 rtx temp = gen_reg_rtx (<MODE>mode);
9842 emit_insn ((<hi_lanes_optab>
9843 ? gen_aarch64_sve_zip2<mode>
9844 : gen_aarch64_sve_zip1<mode>)
9845 (temp, operands[1], operands[1]));
9846 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9847 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9848 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
9849 (operands[0], ptrue, temp, strictness));
9854 ;; Predicated float-to-float extension.
9855 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9856 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9857 (unspec:SVE_FULL_SDF
9858 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9859 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9860 (match_operand:SVE_FULL_HSF 2 "register_operand")]
9862 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9863 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
9864 [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9865 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9869 ;; Predicated float-to-float extension with merging.
9870 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9871 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9872 (unspec:SVE_FULL_SDF
9873 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9874 (unspec:SVE_FULL_SDF
9876 (const_int SVE_STRICT_GP)
9877 (match_operand:SVE_FULL_HSF 2 "register_operand")]
9879 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
9881 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9884 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9885 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9886 (unspec:SVE_FULL_SDF
9887 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9888 (unspec:SVE_FULL_SDF
9890 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9891 (match_operand:SVE_FULL_HSF 2 "register_operand")]
9893 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
9895 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9896 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
9897 [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9898 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9899 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9903 ;; -------------------------------------------------------------------------
9904 ;; ---- [PRED<-PRED] Packs
9905 ;; -------------------------------------------------------------------------
9908 ;; -------------------------------------------------------------------------
9910 ;; Predicate pack. Use UZP1 on the narrower type, which discards
9911 ;; the high part of each wide element.
9912 (define_insn "vec_pack_trunc_<Vwide>"
9913 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
9915 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
9916 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
9919 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9922 ;; -------------------------------------------------------------------------
9923 ;; ---- [PRED<-PRED] Unpacks
9924 ;; -------------------------------------------------------------------------
9928 ;; -------------------------------------------------------------------------
9930 ;; Unpack the low or high half of a predicate, where "high" refers to
9931 ;; the low-numbered lanes for big-endian and the high-numbered lanes
9932 ;; for little-endian.
9933 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
9934 [(match_operand:<VWIDE> 0 "register_operand")
9935 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
9939 emit_insn ((<hi_lanes_optab>
9940 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
9941 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
9942 (operands[0], operands[1]));
9947 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
9948 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
9949 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
9952 "punpk<perm_hilo>\t%0.h, %1.b"
9955 ;; =========================================================================
9956 ;; == Vector partitioning
9957 ;; =========================================================================
9959 ;; -------------------------------------------------------------------------
9960 ;; ---- [PRED] Unary partitioning
9961 ;; -------------------------------------------------------------------------
9967 ;; -------------------------------------------------------------------------
9969 ;; Note that unlike most other instructions that have both merging and
9970 ;; zeroing forms, these instructions don't operate elementwise and so
9971 ;; don't fit the IFN_COND model.
9972 (define_insn "@aarch64_brk<brk_op>"
9973 [(set (match_operand:VNx16BI 0 "register_operand")
9975 [(match_operand:VNx16BI 1 "register_operand")
9976 (match_operand:VNx16BI 2 "register_operand")
9977 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero")]
9980 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ]
9981 [ &Upa , Upa , Upa , Dz; yes ] brk<brk_op>\t%0.b, %1/z, %2.b
9982 [ ?Upa , 0Upa, 0Upa, Dz; yes ] ^
9983 [ Upa , Upa , Upa , Dz; no ] ^
9984 [ &Upa , Upa , Upa , 0 ; yes ] brk<brk_op>\t%0.b, %1/m, %2.b
9985 [ ?Upa , 0Upa, 0Upa, 0 ; yes ] ^
9986 [ Upa , Upa , Upa , 0 ; no ] ^
9990 ;; Same, but also producing a flags result.
9991 (define_insn "*aarch64_brk<brk_op>_cc"
9992 [(set (reg:CC_NZC CC_REGNUM)
9994 [(match_operand:VNx16BI 1 "register_operand")
9996 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9999 (match_operand:VNx16BI 2 "register_operand")
10000 (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
10003 (set (match_operand:VNx16BI 0 "register_operand")
10010 {@ [ cons: =0, 1 , 2 ; attrs: pred_clobber ]
10011 [ &Upa , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b
10012 [ ?Upa , 0Upa, 0Upa; yes ] ^
10013 [ Upa , Upa , Upa ; no ] ^
10017 ;; Same, but with only the flags result being interesting.
10018 (define_insn "*aarch64_brk<brk_op>_ptest"
10019 [(set (reg:CC_NZC CC_REGNUM)
10021 [(match_operand:VNx16BI 1 "register_operand")
10023 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
10026 (match_operand:VNx16BI 2 "register_operand")
10027 (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
10030 (clobber (match_scratch:VNx16BI 0))]
10032 {@ [ cons: =0, 1 , 2 ; attrs: pred_clobber ]
10033 [ &Upa , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b
10034 [ ?Upa , 0Upa, 0Upa; yes ] ^
10035 [ Upa , Upa , Upa ; no ] ^
10039 ;; -------------------------------------------------------------------------
10040 ;; ---- [PRED] Binary partitioning
10041 ;; -------------------------------------------------------------------------
10049 ;; -------------------------------------------------------------------------
10051 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
10052 (define_insn "@aarch64_brk<brk_op>"
10053 [(set (match_operand:VNx16BI 0 "register_operand")
10055 [(match_operand:VNx16BI 1 "register_operand")
10056 (match_operand:VNx16BI 2 "register_operand")
10057 (match_operand:VNx16BI 3 "register_operand")]
10060 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
10061 [ &Upa , Upa , Upa , <brk_reg_con> ; yes ] brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b
10062 [ ?Upa , 0Upa, 0Upa, 0<brk_reg_con>; yes ] ^
10063 [ Upa , Upa , Upa , <brk_reg_con> ; no ] ^
;; BRKN, producing both a predicate and a flags result. Unlike other
;; flag-setting instructions, these flags are always set with respect to
;; a ptrue.
(define_insn_and_rewrite "*aarch64_brkn_cc"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 4)
  (match_operand:VNx16BI 5)
  (const_int SVE_KNOWN_PTRUE)
  [(match_operand:VNx16BI 1 "register_operand" "Upa")
  (match_operand:VNx16BI 2 "register_operand" "Upa")
  (match_operand:VNx16BI 3 "register_operand" "0")]
  (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
  "brkns\t%0.b, %1/z, %2.b, %0.b"
  "&& (operands[4] != CONST0_RTX (VNx16BImode)
      || operands[5] != CONST0_RTX (VNx16BImode))"
  operands[4] = CONST0_RTX (VNx16BImode);
  operands[5] = CONST0_RTX (VNx16BImode);

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_brkn_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 4)
  (match_operand:VNx16BI 5)
  (const_int SVE_KNOWN_PTRUE)
  [(match_operand:VNx16BI 1 "register_operand" "Upa")
  (match_operand:VNx16BI 2 "register_operand" "Upa")
  (match_operand:VNx16BI 3 "register_operand" "0")]
  (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "brkns\t%0.b, %1/z, %2.b, %0.b"
  "&& (operands[4] != CONST0_RTX (VNx16BImode)
      || operands[5] != CONST0_RTX (VNx16BImode))"
  operands[4] = CONST0_RTX (VNx16BImode);
  operands[5] = CONST0_RTX (VNx16BImode);

;; BRKPA and BRKPB, producing both a predicate and a flags result.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 1 "register_operand")
  (match_operand:SI 4 "aarch64_sve_ptrue_flag")
  (match_operand:VNx16BI 2 "register_operand")
  (match_operand:VNx16BI 3 "register_operand")]
  (set (match_operand:VNx16BI 0 "register_operand")
  {@ [ cons: =0, 1 , 2 , 3 , 4; attrs: pred_clobber ]
     [ &Upa , Upa , Upa , Upa , ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b
     [ ?Upa , 0Upa, 0Upa, 0Upa, ; yes ] ^
     [ Upa , Upa , Upa , Upa , ; no ] ^

;; Same, but with only the flags result being interesting.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 1 "register_operand")
  (match_operand:SI 4 "aarch64_sve_ptrue_flag")
  (match_operand:VNx16BI 2 "register_operand")
  (match_operand:VNx16BI 3 "register_operand")]
  (clobber (match_scratch:VNx16BI 0))]
  {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
     [ &Upa , Upa , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b
     [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
     [ Upa , Upa , Upa , Upa ; no ] ^

;; -------------------------------------------------------------------------
;; ---- [PRED] Scalarization
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_pred_op><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
  [(match_operand:PRED_ALL 1 "register_operand" "Upa")
  (match_operand:SI 2 "aarch64_sve_ptrue_flag")
  (match_operand:PRED_ALL 3 "register_operand" "0")]
  (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
;; Same, but also producing a flags result.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 1 "register_operand" "Upa")
  (match_operand:SI 3 "aarch64_sve_ptrue_flag")
  (match_operand:SI 5 "aarch64_sve_ptrue_flag")
  (match_operand:PRED_ALL 6 "register_operand" "0")]
  (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
  && <max_elem_bits> >= <elem_bits>
  && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  operands[4] = operands[2];
  operands[5] = operands[3];

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
  [(match_operand:VNx16BI 1 "register_operand" "Upa")
  (match_operand:SI 3 "aarch64_sve_ptrue_flag")
  (match_operand:SI 5 "aarch64_sve_ptrue_flag")
  (match_operand:PRED_ALL 6 "register_operand" "0")]
  (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  && <max_elem_bits> >= <elem_bits>
  && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  operands[4] = operands[2];
  operands[5] = operands[3];

;; =========================================================================
;; == Counting elements
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a pattern (scalar)
;; -------------------------------------------------------------------------

;; Count the number of elements in an svpattern. Operand 1 is the pattern,
;; operand 2 is the number of elements that fit in a 128-bit block, and
;; operand 3 is a multiplier in the range [1, 16].
;;
;; Note that this pattern isn't used for SV_ALL (but would work for that too).
(define_insn "aarch64_sve_cnt_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (unspec:SI [(match_operand:DI 1 "const_int_operand")
  (match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))]
  return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
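
;; Illustrative example (values assumed, not taken from the sources): with
;; operand 1 selecting the VL4 pattern, operand 2 equal to 4 (32-bit
;; elements, four per 128-bit block) and operand 3 equal to 2, the output
;; routine would emit something like:
;;
;;      cntw    x0, vl4, mul #2
;;
;; i.e. twice the number of .s elements selected by the VL4 pattern.
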
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (ANY_PLUS:DI (zero_extend:DI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))
  (match_operand:DI_ONLY 1 "register_operand" "0")))]
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",

;; Increment an SImode register by the number of elements in an svpattern
;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_incsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
  (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)
  (match_operand:SI 1 "register_operand" "0")))]
  return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);

;; Increment an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (<paired_extend>:DI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)
  (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
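
;; Illustrative example (operand values assumed): for the SImode saturating
;; forms, signed saturation updates the full X register from its W form,
;; while unsigned saturation operates purely on the W register, giving
;; output such as:
;;
;;      sqincw  x0, w0, vl8, mul #2
;;      uqincw  w0, vl8, mul #2
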
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
  (vec_duplicate:VNx2DI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))
  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]
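
;; Illustrative example (register numbers assumed): when the destination is
;; tied to operand 1 the count is accumulated in place, otherwise a MOVPRFX
;; copy is emitted first:
;;
;;      incd    z0.d, vl8
;; versus
;;      movprfx z0, z1
;;      incd    z0.d, vl8
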
;; Increment a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
  (vec_duplicate:VNx4SI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))
  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; Increment a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (vec_duplicate:VNx8HI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))
  (match_operand:VNx8HI_ONLY 1 "register_operand")))]

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
  (vec_duplicate:VNx8HI
  (match_operator:HI 5 "subreg_lowpart_operator"
  [(unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)]))
  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))))]
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",

;; Decrement an SImode register by the number of elements in an svpattern
;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_decsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
  (minus:SI (match_operand:SI 1 "register_operand" "0")
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))]
  return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);

;; Decrement an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (<paired_extend>:DI
  (match_operand:SI_ONLY 1 "register_operand" "0")
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))))]
  const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
  (vec_duplicate:VNx2DI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; Decrement a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
  (vec_duplicate:VNx4SI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT))))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; Decrement a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (match_operand:VNx8HI_ONLY 1 "register_operand")
  (vec_duplicate:VNx8HI
  (unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)))))]

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
  (vec_duplicate:VNx8HI
  (match_operator:HI 5 "subreg_lowpart_operator"
  [(unspec:SI [(match_operand:DI 2 "const_int_operand")
  (match_operand:DI 3 "const_int_operand")
  (match_operand:DI 4 "const_int_operand")]
  UNSPEC_SVE_CNT_PAT)]))))]
  if (which_alternative == 1)
    output_asm_insn ("movprfx\t%0, %1", operands);
  return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
  [(set_attr "movprfx" "*,yes")]

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a predicate (scalar)
;; -------------------------------------------------------------------------

;; Count the number of set bits in a predicate. Operand 2 is true if
;; operand 1 is known to be all-true.
(define_insn "@aarch64_pred_cntp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
  (match_operand:SI 2 "aarch64_sve_ptrue_flag")
  (match_operand:PRED_ALL 3 "register_operand" "Upa")]
  "cntp\t%x0, %1, %3.<Vetype>")
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
  (unspec:SI [(match_dup 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand")]
  (match_operand:DI_ONLY 1 "register_operand")))]
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

;; Increment an SImode register by the number of set bits in a predicate
;; using modular arithmetic. See aarch64_sve_cntp for a description of
;; the operands.
(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  (match_operand:SI 1 "register_operand" "0")))]
  "incp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<MODE>mode);

;; Increment an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
  (<paired_extend>:DI
  (unspec:SI [(match_dup 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand")]
  (match_operand:SI_ONLY 1 "register_operand"))))]
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (<paired_extend>:DI
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  if (<CODE> == SS_PLUS)
    return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
  return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
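
;; Illustrative example (register numbers assumed): for the saturating
;; SImode forms, signed saturation writes the X register using its W form
;; as the source, while unsigned saturation stays within the W register:
;;
;;      sqincp  x0, p1.h, w0
;;      uqincp  w0, p1.h
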
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
  (vec_duplicate:VNx2DI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
  (vec_duplicate:VNx2DI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.d, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<VPRED>mode);
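
;; Illustrative example (register numbers assumed): INCP/DECP here add or
;; subtract the number of active elements of the predicate in every lane of
;; the vector accumulator, e.g.:
;;
;;      incp    z0.d, p1
;; or, for the untied alternative:
;;      movprfx z0, z1
;;      incp    z0.d, p1
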
;; Increment a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
  (vec_duplicate:VNx4SI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
  (vec_duplicate:VNx4SI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.s, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<VPRED>mode);

;; Increment a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (vec_duplicate:VNx8HI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (vec_duplicate:VNx8HI
  (match_operator:HI 3 "subreg_lowpart_operator"
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.h, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2
  "&& !CONSTANT_P (operands[4])"
  operands[4] = CONSTM1_RTX (<VPRED>mode);

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
  (match_operand:DI_ONLY 1 "register_operand")
  (unspec:SI [(match_dup 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (match_operand:DI_ONLY 1 "register_operand" "0")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

;; Decrement an SImode register by the number of set bits in a predicate
;; using modular arithmetic. See aarch64_sve_cntp for a description of the
;; operands.
(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
  (match_operand:SI 1 "register_operand" "0")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  "decp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<MODE>mode);

;; Decrement an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
  (<paired_extend>:DI
  (match_operand:SI_ONLY 1 "register_operand")
  (unspec:SI [(match_dup 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
  (<paired_extend>:DI
  (match_operand:SI_ONLY 1 "register_operand" "0")
  (unspec:SI [(match_operand 3)
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
  if (<CODE> == SS_MINUS)
    return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
  return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
  (match_operand:VNx2DI_ONLY 1 "register_operand")
  (vec_duplicate:VNx2DI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
  (match_operand:VNx2DI_ONLY 1 "register_operand")
  (vec_duplicate:VNx2DI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.d, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<VPRED>mode);

;; Decrement a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
  (match_operand:VNx4SI_ONLY 1 "register_operand")
  (vec_duplicate:VNx4SI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
  (match_operand:VNx4SI_ONLY 1 "register_operand")
  (vec_duplicate:VNx4SI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.s, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2
  "&& !CONSTANT_P (operands[3])"
  operands[3] = CONSTM1_RTX (<VPRED>mode);

;; Decrement a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (match_operand:VNx8HI_ONLY 1 "register_operand")
  (vec_duplicate:VNx8HI
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  operands[3] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
  (match_operand:VNx8HI_ONLY 1 "register_operand")
  (vec_duplicate:VNx8HI
  (match_operator:HI 3 "subreg_lowpart_operator"
  (const_int SVE_KNOWN_PTRUE)
  (match_operand:<VPRED> 2 "register_operand")]
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , 0 , Upa ; * ] <inc_dec>p\t%0.h, %2
     [ ?&w , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2
  "&& !CONSTANT_P (operands[4])"
  operands[4] = CONSTM1_RTX (<VPRED>mode);

(define_insn_and_split "@aarch64_sve_get_neonq_<mode>"
  [(set (match_operand:<V128> 0 "register_operand" "=w")
  (match_operand:SVE_FULL 1 "register_operand" "w")
  (match_operand 2 "descending_int_parallel")))]
  && BYTES_BIG_ENDIAN
  && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
               GET_MODE_NUNITS (<V128>mode) - 1)"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
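
;; Illustrative note (not from the original sources): these patterns back
;; the Neon-SVE bridge style operations (svget_neonq/svset_neonq), in which
;; the 128-bit Neon value occupies the first 128-bit block of the SVE
;; register.  On big-endian targets that block is not a simple lowpart
;; subreg, which is why the extraction above is written as a vec_select of
;; descending indices and only becomes a plain register move after reload.
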
(define_insn "@aarch64_sve_set_neonq_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
  [(match_operand:SVE_FULL 1 "register_operand" "w")
  (match_operand:<V128> 2 "register_operand" "w")
  (match_operand:<VPRED> 3 "register_operand" "Upl")]
  UNSPEC_SET_NEONQ))]
  && BYTES_BIG_ENDIAN"
  operands[2] = lowpart_subreg (<MODE>mode, operands[2],
                                GET_MODE (operands[2]));
  return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";