1 ;; Machine Description for LoongArch SIMD instructions for GNU compiler.
2 ;; Copyright (C) 2023-2024 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
;; Integer vector modes handled by LSX.
(define_mode_iterator ILSX
  [V2DI
   V4SI
   V8HI
   V16QI])

;; Integer vector modes handled by LASX.
(define_mode_iterator ILASX
  [V4DI
   V8SI
   V16HI
   V32QI])
;; Floating-point vector modes handled by LSX.
(define_mode_iterator FLSX
  [V2DF
   V4SF])

;; Floating-point vector modes handled by LASX.
(define_mode_iterator FLASX
  [V4DF
   V8SF])
;; Every integer vector mode: the ILSX modes are enabled by ISA_HAS_LSX and
;; the ILASX modes by ISA_HAS_LASX.
(define_mode_iterator IVEC
  [(ILSX "ISA_HAS_LSX")
   (ILASX "ISA_HAS_LASX")])

;; Every FP vector mode, gated the same way.
(define_mode_iterator FVEC
  [(FLSX "ISA_HAS_LSX")
   (FLASX "ISA_HAS_LASX")])
;; Mnemonic prefix: empty for LSX modes, "x" for LASX modes.  "<x>" is used
;; in output templates of patterns iterating over both IVEC and FVEC, so
;; every mode of both iterators (including the LSX FP modes V2DF and V4SF)
;; needs an entry here.
(define_mode_attr x [(V2DI "") (V4SI "") (V8HI "") (V16QI "")
                     (V2DF "") (V4SF "")
                     (V4DI "x") (V8SI "x") (V16HI "x") (V32QI "x")
                     (V4DF "x") (V8SF "x")])
;; Modifier for vector register, "w" for LSX modes, "u" for LASX modes.
;; "%<wu>N" is used as the operand modifier by patterns iterating over both
;; IVEC and FVEC, so the LSX FP modes V2DF and V4SF need entries too.
(define_mode_attr wu [(V2DI "w") (V4SI "w") (V8HI "w") (V16QI "w")
                      (V2DF "w") (V4SF "w")
                      (V4DI "u") (V8SI "u") (V16HI "u") (V32QI "u")
                      (V4DF "u") (V8SF "u")])
;; Prefix used in define_insn names: "lsx" for the modes iterated by ILSX
;; and FLSX, "lasx" for the modes iterated by ILASX and FLASX.
(define_mode_attr simd_isa
  [(V16QI "lsx")  (V8HI "lsx")   (V4SI "lsx")  (V2DI "lsx")
   (V4SF "lsx")   (V2DF "lsx")
   (V32QI "lasx") (V16HI "lasx") (V8SI "lasx") (V4DI "lasx")
   (V8SF "lasx")  (V4DF "lasx")])
;; Widened integer vector mode for intermediate values in RTX patterns:
;; each entry keeps the element count and doubles the element width.
(define_mode_attr WVEC
  [(V16QI "V16HI") (V8HI "V8SI")   (V4SI "V4DI") (V2DI "V2TI")
   (V32QI "V32HI") (V16HI "V16SI") (V8SI "V8DI") (V4DI "V4TI")])
;; The integer vector mode that has the same vector length and the same
;; unit size as the given mode.  Integer modes map to themselves.
(define_mode_attr VIMODE
  [(V16QI "V16QI") (V8HI "V8HI")   (V4SI "V4SI") (V2DI "V2DI")
   (V4SF "V4SI")   (V2DF "V2DI")
   (V32QI "V32QI") (V16HI "V16HI") (V8SI "V8SI") (V4DI "V4DI")
   (V8SF "V8SI")   (V4DF "V4DI")])
;; Lower-case spelling of VIMODE, defined for the FP vector modes only.
(define_mode_attr vimode
  [(V4SF "v4si") (V2DF "v2di")
   (V8SF "v8si") (V4DF "v4di")])
;; Suffix for LSX or LASX instructions: "s"/"d" for FP elements and
;; "b"/"h"/"w"/"d" for integer elements.  "<simdfmt>" is used by patterns
;; iterating over FVEC (the .{s/d} forms) and IVEC (the .{b/h/w/d} forms),
;; so every mode of both iterators needs an entry.
(define_mode_attr simdfmt [(V2DF "d") (V4DF "d")
                           (V4SF "s") (V8SF "s")
                           (V2DI "d") (V4DI "d")
                           (V4SI "w") (V8SI "w")
                           (V8HI "h") (V16HI "h")
                           (V16QI "b") (V32QI "b")])
;; Suffix for the integer mode in LSX or LASX instructions with FP input but
;; integer output (used by the <x>vftint patterns).
(define_mode_attr simdifmt_for_f
  [(V4SF "w") (V8SF "w")
   (V2DF "l") (V4DF "l")])
;; Integer-format suffix to use when an FP vector is operated on with an
;; integer vector instruction.
(define_mode_attr simdfmt_as_i
  [(V4SF "w") (V8SF "w")
   (V2DF "d") (V4DF "d")])
;; Width, in bits, of one element of an integer vector mode.
(define_mode_attr elmbits
  [(V16QI "8")  (V32QI "8")
   (V8HI "16")  (V16HI "16")
   (V4SI "32")  (V8SI "32")
   (V2DI "64")  (V4DI "64")])
;; Bit index of the sign bit inside one element of an FP vector mode.
(define_mode_attr elmsgnbit
  [(V4SF "31") (V8SF "31")
   (V2DF "63") (V4DF "63")])
;; Name fragment of the immediate-operand predicate for a mode; patterns
;; spell the predicate as "const_<bitimm>_operand".
(define_mode_attr bitimm
  [(V2DI "uimm6")  (V4DI "uimm6")
   (V4SI "uimm5")  (V8SI "uimm5")
   (V8HI "uimm4")  (V16HI "uimm4")
   (V16QI "uimm3") (V32QI "uimm3")])
110 ;; =======================================================================
;; For many LASX instructions, the only difference from the LSX
;; counterpart is the length of the vector operands.  Describe these
;; LSX/LASX instructions here so we can avoid duplicating logic.
114 ;; =======================================================================
;; FP vector rounding instructions

;; One unspec number per rounding variant; these are the keys of the
;; simd_frint_rounding and simd_frint_pattern attributes.
(define_c_enum "unspec"
  [UNSPEC_SIMD_FRINTRP
   UNSPEC_SIMD_FRINTRZ
   UNSPEC_SIMD_FRINT
   UNSPEC_SIMD_FRINTRM
   UNSPEC_SIMD_FRINTRNE])
;; Iterates over all FP vector rounding unspecs; every value has an entry
;; in simd_frint_rounding and simd_frint_pattern.
(define_int_iterator SIMD_FRINT
  [UNSPEC_SIMD_FRINTRP
   UNSPEC_SIMD_FRINTRZ
   UNSPEC_SIMD_FRINT
   UNSPEC_SIMD_FRINTRM
   UNSPEC_SIMD_FRINTRNE])
;; Rounding suffix appended to the "vfrint"/"vftint" mnemonic for each
;; rounding unspec; plain UNSPEC_SIMD_FRINT has no suffix.
(define_int_attr simd_frint_rounding
  [(UNSPEC_SIMD_FRINT    "")
   (UNSPEC_SIMD_FRINTRNE "rne")
   (UNSPEC_SIMD_FRINTRZ  "rz")
   (UNSPEC_SIMD_FRINTRP  "rp")
   (UNSPEC_SIMD_FRINTRM  "rm")])
;; Standard pattern name implemented by each rounding unspec.
;; All of these except rint are controlled by -ffp-int-builtin-inexact.
;; Note: nearbyint is NOT allowed to raise FE_INEXACT even with
;; -ffp-int-builtin-inexact, whereas rint is ALLOWED to raise it even with
;; -fno-fp-int-builtin-inexact.
(define_int_attr simd_frint_pattern
  [(UNSPEC_SIMD_FRINT    "rint")
   (UNSPEC_SIMD_FRINTRNE "roundeven")
   (UNSPEC_SIMD_FRINTRZ  "btrunc")
   (UNSPEC_SIMD_FRINTRP  "ceil")
   (UNSPEC_SIMD_FRINTRM  "floor")])
;; <x>vfrint{/rne/rz/rp/rm}.{s/d}
;; Round each FP element of operand 1 to an integral FP value, with the
;; rounding variant selected by the SIMD_FRINT unspec.  The insn condition
;; is empty because the FVEC iterator already carries the ISA conditions.
(define_insn "<simd_isa>_<x>vfrint<simd_frint_rounding>_<simdfmt>"
  [(set (match_operand:FVEC 0 "register_operand" "=f")
        (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")]
                     SIMD_FRINT))]
  ""
  "<x>vfrint<simd_frint_rounding>.<simdfmt>\t%<wu>0,%<wu>1"
  [(set_attr "type" "simd_fcvt")
   (set_attr "mode" "<MODE>")])
;; Expand the standard-named patterns to <x>vfrint instructions if
;; raising inexact exception is allowed: always for rint, and for the
;; other variants only with -ffp-int-builtin-inexact or
;; -fno-trapping-math.

(define_expand "<simd_frint_pattern><mode>2"
  [(set (match_operand:FVEC 0 "register_operand" "=f")
        (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")]
                     SIMD_FRINT))]
  "<SIMD_FRINT> == UNSPEC_SIMD_FRINT ||
   flag_fp_int_builtin_inexact ||
   !flag_trapping_math")
;; ftrunc is like btrunc, but it's allowed to raise inexact exception
;; even if -fno-fp-int-builtin-inexact, so the expander is unconditional.
(define_expand "ftrunc<mode>2"
  [(set (match_operand:FVEC 0 "register_operand" "=f")
        (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")]
                     UNSPEC_SIMD_FRINTRZ))]
  "")
;; Use LSX for scalar ceil/floor/trunc/roundeven when -mlsx and -ffp-int-
;; builtin-inexact.  The base FP instruction set lacks these operations.
;; Yes we are wasting 50% or even 75% of the CPU horsepower, but it's still
;; much faster than calling a libc function on LA464 and LA664.
;;
;; Note that a vreplvei instruction is needed or we'll also operate on the
;; junk in high bits of the vector register and produce random FP exceptions.

;; The rounding unspecs for ceil, btrunc, floor and roundeven.  Plain
;; UNSPEC_SIMD_FRINT (rint) is deliberately not included.
(define_int_iterator LSX_SCALAR_FRINT
  [UNSPEC_SIMD_FRINTRP
   UNSPEC_SIMD_FRINTRZ
   UNSPEC_SIMD_FRINTRM
   UNSPEC_SIMD_FRINTRNE])
;; The LSX vector mode whose elements have the given scalar FP mode.
(define_mode_attr VLSX_FOR_FMODE
  [(SF "V4SF")
   (DF "V2DF")])
;; Scalar rounding through LSX: broadcast the scalar into the temporary
;; vector register operands[2], round that vector in place, then extract
;; element 0 as the scalar result.
(define_expand "<simd_frint_pattern><mode>2"
  [(set (match_dup 2)
        (vec_duplicate:<VLSX_FOR_FMODE>
          (match_operand:ANYF 1 "register_operand")))
   (set (match_dup 2)
        (unspec:<VLSX_FOR_FMODE> [(match_dup 2)] LSX_SCALAR_FRINT))
   (set (match_operand:ANYF 0 "register_operand")
        (vec_select:ANYF (match_dup 2) (parallel [(const_int 0)])))]
  "ISA_HAS_LSX && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
  "operands[2] = gen_reg_rtx (<VLSX_FOR_FMODE>mode);")
;; <x>vftint{/rne/rz/rp/rm}.{w.s/l.d}
;; Convert each FP element to an integer of the same width, rounding as
;; selected by the SIMD_FRINT unspec.  Modelled as FIX of the rounded value.
(define_insn
  "<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>"
  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
        (fix:<VIMODE>
          (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")]
                       SIMD_FRINT)))]
  ""
  "<x>vftint<simd_frint_rounding>.<simdifmt_for_f>.<simdfmt>\t%<wu>0,%<wu>1"
  [(set_attr "type" "simd_fcvt")
   (set_attr "mode" "<MODE>")])
;; Expand the standard-named patterns to <x>vftint instructions if
;; raising inexact exception is allowed: always for lrint, and for the
;; other variants only with -ffp-int-builtin-inexact or
;; -fno-trapping-math.

(define_expand "l<simd_frint_pattern><mode><vimode>2"
  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
        (fix:<VIMODE>
          (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")]
                       SIMD_FRINT)))]
  "<SIMD_FRINT> == UNSPEC_SIMD_FRINT ||
   flag_fp_int_builtin_inexact ||
   !flag_trapping_math")
;; fix_trunc is allowed to raise inexact exception even if
;; -fno-fp-int-builtin-inexact.  Because the middle end tries to match
;; (FIX x) and it does not know (FIX (UNSPEC_SIMD_FRINTRZ x)), we need
;; to use define_insn_and_split instead of define_expand (expanders are
;; not considered during matching).  The insn has no assembly of its own
;; ("#"): it is always split into the matching <x>vftintrz instruction.
(define_insn_and_split "fix_trunc<mode><vimode>2"
  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
        (fix:<VIMODE> (match_operand:FVEC 1 "register_operand" "f")))]
  ""
  "#"
  ""
  [(const_int 0)]
  {
    emit_insn (gen_<simd_isa>_<x>vftintrz_<simdifmt_for_f>_<simdfmt> (
      operands[0], operands[1]));
    DONE;
  }
  [(set_attr "type" "simd_fcvt")
   (set_attr "mode" "<MODE>")])
;; RTX code of the high-part multiplication that corresponds to each
;; extension code: signed for sign_extend, unsigned for zero_extend.
(define_code_attr muh
  [(zero_extend "umul_highpart")
   (sign_extend "smul_highpart")])
;; <x>vmuh.{b/h/w/d}[u]: high part of the element-wise product.
;; NOTE(review): the trailing (any_extend (const_int 0)) element looks like
;; a dummy whose purpose is to make the pattern iterate over any_extend so
;; that <su>, <muh> and <u> get substituted -- confirm against the port.
(define_insn "<su>mul<mode>3_highpart"
  [(set (match_operand:IVEC 0 "register_operand" "=f")
        (<muh>:IVEC (match_operand:IVEC 1 "register_operand" "f")
                    (match_operand:IVEC 2 "register_operand" "f")))
   (any_extend (const_int 0))]
  ""
  "<x>vmuh.<simdfmt><u>\t%<wu>0,%<wu>1,%<wu>2"
  [(set_attr "type" "simd_int_arith")
   (set_attr "mode" "<MODE>")])
;; <x>vrotr.{b/h/w/d}
;; Element-wise rotate right of operand 1 by the per-element amounts in
;; operand 2.  The IVEC iterator supplies the ISA condition.

(define_insn "vrotr<mode>3"
  [(set (match_operand:IVEC 0 "register_operand" "=f")
        (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
                       (match_operand:IVEC 2 "register_operand" "f")))]
  ""
  "<x>vrotr.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
  [(set_attr "type" "simd_int_arith")
   (set_attr "mode" "<MODE>")])
;; Expand left rotate to right rotate: negate the vector of rotate amounts
;; into the temporary operands[3], then rotate right by it.
(define_expand "vrotl<mode>3"
  [(set (match_dup 3)
        (neg:IVEC (match_operand:IVEC 2 "register_operand")))
   (set (match_operand:IVEC 0 "register_operand")
        (rotatert:IVEC (match_operand:IVEC 1 "register_operand")
                       (match_dup 3)))]
  ""
  {
    operands[3] = gen_reg_rtx (<MODE>mode);
  })
;; Expand left rotate with a scalar amount to right rotate: negate the
;; scalar before broadcasting it because scalar negation is cheaper than
;; vector negation.  operands[3] holds the negated SImode amount and
;; operands[4] the vector it is broadcast into.
(define_expand "rotl<mode>3"
  [(set (match_dup 3)
        (neg:SI (match_operand:SI 2 "register_operand")))
   (set (match_dup 4)
        (vec_duplicate:IVEC (subreg:<IVEC:UNITMODE> (match_dup 3) 0)))
   (set (match_operand:IVEC 0 "register_operand")
        (rotatert:IVEC (match_operand:IVEC 1 "register_operand")
                       (match_dup 4)))]
  ""
  {
    operands[3] = gen_reg_rtx (SImode);
    operands[4] = gen_reg_rtx (<MODE>mode);
  })
;; <x>v{rotr/sll/sra/srl}i.{b/h/w/d}
;; Rotate/shift every element of operand 1 by the immediate operand 2,
;; whose range is given by the mode's "const_<bitimm>_operand" predicate.
;; NOTE(review): the code iterator is assumed to be shift_w (shifts plus
;; rotatert, providing <optab> and <insn>); it is defined outside this
;; file -- confirm the name.

(define_insn "<optab><mode>3"
  [(set (match_operand:IVEC 0 "register_operand" "=f")
        (shift_w:IVEC
          (match_operand:IVEC 1 "register_operand" "f")
          (match_operand:SI 2 "const_<bitimm>_operand")))]
  ""
  "<x>v<insn>i.<simdfmt>\t%<wu>0,%<wu>1,%2"
  [(set_attr "type" "simd_int_arith")
   (set_attr "mode" "<MODE>")])
;; <x>vfcmp.*.{s/d} with defined RTX code
;; There are no fcmp.{sugt/suge/cgt/cge}.{s/d} mnemonics in GAS, so we have
;; to reverse the operands ourselves :(.
;; The codes listed here are the ones emitted with operands in their
;; original order.
(define_code_iterator fcond_simd [unordered uneq unlt unle eq lt le
                                  ordered ltgt ne])
;; Element-wise FP comparison for the codes in fcond_simd; the result is
;; written to the integer vector mode of the same shape (<VIMODE>).
(define_insn "<simd_isa>_<x>vfcmp_<fcond>_<simdfmt>"
  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
        (fcond_simd:<VIMODE>
          (match_operand:FVEC 1 "register_operand" "f")
          (match_operand:FVEC 2 "register_operand" "f")))]
  ""
  "<x>vfcmp.<fcond>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
  [(set_attr "type" "simd_fcmp")
   (set_attr "mode" "<MODE>")])
;; GAS provides no fcmp.{sge/sgt/cuge/cugt}.{s/d} mnemonics, so for these
;; comparison codes we have to swap the operands ourselves.
(define_code_iterator fcond_simd_rev
  [ge gt unge ungt])
;; Condition mnemonic to emit, with the two inputs swapped, for each code
;; in fcond_simd_rev: (a ge b) is emitted as (b sle a), and so on.
(define_code_attr fcond_rev_asm
  [(ge   "sle")
   (gt   "slt")
   (unge "cule")
   (ungt "cult")])
;; Element-wise FP comparison for the codes in fcond_simd_rev.  Operands 1
;; and 2 are swapped in the output template and the mirrored condition
;; <fcond_rev_asm> is used.
(define_insn "<simd_isa>_<x>vfcmp_<fcond>_<simdfmt>"
  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
        (fcond_simd_rev:<VIMODE>
          (match_operand:FVEC 1 "register_operand" "f")
          (match_operand:FVEC 2 "register_operand" "f")))]
  ""
  "<x>vfcmp.<fcond_rev_asm>.<simdfmt>\t%<wu>0,%<wu>2,%<wu>1"
  [(set_attr "type" "simd_fcmp")
   (set_attr "mode" "<MODE>")])
;; <x>vfcmp.*.{s/d} conditions that have no RTX code of their own but whose
;; inverse does.  As before, GAS has no fcmp.{sugt/suge/cgt/cge}.{s/d}
;; mnemonics, so for those we have to swap the operands ourselves.
(define_code_iterator fcond_inv
  [ge gt unge ungt])
(define_code_iterator fcond_inv_rev
  [le lt unle unlt])
;; Condition name of the logical inverse of each comparison code, e.g.
;; NOT (a ge b) is "a unordered-or-less-than b" (sult).  Used both in insn
;; names and, for the fcond_inv codes, as the emitted mnemonic.
(define_code_attr fcond_inv
  [(ge   "sult")
   (gt   "sule")
   (unge "clt")
   (ungt "cle")
   (le   "sugt")
   (lt   "suge")
   (unle "cgt")
   (unlt "cge")])
;; Condition mnemonic to emit, with the two inputs swapped, for the inverse
;; of each code in fcond_inv_rev: NOT (a le b) is (a sugt b), which is
;; emitted as (b sult a), and so on.
(define_code_attr fcond_inv_rev_asm
  [(le   "sult")
   (lt   "sule")
   (unle "clt")
   (unlt "cle")])
;; Inverted comparison for the codes in fcond_inv: matches NOT of the RTX
;; comparison and emits the inverse condition <fcond_inv> with the operands
;; in their original order.
(define_insn "<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>"
  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
        (not:<VIMODE>
          (fcond_inv:<VIMODE>
            (match_operand:FVEC 1 "register_operand" "f")
            (match_operand:FVEC 2 "register_operand" "f"))))]
  ""
  "<x>vfcmp.<fcond_inv>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
  [(set_attr "type" "simd_fcmp")
   (set_attr "mode" "<MODE>")])
;; Inverted comparison for the codes in fcond_inv_rev: matches NOT of the
;; RTX comparison and emits <fcond_inv_rev_asm> with operands 1 and 2
;; swapped, because the direct inverse has no mnemonic in GAS.
(define_insn "<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>"
  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
        (not:<VIMODE>
          (fcond_inv_rev:<VIMODE>
            (match_operand:FVEC 1 "register_operand" "f")
            (match_operand:FVEC 2 "register_operand" "f"))))]
  ""
  "<x>vfcmp.<fcond_inv_rev_asm>.<simdfmt>\t%<wu>0,%<wu>2,%<wu>1"
  [(set_attr "type" "simd_fcmp")
   (set_attr "mode" "<MODE>")])
;; <x>vfcmp.*.{s/d} instructions only as intrinsics
;; One unspec number per condition; these are the keys of fcond_unspec.
(define_c_enum "unspec"
  [UNSPEC_SIMD_FCMP_CAF
   UNSPEC_SIMD_FCMP_SAF
   UNSPEC_SIMD_FCMP_SEQ
   UNSPEC_SIMD_FCMP_SUN
   UNSPEC_SIMD_FCMP_SUEQ
   UNSPEC_SIMD_FCMP_CNE
   UNSPEC_SIMD_FCMP_SOR
   UNSPEC_SIMD_FCMP_SUNE])
;; Iterates over every intrinsic-only comparison unspec; each value has an
;; entry in fcond_unspec.
(define_int_iterator SIMD_FCMP
  [UNSPEC_SIMD_FCMP_CAF
   UNSPEC_SIMD_FCMP_SAF
   UNSPEC_SIMD_FCMP_SEQ
   UNSPEC_SIMD_FCMP_SUN
   UNSPEC_SIMD_FCMP_SUEQ
   UNSPEC_SIMD_FCMP_CNE
   UNSPEC_SIMD_FCMP_SOR
   UNSPEC_SIMD_FCMP_SUNE])
;; Condition mnemonic for each intrinsic-only comparison unspec.
(define_int_attr fcond_unspec
  [(UNSPEC_SIMD_FCMP_CAF  "caf")  (UNSPEC_SIMD_FCMP_SAF  "saf")
   (UNSPEC_SIMD_FCMP_SEQ  "seq")  (UNSPEC_SIMD_FCMP_SUN  "sun")
   (UNSPEC_SIMD_FCMP_SUEQ "sueq") (UNSPEC_SIMD_FCMP_CNE  "cne")
   (UNSPEC_SIMD_FCMP_SOR  "sor")  (UNSPEC_SIMD_FCMP_SUNE "sune")])
;; Comparisons that exist only as intrinsics: modelled as an unspec of the
;; two FP inputs, with the condition selected by the SIMD_FCMP iterator.
(define_insn "<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>"
  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
        (unspec:<VIMODE> [(match_operand:FVEC 1 "register_operand" "f")
                          (match_operand:FVEC 2 "register_operand" "f")]
                         SIMD_FCMP))]
  ""
  "<x>vfcmp.<fcond_unspec>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
  [(set_attr "type" "simd_fcmp")
   (set_attr "mode" "<MODE>")])
; The [x]vf{min/max} instructions conform to IEEE-754-2008, so they are
; used for the corresponding IEEE-754-2008 operations.  UNSPEC has to be
; used instead of smin/smax though, see PR105414 and PR107013.

(define_int_iterator UNSPEC_FMAXMIN
  [UNSPEC_FMAX
   UNSPEC_FMIN])
(define_int_attr fmaxmin
  [(UNSPEC_FMAX "fmax")
   (UNSPEC_FMIN "fmin")])
;; <x>vf{max/min}.{s/d}: element-wise FP maximum/minimum, kept as an unspec
;; selected by the UNSPEC_FMAXMIN iterator.
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:FVEC 0 "register_operand" "=f")
        (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")
                      (match_operand:FVEC 2 "register_operand" "f")]
                     UNSPEC_FMAXMIN))]
  ""
  "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
  [(set_attr "type" "simd_fminmax")
   (set_attr "mode" "<MODE>")])
;; ... and also reduc operations.
;; The (const_int UNSPEC_FMAXMIN) element only makes the expander iterate
;; over UNSPEC_FMAXMIN so that <fmaxmin> is substituted.  The vector is
;; reduced into TMP with the matching fmax/fmin pattern, then one element of
;; TMP is extracted as the scalar result.
;; NOTE(review): element 0 (const0_rtx) is assumed to hold the reduction
;; result -- confirm against loongarch_expand_vector_reduc.
(define_expand "reduc_<fmaxmin>_scal_<mode>"
  [(match_operand:<UNITMODE> 0 "register_operand")
   (match_operand:FVEC 1 "register_operand")
   (const_int UNSPEC_FMAXMIN)]
  ""
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]);
  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
                                              const0_rtx));
  DONE;
})
;; FP negation, implemented with the integer bit-reverse-immediate
;; instruction: <x>vbitrevi flips bit <elmsgnbit> (the sign bit) of every
;; element, using the integer format <simdfmt_as_i> of the same width.
(define_insn "neg<mode>2"
  [(set (match_operand:FVEC 0 "register_operand" "=f")
        (neg:FVEC (match_operand:FVEC 1 "register_operand" "f")))]
  ""
  "<x>vbitrevi.<simdfmt_as_i>\t%<wu>0,%<wu>1,<elmsgnbit>"
  [(set_attr "type" "simd_logic")
   (set_attr "mode" "<MODE>")])
479 ; The LoongArch SX Instructions.
482 ; The LoongArch ASX Instructions.