1 ;; Copyright (C) 2016-2024 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
19 (include "predicates.md")
20 (include "constraints.md")
22 ;; {{{ Constants and enums
27 (FLAT_SCRATCH_REG 102)
28 (FLAT_SCRATCH_LO_REG 102)
29 (FLAT_SCRATCH_HI_REG 103)
31 (XNACK_MASK_LO_REG 104)
32 (XNACK_MASK_HI_REG 105)
58 (define_c_enum "unspecv" [
65 (define_c_enum "unspec" [
71 UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
72 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
74 UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
75 UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
77 UNSPEC_MOV_DPP_SWAP_PAIRS
78 UNSPEC_MOV_DPP_DISTRIBUTE_EVEN
79 UNSPEC_MOV_DPP_DISTRIBUTE_ODD
80 UNSPEC_CMUL UNSPEC_CMUL_CONJ
81 UNSPEC_CMUL_ADD UNSPEC_CMUL_SUB
88 UNSPEC_FLOOR UNSPEC_CEIL UNSPEC_SIN UNSPEC_COS UNSPEC_EXP2 UNSPEC_LOG2
89 UNSPEC_LDEXP UNSPEC_FREXP_EXP UNSPEC_FREXP_MANT
90 UNSPEC_DIV_SCALE UNSPEC_DIV_FMAS UNSPEC_DIV_FIXUP])
95 ; Instruction type (encoding) as described in the ISA specification.
96 ; The following table summarizes possible operands of individual instruction
97 ; types and corresponding constraints.
99 ; sop2 - scalar, two inputs, one output
100 ; ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
101 ; vccz,execz,scc,inline immediate,fp inline immediate
102 ; sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
104 ; Constraints "=SD, SD", "SSA,SSB","SSB,SSA"
106 ; sopk - scalar, inline constant input, one output
107 ; simm16: 16bit inline constant
108 ; sdst: same as sop2/ssrc0
110 ; Constraints "=SD", "J"
112 ; sop1 - scalar, one input, one output
113 ; ssrc0: same as sop2/ssrc0. FIXME: manual omits VCCZ
114 ; sdst: same as sop2/sdst
116 ; Constraints "=SD", "SSA"
118 ; sopc - scalar, two inputs, one comparison
119 ; ssrc0: same as sop2/ssrc0.
121 ; Constraints "SSI,SSA","SSA,SSI"
123 ; sopp - scalar, one constant input, one special
126 ; smem - scalar memory
127 ; sbase: aligned pair of sgprs. Specify {size[15:0], base[47:0]} in
129 ; sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma
130 ; offset: sgpr or 20bit unsigned byte offset
132 ; vop2 - vector, two inputs, one output
133 ; vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec,
134 ; inline constant -16 to -64, fp inline immediate, vccz, execz,
135 ; scc, lds, literal constant, vgpr0-255
138 ; Limitations: At most one SGPR, at most one constant
139 ; if constant is used, SGPR must be M0
140 ; Only SRC0 can be LDS_DIRECT
142 ; constraints: "=v", "vBSv", "v"
144 ; vop1 - vector, one input, one output
145 ; vsrc0: same as vop2/src0
148 ; constraints: "=v", "vBSv"
150 ; vopc - vector, two inputs, one comparison output;
151 ; vsrc0: same as vop2/src0
155 ; constraints: "vASv", "v"
157 ; vop3a - vector, three inputs, one output
158 ; vdst: vgpr0-255, for v_cmp sgpr or vcc
160 ; vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec,
161 ; inline constant -16 to -64, fp inline immediate, vccz, execz,
163 ; FIXME: really missing 1/pi? really 104 SGPRs
165 ; vop3b - vector, three inputs, one vector output, one scalar output
166 ; vsrc0,vsrc1,vsrc2: same as vop3a vsrc0
168 ; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
170 ; vop3p_mai - vector, three inputs, one vector output
171 ; vsrc0,vsrc1,vsrc2: inline constant -16 to -64, fp inline immediate,
172 ; (acc or arch) vgpr0-255
173 ; vdst: (acc or arch) vgpr0-255
175 ; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
177 ; dst_sel: BYTE_0-3, WORD_0-1, DWORD
178 ; dst_unused: UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE
180 ; src0_sel: BYTE_0-3, WORD_0-1, DWORD
181 ; flags: src0_sext, src0_neg, src0_abs, src1_sel, src1_sext, src1_neg,
184 ; vop_dpp - second dword for vop1/vop2/vopc for specifying data-parallel ops
186 ; dpp_ctrl: quad_perm, row_sl0-15, row_sr0-15, row_rr0-15, wf_sl1,
187 ; wf_rl1, wf_sr1, wf_rr1, row_mirror, row_half_mirror,
189 ; flags: src0_neg, src0_abs, src1_neg, src1_abs
190 ; bank_mask: 4-bit mask
191 ; row_mask: 4-bit mask
193 ; ds - Local and global data share instructions.
194 ; offset0: 8-bit constant
195 ; offset1: 8-bit constant
202 ; mubuf - Untyped memory buffer operation. First word with LDS, second word
204 ; offset: 12-bit constant
209 ; flags: offen, idxen, glc, lds, slc, tfe
211 ; mtbuf - Typed memory buffer operation. Two words
212 ; offset: 12-bit constant
213 ; dfmt: 4-bit constant
214 ; nfmt: 3-bit constant
219 ; flags: offen, idxen, glc, lds, slc, tfe
221 ; flat - flat or global memory operations
227 ; mult - expands to multiple instructions (pseudo encoding)
229 ; vmult - as mult, when a vector instruction is used.
232 "unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
233 vop3a,vop3b,vop3p_mai,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,
235 (const_string "unknown"))
237 ; Set if instruction is executed in scalar or vector unit
; Classify each instruction type as running on the scalar unit (SALU/SMEM,
; including the "mult" multi-insn pseudo type) or the vector unit
; (VALU/LDS/FLAT, including "vmult").  Types not listed in either branch
; fall back to "unknown".
239 (define_attr "unit" "unknown,scalar,vector"
240 (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult")
241 (const_string "scalar")
242 (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds,vop3p_mai,
243 vop_sdwa,vop_dpp,flat,vmult")
244 (const_string "vector")]
245 (const_string "unknown")))
247 ; All vector instructions run as 64 threads as predicated by the EXEC
248 ; register. Scalar operations in vector register require a single lane
249 ; enabled, vector moves require a full set of lanes enabled, and most vector
250 ; operations handle the lane masking themselves.
251 ; The md_reorg pass is responsible for ensuring that EXEC is set appropriately
252 ; according to the following settings:
253 ; auto - md_reorg will inspect def/use to determine what to do.
254 ; none - exec is not needed.
255 ; single - disable all but lane zero.
256 ; full - enable all lanes.
; Per-insn EXEC-mask requirement consumed by the md_reorg pass (see the
; comment above): "auto" = inspect def/use, "none" = EXEC irrelevant,
; "single" = only lane 0 enabled, "full" = all lanes enabled.
258 (define_attr "exec" "auto,none,single,full"
259 (const_string "auto"))
261 ; Infer the (worst-case) length from the instruction type by default. Many
262 ; types can have an optional immediate word following, which we include here.
263 ; "Multiple" types are counted as two 64-bit instructions. This is just a
264 ; default fallback: it can be overridden per-alternative in insn patterns for
; Worst-case encoded length in bytes per instruction type.  Most encodings
; are 4 bytes but may carry an optional 32-bit literal, hence 8; "mult" /
; "vmult" pseudo types count as two such instructions (16).  Individual
; insn patterns override this per-alternative where the real length is
; known.
; NOTE(review): the cond's final fallback value (const_int 4 per the
; comment above) is not visible in this excerpt — confirm against the
; full file.
267 (define_attr "length" ""
268 (cond [(eq_attr "type" "sop1") (const_int 8)
269 (eq_attr "type" "sop2") (const_int 8)
270 (eq_attr "type" "sopk") (const_int 8)
271 (eq_attr "type" "sopc") (const_int 8)
272 (eq_attr "type" "sopp") (const_int 4)
273 (eq_attr "type" "smem") (const_int 8)
274 (eq_attr "type" "ds") (const_int 8)
275 (eq_attr "type" "vop1") (const_int 8)
276 (eq_attr "type" "vop2") (const_int 8)
277 (eq_attr "type" "vopc") (const_int 8)
278 (eq_attr "type" "vop3a") (const_int 8)
279 (eq_attr "type" "vop3b") (const_int 8)
280 (eq_attr "type" "vop_sdwa") (const_int 8)
281 (eq_attr "type" "vop_dpp") (const_int 8)
282 (eq_attr "type" "flat") (const_int 8)
283 (eq_attr "type" "mult") (const_int 16)
284 (eq_attr "type" "vmult") (const_int 16)]
287 ; Disable alternatives that only apply to specific ISA variants.
; Per-alternative ISA gates tested by the "enabled" attribute below:
; "cdna2" alternatives need TARGET_CDNA2_PLUS, "rdna" yes/no alternatives
; are selected by TARGET_RDNA2_PLUS, and "xnack" off/on alternatives are
; selected by TARGET_XNACK.  "any"/"na" alternatives are always usable.
289 (define_attr "cdna" "any,cdna2" (const_string "any"))
290 (define_attr "rdna" "any,no,yes" (const_string "any"))
292 (define_attr "xnack" "na,off,on" (const_string "na"))
; Disable constraint alternatives that do not apply to the selected ISA:
; each arm pairs an alternative's cdna/rdna/xnack marker with the target
; flag that contradicts it.
; NOTE(review): the (const_int 0)/(const_int 1) result lines between the
; visible conditions are not present in this excerpt — confirm against
; the full file.
294 (define_attr "enabled" ""
295 (cond [(and (eq_attr "rdna" "no")
296 (ne (symbol_ref "TARGET_RDNA2_PLUS") (const_int 0)))
298 (and (eq_attr "rdna" "yes")
299 (eq (symbol_ref "TARGET_RDNA2_PLUS") (const_int 0)))
301 (and (eq_attr "cdna" "cdna2")
302 (eq (symbol_ref "TARGET_CDNA2_PLUS") (const_int 0)))
304 (and (eq_attr "xnack" "off")
305 (ne (symbol_ref "TARGET_XNACK") (const_int 0)))
307 (and (eq_attr "xnack" "on")
308 (eq (symbol_ref "TARGET_XNACK") (const_int 0)))
312 ; We need to be able to identify v_readlane and v_writelane with
313 ; SGPR lane selection in order to handle "Manually Inserted Wait States".
; Defaults to "no"; lane-select insns must set it explicitly.
315 (define_attr "laneselect" "yes,no" (const_string "no"))
317 ; Identify instructions that require a "Manually Inserted Wait State" if
318 ; their inputs are overwritten by subsequent instructions.
; Defaults to "no"; affected insns must set it explicitly.
320 (define_attr "delayeduse" "yes,no" (const_string "no"))
322 ; Identify instructions that require "Manually Inserted Wait State" if
323 ; a previous instruction writes to VCC. The number gives the number of NOPs.
; Numeric attribute; 0 means no wait needed after a VCC write.
325 (define_attr "vccwait" "" (const_int 0))
328 ;; {{{ Iterators useful across the whole machine description
; Mode iterators shared by many patterns below:
;   SIDI    - 32/64-bit integer modes
;   SFDF    - 32/64-bit float modes
;   SISF    - 32-bit integer and float
;   QIHI    - 8/16-bit integer modes
;   DIDF    - 64-bit integer and float
;   FP      - all scalar float modes (16/32/64-bit)
;   FP_1REG - float modes that fit in a single 32-bit register
330 (define_mode_iterator SIDI [SI DI])
331 (define_mode_iterator SFDF [SF DF])
332 (define_mode_iterator SISF [SI SF])
333 (define_mode_iterator QIHI [QI HI])
334 (define_mode_iterator DIDF [DI DF])
335 (define_mode_iterator FP [HF SF DF])
336 (define_mode_iterator FP_1REG [HF SF])
341 ; Translate RTX code into GCN instruction mnemonics with and without
342 ; suffixes such as _b32, etc.
344 (define_code_attr mnemonic
359 (popcount "bcnt_u32%b")])
361 (define_code_attr bare_mnemonic
368 (define_code_attr s_mnemonic
370 (popcount "bcnt1_i32%b")
373 (clrsb "flbit_i32%i")])
375 (define_code_attr revmnemonic
378 (lshiftrt "lshrrev%b")
379 (ashiftrt "ashrrev%i")])
381 ; Translate RTX code into corresponding expander name.
383 (define_code_attr expander
398 (popcount "popcount")
401 (sign_extend "extend")
402 (zero_extend "zero_extend")])
404 (define_code_attr fexpander
409 ;; {{{ Miscellaneous instructions
415 [(set_attr "type" "sopp")])
417 ; FIXME: What should the value of the immediate be? Zero is disallowed, so
420 [(trap_if (const_int 1) (const_int 0))]
423 [(set_attr "type" "sopp")])
428 ;; All scalar modes we support moves in.
429 (define_mode_iterator MOV_MODE [BI QI HI SI DI TI SF DF])
431 ; This is the entry point for creating all kinds of scalar moves,
432 ; including reloads and symbols.
434 (define_expand "mov<mode>"
435 [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
436 (match_operand:MOV_MODE 1 "general_operand"))]
439 if (SUBREG_P (operands[1])
440 && GET_MODE (operands[1]) == SImode
441 && GET_MODE (SUBREG_REG (operands[1])) == BImode)
443 /* (reg:BI VCC) has nregs==2 to ensure it gets clobbered as a whole,
444 but (subreg:SI (reg:BI VCC)) doesn't, which causes the LRA liveness
445 checks to assert. Transform this:
446 (set (reg:SI) (subreg:SI (reg:BI)))
448 (set (subreg:BI (reg:SI)) (reg:BI)) */
449 operands[0] = gen_rtx_SUBREG (BImode, operands[0], 0);
450 operands[1] = SUBREG_REG (operands[1]);
452 if (SUBREG_P (operands[0])
453 && GET_MODE (operands[0]) == SImode
454 && GET_MODE (SUBREG_REG (operands[0])) == BImode)
456 /* Likewise, transform this:
457 (set (subreg:SI (reg:BI)) (reg:SI))
459 (set (reg:BI) (subreg:BI (reg:SI))) */
460 operands[0] = SUBREG_REG (operands[0]);
461 operands[1] = gen_rtx_SUBREG (BImode, operands[1], 0);
464 if (MEM_P (operands[0]))
465 operands[1] = force_reg (<MODE>mode, operands[1]);
467 if (!lra_in_progress && !reload_completed
468 && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1]))
470 /* Something is probably trying to generate a move
471 which can only work indirectly.
472 E.g. Move from LDS memory to SGPR hardreg
473 or MEM:QI to SGPR. */
474 rtx tmpreg = gen_reg_rtx (<MODE>mode);
475 emit_insn (gen_mov<mode> (tmpreg, operands[1]));
476 emit_insn (gen_mov<mode> (operands[0], tmpreg));
480 if (<MODE>mode == DImode
481 && (GET_CODE (operands[1]) == SYMBOL_REF
482 || GET_CODE (operands[1]) == LABEL_REF))
485 emit_insn (gen_movdi_symbol_save_scc (operands[0], operands[1]));
487 emit_insn (gen_movdi_symbol (operands[0], operands[1]));
492 ; Split invalid moves into two valid moves
495 [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
496 (match_operand:MOV_MODE 1 "general_operand"))]
497 "!reload_completed && !lra_in_progress
498 && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
499 [(set (match_dup 2) (match_dup 1))
500 (set (match_dup 0) (match_dup 2))]
502 operands[2] = gen_reg_rtx(<MODE>mode);
505 ; We need BImode move so we can reload flags registers.
507 (define_insn "*movbi"
508 [(set (match_operand:BI 0 "nonimmediate_operand"
509 "=Sg, v,Sg,cs,cV,cV,Sm,&Sm,RS, v,&v,RF, v,&v,RM")
510 (match_operand:BI 1 "gcn_load_operand"
511 "SSA,vSvA, v,SS, v,SS,RS, RS,Sm,RF,RF, v,RM,RM, v"))]
514 /* SCC as an operand is currently not accepted by the LLVM assembler, so
515 we emit bytes directly as a workaround. */
516 switch (which_alternative) {
518 return "s_mov_b32\t%0, %1";
520 if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
521 return "; v_mov_b32\t%0, %1\;"
524 ".byte\t((%V0<<1)&0xff)\;"
525 ".byte\t0x7e|(%V0>>7)";
527 return "v_mov_b32\t%0, %1";
529 return "v_readlane_b32\t%0, %1, 0";
531 return "s_cmpk_lg_u32\t%1, 0";
533 return "v_cmp_ne_u32\tvcc, 0, %1";
535 return "s_mov_b32\tvcc_lo, %1\;"
536 "s_mov_b32\tvcc_hi, 0";
539 return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
541 return "s_store_dword\t%1, %A0";
544 return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
546 return "flat_store_dword\t%A0, %1%O0%g0";
549 return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
551 return "global_store_dword\t%A0, %1%O0%g0";
556 [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
557 flat,flat,flat,flat")
558 (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
559 (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
560 (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
564 (define_insn "*mov<mode>_insn"
565 [(set (match_operand:SISF 0 "nonimmediate_operand")
566 (match_operand:SISF 1 "gcn_load_operand"))]
568 {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
569 [SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1
570 [SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1
571 [SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
572 [SD ,RB ;smem ,* ,12,* ,off] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
573 [&SD ,RB ;smem ,* ,12,* ,on ] ^
574 [RB ,Sm ;smem ,* ,12,* ,* ] s_buffer_store%s1\t%1, s[0:3], %0
575 [Sm ,RS ;smem ,* ,12,* ,off] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
576 [&Sm ,RS ;smem ,* ,12,* ,on ] ^
577 [RS ,Sm ;smem ,* ,12,* ,* ] s_store_dword\t%1, %A0
578 [v ,v ;vop1 ,* ,4 ,* ,* ] v_mov_b32\t%0, %1
579 [Sg ,v ;vop3a,none,8 ,* ,* ] v_readlane_b32\t%0, %1, 0
580 [v ,Sv ;vop3a,none,8 ,* ,* ] v_writelane_b32\t%0, %1, 0
581 [v ,^a ;vop3p_mai,*,8,* ,* ] v_accvgpr_read_b32\t%0, %1
582 [a ,v ;vop3p_mai,*,8,* ,* ] v_accvgpr_write_b32\t%0, %1
583 [a ,a ;vop1 ,* ,4,cdna2,* ] v_accvgpr_mov_b32\t%0, %1
584 [v ,RF ;flat ,* ,12,* ,off] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
585 [&v ,RF ;flat ,* ,12,* ,on ] ^
586 [^a ,RF ;flat ,* ,12,cdna2,off] ^
587 [&^a ,RF ;flat ,* ,12,cdna2,on ] ^
588 [RF ,v ;flat ,* ,12,* ,* ] flat_store_dword\t%A0, %1%O0%g0
589 [RF ,a ;flat ,* ,12,cdna2,* ] ^
590 [v ,B ;vop1 ,* ,8 ,* ,* ] v_mov_b32\t%0, %1
591 [RLRG,v ;ds ,* ,12,* ,* ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
592 [v ,RLRG;ds ,* ,12,* ,* ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
593 [SD ,Y ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
594 [v ,RM ;flat ,* ,12,* ,off] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
595 [&v ,RM ;flat ,* ,12,* ,on ] ^
596 [^a ,RM ;flat ,* ,12,cdna2,off] ^
597 [&^a ,RM ;flat ,* ,12,cdna2,on ] ^
598 [RM ,v ;flat ,* ,12,* ,* ] global_store_dword\t%A0, %1%O0%g0
599 [RM ,a ;flat ,* ,12,cdna2,* ] ^
602 ; 8/16bit move pattern
603 ; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
605 (define_insn "*mov<mode>_insn"
606 [(set (match_operand:QIHI 0 "nonimmediate_operand")
607 (match_operand:QIHI 1 "gcn_load_operand"))]
608 "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
609 {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
610 [SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1
611 [SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1
612 [SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
613 [v ,v ;vop1 ,* ,4 ,* ,* ] v_mov_b32\t%0, %1
614 [Sg ,v ;vop3a,none,4 ,* ,* ] v_readlane_b32\t%0, %1, 0
615 [v ,Sv ;vop3a,none,4 ,* ,* ] v_writelane_b32\t%0, %1, 0
616 [v ,^a ;vop3p_mai,*,8,* ,* ] v_accvgpr_read_b32\t%0, %1
617 [a ,v ;vop3p_mai,*,8,* ,* ] v_accvgpr_write_b32\t%0, %1
618 [a ,a ;vop1 ,* ,8,cdna2,* ] v_accvgpr_mov_b32\t%0, %1
619 [v ,RF ;flat ,* ,12,* ,off] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
620 [&v ,RF ;flat ,* ,12,* ,on ] ^
621 [^a ,RF ;flat ,* ,12,cdna2,off] ^
622 [&^a ,RF ;flat ,* ,12,cdna2,on ] ^
623 [RF ,v ;flat ,* ,12,* ,* ] flat_store%s0\t%A0, %1%O0%g0
624 [RF ,a ;flat ,* ,12,cdna2,* ] ^
625 [v ,B ;vop1 ,* ,8 ,* ,* ] v_mov_b32\t%0, %1
626 [RLRG,v ;ds ,* ,12,* ,* ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
627 [v ,RLRG;ds ,* ,12,* ,* ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
628 [v ,RM ;flat ,* ,12,* ,off] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
629 [&v ,RM ;flat ,* ,12,* ,on ] ^
630 [^a ,RM ;flat ,* ,12,cdna2,off] ^
631 [&^a ,RM ;flat ,* ,12,cdna2,on ] ^
632 [RM ,v ;flat ,* ,12,* ,* ] global_store%s0\t%A0, %1%O0%g0
633 [RM ,a ;flat ,* ,12,cdna2,* ] ^
638 (define_insn_and_split "*mov<mode>_insn"
639 [(set (match_operand:DIDF 0 "nonimmediate_operand")
640 (match_operand:DIDF 1 "general_operand"))]
641 "GET_CODE(operands[1]) != SYMBOL_REF"
642 {@ [cons: =0, 1; attrs: type, length, cdna, xnack]
643 [SD ,SSA ;sop1 ,4 ,* ,* ] s_mov_b64\t%0, %1
644 [SD ,C ;sop1 ,8 ,* ,* ] ^
645 [SD ,DB ;mult ,* ,* ,* ] #
646 [RS ,Sm ;smem ,12,* ,* ] s_store_dwordx2\t%1, %A0
647 [Sm ,RS ;smem ,12,* ,off] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
648 [&Sm ,RS ;smem ,12,* ,on ] ^
649 [v ,v ;vmult,* ,* ,* ] #
650 [v ,DB ;vmult,* ,* ,* ] #
651 [Sg ,v ;vmult,* ,* ,* ] #
652 [v ,Sv ;vmult,* ,* ,* ] #
653 [v ,^a ;vmult,* ,* ,* ] #
654 [a ,v ;vmult,* ,* ,* ] #
655 [a ,a ;vmult,* ,cdna2,* ] #
656 [v ,RF ;flat ,12,* ,off] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
657 [&v ,RF ;flat ,12,* ,on ] ^
658 [^a ,RF ;flat ,12,cdna2,off] ^
659 [&^a ,RF ;flat ,12,cdna2,on ] ^
660 [RF ,v ;flat ,12,* ,* ] flat_store_dwordx2\t%A0, %1%O0%g0
661 [RF ,a ;flat ,12,cdna2,* ] ^
662 [RLRG,v ;ds ,12,* ,* ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
663 [v ,RLRG;ds ,12,* ,* ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
664 [v ,RM ;flat ,12,* ,off] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
665 [&v ,RM ;flat ,12,* ,on ] ^
666 [^a ,RM ;flat ,12,cdna2,off] ^
667 [&^a ,RM ;flat ,12,cdna2,on ] ^
668 [RM ,v ;flat ,12,* ,* ] global_store_dwordx2\t%A0, %1%O0%g0
669 [RM ,a ;flat ,12,cdna2,* ] ^
672 && ((!MEM_P (operands[0]) && !MEM_P (operands[1])
673 && !gcn_sgpr_move_p (operands[0], operands[1]))
674 || (GET_CODE (operands[1]) == CONST_INT
675 && !gcn_constant64_p (operands[1])))"
676 [(set (match_dup 0) (match_dup 1))
677 (set (match_dup 2) (match_dup 3))]
679 rtx inlo = gen_lowpart (SImode, operands[1]);
680 rtx inhi = gen_highpart_mode (SImode, <MODE>mode, operands[1]);
681 rtx outlo = gen_lowpart (SImode, operands[0]);
682 rtx outhi = gen_highpart_mode (SImode, <MODE>mode, operands[0]);
684 /* Ensure that overlapping registers aren't corrupted. */
685 if (reg_overlap_mentioned_p (outlo, inhi))
703 (define_insn_and_split "*movti_insn"
704 [(set (match_operand:TI 0 "nonimmediate_operand")
705 (match_operand:TI 1 "general_operand" ))]
707 {@ [cons: =0, 1; attrs: type, delayeduse, length, cdna, xnack]
708 [SD ,SSB;mult ,* ,* ,* ,* ] #
709 [RS ,Sm ;smem ,* ,12,* ,* ] s_store_dwordx4\t%1, %A0
710 [Sm ,RS ;smem ,yes,12,* ,off] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
711 [&Sm,RS ;smem ,yes,12,* ,on ] ^
712 [RF ,v ;flat ,* ,12,* ,* ] flat_store_dwordx4\t%A0, %1%O0%g0
713 [RF ,a ;flat ,* ,12,cdna2,* ] ^
714 [v ,RF ;flat ,* ,12,* ,off] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
715 [&v ,RF ;flat ,* ,12,* ,on ] ^
716 [^a ,RF ;flat ,* ,12,cdna2,off] ^
717 [&^a,RF ;flat ,* ,12,cdna2,on ] ^
718 [v ,v ;vmult,* ,* ,* ,* ] #
719 [v ,Sv ;vmult,* ,* ,* ,* ] #
720 [SD ,v ;vmult,* ,* ,* ,* ] #
721 [RM ,v ;flat ,yes,12,* ,* ] global_store_dwordx4\t%A0, %1%O0%g0
722 [RM ,a ;flat ,yes,12,cdna2,* ] ^
723 [v ,RM ;flat ,* ,12,* ,off] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
724 [&v ,RM ;flat ,* ,12,* ,on ] ^
725 [^a ,RM ;flat ,* ,12,cdna2,off] ^
726 [&^a,RM ;flat ,* ,12,cdna2,on ] ^
727 [RL ,v ;ds ,* ,12,* ,* ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
728 [v ,RL ;ds ,* ,12,* ,* ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
729 [v ,^a ;vmult,* ,* ,* ,* ] #
730 [a ,v ;vmult,* ,* ,* ,* ] #
731 [a ,a ;vmult,* ,* ,cdna2,* ] #
734 && REG_P (operands[0])
735 && (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
736 [(set (match_dup 0) (match_dup 1))
737 (set (match_dup 2) (match_dup 3))
738 (set (match_dup 4) (match_dup 5))
739 (set (match_dup 6) (match_dup 7))]
741 gcc_assert (rtx_equal_p (operands[0], operands[1])
742 || !reg_overlap_mentioned_p (operands[0], operands[1]));
743 operands[6] = gcn_operand_part (TImode, operands[0], 3);
744 operands[7] = gcn_operand_part (TImode, operands[1], 3);
745 operands[4] = gcn_operand_part (TImode, operands[0], 2);
746 operands[5] = gcn_operand_part (TImode, operands[1], 2);
747 operands[2] = gcn_operand_part (TImode, operands[0], 1);
748 operands[3] = gcn_operand_part (TImode, operands[1], 1);
749 operands[0] = gcn_operand_part (TImode, operands[0], 0);
750 operands[1] = gcn_operand_part (TImode, operands[1], 0);
754 ;; {{{ Prologue/Epilogue
756 (define_insn "prologue_use"
757 [(unspec_volatile [(match_operand 0 "register_operand")] UNSPECV_PROLOGUE_USE)]
758 "1 /* This comment silences a warning for operands[2]. */"
760 [(set_attr "length" "0")])
762 (define_expand "prologue"
766 gcn_expand_prologue ();
770 (define_expand "epilogue"
774 gcn_expand_epilogue ();
781 ; This pattern must satisfy simplejump_p, which means it cannot be a parallel
782 ; that clobbers SCC. Thus, we must preserve SCC if we're generating a long
787 (label_ref (match_operand 0)))]
790 if (get_attr_length (insn) == 4)
791 return "s_branch\t%0";
793 /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG. */
794 return "s_mov_b32\ts22, scc\;"
795 "s_getpc_b64\ts[20:21]\;"
796 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
797 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
798 "s_cmpk_lg_u32\ts22, 0\;"
799 "s_setpc_b64\ts[20:21]";
801 [(set_attr "type" "sopp")
803 (if_then_else (and (ge (minus (match_dup 0) (pc))
805 (lt (minus (match_dup 0) (pc))
810 (define_insn "indirect_jump"
812 (match_operand:DI 0 "register_operand" "Sg"))]
815 [(set_attr "type" "sop1")
816 (set_attr "length" "4")])
821 (match_operator:BI 1 "gcn_conditional_operator"
822 [(match_operand:BI 2 "gcn_conditional_register_operand" "ca,cV")
824 (label_ref (match_operand 0))
828 if (get_attr_length (insn) == 4)
829 return "s_cbranch%C1\t%0";
832 /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG but
834 if (REGNO (operands[2]) == SCC_REG)
836 if (GET_CODE (operands[1]) == EQ)
837 return "s_cbranch%c1\t.Lskip%=\;"
838 "s_getpc_b64\ts[20:21]\;"
839 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
840 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
841 "s_cmp_lg_u32\t0, 0\;"
842 "s_setpc_b64\ts[20:21]\n"
845 return "s_cbranch%c1\t.Lskip%=\;"
846 "s_getpc_b64\ts[20:21]\;"
847 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
848 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
849 "s_cmp_eq_u32\t0, 0\;"
850 "s_setpc_b64\ts[20:21]\n"
854 return "s_cbranch%c1\t.Lskip%=\;"
855 "s_mov_b32\ts22, scc\;"
856 "s_getpc_b64\ts[20:21]\;"
857 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
858 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
859 "s_cmpk_lg_u32\ts22, 0\;"
860 "s_setpc_b64\ts[20:21]\n"
864 [(set_attr "type" "sopp")
866 (if_then_else (and (ge (minus (match_dup 0) (pc))
868 (lt (minus (match_dup 0) (pc))
873 ; Returning from a normal function is different to returning from a
876 (define_insn "gcn_return"
880 if (cfun && cfun->machine && cfun->machine->normal_function)
881 return "s_setpc_b64\ts[18:19]";
883 return "s_waitcnt\tlgkmcnt(0)\;s_endpgm";
885 [(set_attr "type" "sop1")
886 (set_attr "length" "12")])
888 (define_expand "call"
889 [(parallel [(call (match_operand 0 "")
890 (match_operand 1 ""))
891 (clobber (reg:DI LR_REGNUM))
892 (clobber (match_scratch:DI 2))])]
896 (define_insn "gcn_simple_call"
897 [(call (mem (match_operand 0 "immediate_operand" "Y,B"))
898 (match_operand 1 "const_int_operand"))
899 (clobber (reg:DI LR_REGNUM))
900 (clobber (match_scratch:DI 2 "=&Sg,X"))]
903 s_getpc_b64\t%2\;s_add_u32\t%L2, %L2, %0@rel32@lo+4\;s_addc_u32\t%H2, %H2, %0@rel32@hi+4\;s_swappc_b64\ts[18:19], %2
904 s_swappc_b64\ts[18:19], %0"
905 [(set_attr "type" "mult,sop1")
906 (set_attr "length" "24,4")])
908 (define_insn "movdi_symbol"
909 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
910 (match_operand:DI 1 "general_operand" "Y"))
911 (clobber (reg:BI SCC_REG))]
912 "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
914 /* This s_load may not be XNACK-safe on devices where the GOT may fault.
915 DGPUs are most likely fine. */
916 if (SYMBOL_REF_P (operands[1])
917 && SYMBOL_REF_WEAK (operands[1]))
918 return "s_getpc_b64\t%0\;"
919 "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
920 "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
921 "s_load_dwordx2\t%0, %0\;"
922 "s_waitcnt\tlgkmcnt(0)";
924 return "s_getpc_b64\t%0\;"
925 "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
926 "s_addc_u32\t%H0, %H0, %1@rel32@hi+4";
928 [(set_attr "type" "mult")
929 (set_attr "length" "32")])
931 (define_insn "movdi_symbol_save_scc"
932 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
933 (match_operand:DI 1 "general_operand" "Y"))
934 (clobber (reg:BI CC_SAVE_REG))]
935 "(GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF)
936 && (lra_in_progress || reload_completed)"
938 /* !!! These sequences clobber CC_SAVE_REG. */
940 /* This s_load may not be XNACK-safe on devices where the GOT may fault.
941 DGPUs are most likely fine. */
942 if (SYMBOL_REF_P (operands[1])
943 && SYMBOL_REF_WEAK (operands[1]))
944 return "s_mov_b32\ts22, scc\;"
946 "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
947 "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
948 "s_load_dwordx2\t%0, %0\;"
949 "s_cmpk_lg_u32\ts22, 0\;"
950 "s_waitcnt\tlgkmcnt(0)";
952 return "s_mov_b32\ts22, scc\;"
954 "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
955 "s_addc_u32\t%H0, %H0, %1@rel32@hi+4\;"
956 "s_cmpk_lg_u32\ts22, 0";
958 [(set_attr "type" "mult")
959 (set_attr "length" "40")])
962 (define_insn "gcn_indirect_call"
963 [(call (mem (match_operand:DI 0 "register_operand" "Sg"))
964 (match_operand 1 "" ""))
965 (clobber (reg:DI LR_REGNUM))
966 (clobber (match_scratch:DI 2 "=X"))]
968 "s_swappc_b64\ts[18:19], %0"
969 [(set_attr "type" "sop1")
970 (set_attr "length" "4")])
972 (define_expand "call_value"
973 [(parallel [(set (match_operand 0 "")
974 (call (match_operand 1 "")
975 (match_operand 2 "")))
976 (clobber (reg:DI LR_REGNUM))
977 (clobber (match_scratch:DI 3))])]
981 (define_insn "gcn_call_value"
982 [(set (match_operand 0 "register_operand" "=Sgv,Sgv")
983 (call (mem (match_operand 1 "immediate_operand" " Y, B"))
984 (match_operand 2 "const_int_operand")))
985 (clobber (reg:DI LR_REGNUM))
986 (clobber (match_scratch:DI 3 "=&Sg, X"))]
989 s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
990 s_swappc_b64\ts[18:19], %1"
991 [(set_attr "type" "sop1")
992 (set_attr "length" "24")])
994 (define_insn "gcn_call_value_indirect"
995 [(set (match_operand 0 "register_operand" "=Sgv")
996 (call (mem (match_operand:DI 1 "register_operand" " Sg"))
997 (match_operand 2 "" "")))
998 (clobber (reg:DI LR_REGNUM))
999 (clobber (match_scratch:DI 3 "= X"))]
1001 "s_swappc_b64\ts[18:19], %1"
1002 [(set_attr "type" "sop1")
1003 (set_attr "length" "4")])
1005 ; GCN does not have an instruction to clear only part of the instruction
1006 ; cache, so the operands are ignored.
1008 (define_insn "clear_icache"
1010 [(match_operand 0 "") (match_operand 1 "")]
1011 UNSPECV_ICACHE_INV)]
1014 [(set_attr "type" "sopp")
1015 (set_attr "length" "4")])
1020 ; 32-bit compare, scalar unit only
1022 (define_insn "cstoresi4"
1023 [(set (match_operand:BI 0 "gcn_conditional_register_operand"
1025 (match_operator:BI 1 "gcn_compare_operator"
1026 [(match_operand:SI 2 "gcn_alu_operand" "SSA,SSA,SSB, SS")
1027 (match_operand:SI 3 "gcn_alu_operand" "SSA,SSL, SS,SSB")]))]
1034 [(set_attr "type" "sopc,sopk,sopk,sopk")
1035 (set_attr "length" "4,4,8,8")])
1037 (define_expand "cbranchsi4"
1038 [(match_operator 0 "gcn_compare_operator"
1039 [(match_operand:SI 1 "gcn_alu_operand")
1040 (match_operand:SI 2 "gcn_alu_operand")])
1044 rtx cc = gen_reg_rtx (BImode);
1045 emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2]));
1046 emit_jump_insn (gen_cjump (operands[3],
1047 gen_rtx_NE (BImode, cc, const0_rtx), cc));
1051 ; 64-bit compare; either unit, but scalar allows limited operators
1053 (define_expand "cstoredi4"
1054 [(set (match_operand:BI 0 "gcn_conditional_register_operand")
1055 (match_operator:BI 1 "gcn_compare_operator"
1056 [(match_operand:DI 2 "gcn_alu_operand")
1057 (match_operand:DI 3 "gcn_alu_operand")]))]
1061 (define_insn "cstoredi4_vec_and_scalar"
1062 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cs, cV")
1063 (match_operator:BI 1 "gcn_compare_64bit_operator"
1064 [(match_operand:DI 2 "gcn_alu_operand" "%SSA,vSvC")
1065 (match_operand:DI 3 "gcn_alu_operand" " SSC, v")]))]
1069 v_cmp%E1\tvcc, %2, %3"
1070 [(set_attr "type" "sopc,vopc")
1071 (set_attr "length" "8")])
1073 (define_insn "cstoredi4_vector"
1074 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cV")
1075 (match_operator:BI 1 "gcn_compare_operator"
1076 [(match_operand:DI 2 "gcn_alu_operand" "vSvB")
1077 (match_operand:DI 3 "gcn_alu_operand" " v")]))]
1079 "v_cmp%E1\tvcc, %2, %3"
1080 [(set_attr "type" "vopc")
1081 (set_attr "length" "8")])
1083 (define_expand "cbranchdi4"
1084 [(match_operator 0 "gcn_compare_operator"
1085 [(match_operand:DI 1 "gcn_alu_operand")
1086 (match_operand:DI 2 "gcn_alu_operand")])
1090 rtx cc = gen_reg_rtx (BImode);
1091 emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2]));
1092 emit_jump_insn (gen_cjump (operands[3],
1093 gen_rtx_NE (BImode, cc, const0_rtx), cc));
1097 ; FP compare; vector unit only
1099 (define_insn "cstore<mode>4"
1100 [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cV")
1101 (match_operator:BI 1 "gcn_fp_compare_operator"
1102 [(match_operand:SFDF 2 "gcn_alu_operand" "vB")
1103 (match_operand:SFDF 3 "gcn_alu_operand" "v")]))]
1105 "v_cmp%E1\tvcc, %2, %3"
1106 [(set_attr "type" "vopc")
1107 (set_attr "length" "8")])
1109 (define_expand "cbranch<mode>4"
1110 [(match_operator 0 "gcn_fp_compare_operator"
1111 [(match_operand:SFDF 1 "gcn_alu_operand")
1112 (match_operand:SFDF 2 "gcn_alu_operand")])
1116 rtx cc = gen_reg_rtx (BImode);
1117 emit_insn (gen_cstore<mode>4 (cc, operands[0], operands[1], operands[2]));
1118 emit_jump_insn (gen_cjump (operands[3],
1119 gen_rtx_NE (BImode, cc, const0_rtx), cc));
1124 ;; {{{ ALU special cases: Plus
1126 (define_insn "addsi3"
1127 [(set (match_operand:SI 0 "register_operand" "= Sg, Sg, Sg, v")
1128 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
1129 (match_operand:SI 2 "gcn_alu_operand" " SgA,SgJ, B,vBSv")))
1130 (clobber (match_scratch:BI 3 "= cs, cs, cs, X"))
1131 (clobber (match_scratch:DI 4 "= X, X, X, cV"))]
1134 s_add_i32\t%0, %1, %2
1136 s_add_i32\t%0, %1, %2
1137 v_add_co_u32\t%0, vcc, %2, %1"
1138 [(set_attr "type" "sop2,sopk,sop2,vop2")
1139 (set_attr "length" "4,4,8,8")])
; Variant of addsi3 with the SCC clobber made explicit, for callers that
; need to state the condition-code side effect up front.
1141 (define_expand "addsi3_scc"
1142 [(parallel [(set (match_operand:SI 0 "register_operand")
1143 (plus:SI (match_operand:SI 1 "gcn_alu_operand")
1144 (match_operand:SI 2 "gcn_alu_operand")))
1145 (clobber (reg:BI SCC_REG))
1146 (clobber (scratch:DI))])]
1150 ; Having this as an insn_and_split allows us to keep together DImode adds
1151 ; through some RTL optimisation passes, and means the CC reg we set isn't
1152 ; dependent on the constraint alternative (which doesn't seem to work well).
1154 ; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
1155 ; used as an operand due to the read of VCC, so we restrict constants to the
1156 ; inlinable range for that alternative.
; 64-bit add, split after reload into a low-part add-with-carry-out
; (addsi3_scalar_carry) followed by a high-part add-with-carry-in
; (addcsi3_scalar, or addcsi3_scalar_zero when the high half of the
; constant is zero).  The carry register is VCC for the VGPR alternative
; and SCC for the SGPR alternative.
1158 (define_insn_and_split "adddi3"
1159 [(set (match_operand:DI 0 "register_operand" "=Sg, v")
1160 (plus:DI (match_operand:DI 1 "register_operand" " Sg, v")
1161 (match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
1162 (clobber (match_scratch:BI 3 "=cs, X"))
1163 (clobber (match_scratch:DI 4 "= X,cV"))]
1166 "&& reload_completed"
1169 rtx cc = gen_rtx_REG (BImode, gcn_vgpr_register_operand (operands[1],
1171 ? VCC_REG : SCC_REG);
1173 emit_insn (gen_addsi3_scalar_carry
1174 (gcn_operand_part (DImode, operands[0], 0),
1175 gcn_operand_part (DImode, operands[1], 0),
1176 gcn_operand_part (DImode, operands[2], 0),
1178 rtx val = gcn_operand_part (DImode, operands[2], 1);
1179 if (val != const0_rtx)
1180 emit_insn (gen_addcsi3_scalar
1181 (gcn_operand_part (DImode, operands[0], 1),
1182 gcn_operand_part (DImode, operands[1], 1),
1183 gcn_operand_part (DImode, operands[2], 1),
1186 emit_insn (gen_addcsi3_scalar_zero
1187 (gcn_operand_part (DImode, operands[0], 1),
1188 gcn_operand_part (DImode, operands[1], 1),
1192 [(set_attr "type" "mult,vmult")
1193 (set_attr "length" "8")])
; Variant of adddi3 with the SCC clobber made explicit (parallel to
; addsi3_scc above).
1195 (define_expand "adddi3_scc"
1196 [(parallel [(set (match_operand:DI 0 "register_operand")
1197 (plus:DI (match_operand:DI 1 "register_operand")
1198 (match_operand:DI 2 "nonmemory_operand")))
1199 (clobber (reg:BI SCC_REG))
1200 (clobber (scratch:DI))])]
; Low-part 32-bit add that also produces the carry-out: operand 3 is set to
; the unsigned-overflow condition (sum < addend) in SCC or VCC.
1206 (define_insn "addsi3_scalar_carry"
1207 [(set (match_operand:SI 0 "register_operand" "= Sg, v")
1208 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, v")
1209 (match_operand:SI 2 "gcn_alu_operand" " SgB,vB")))
1210 (set (match_operand:BI 3 "register_operand" "= cs,cV")
1211 (ltu:BI (plus:SI (match_dup 1)
1216 s_add_u32\t%0, %1, %2
1217 v_add_co_u32\t%0, vcc, %2, %1"
1218 [(set_attr "type" "sop2,vop2")
1219 (set_attr "length" "8,8")])
; As addsi3_scalar_carry but for a constant addend; the carry-out is
; expressed as GEU against the negated constant (operand 3), which the
; condition INTVAL (operands[2]) == -INTVAL (operands[3]) enforces.
1221 (define_insn "addsi3_scalar_carry_cst"
1222 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1223 (plus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA, v")
1224 (match_operand:SI 2 "const_int_operand" " n, n")))
1225 (set (match_operand:BI 4 "register_operand" "=cs,cV")
1226 (geu:BI (plus:SI (match_dup 1)
1228 (match_operand:SI 3 "const_int_operand" " n, n")))]
1229 "INTVAL (operands[2]) == -INTVAL (operands[3])"
1231 s_add_u32\t%0, %1, %2
1232 v_add_co_u32\t%0, vcc, %2, %1"
1233 [(set_attr "type" "sop2,vop2")
1234 (set_attr "length" "4")])
; High-part add with carry-in (operand 3) and carry-out (operand 4, tied to
; the same register as the carry-in via the "3" constraint).  The carry-out
; is the IOR of overflow from either addition step.
1236 (define_insn "addcsi3_scalar"
1237 [(set (match_operand:SI 0 "register_operand" "= Sg, v")
1238 (plus:SI (plus:SI (zero_extend:SI
1239 (match_operand:BI 3 "register_operand" "= cs,cV"))
1240 (match_operand:SI 1 "gcn_alu_operand" "%SgA, v"))
1241 (match_operand:SI 2 "gcn_alu_operand" " SgB,vA")))
1242 (set (match_operand:BI 4 "register_operand" "= 3, 3")
1243 (ior:BI (ltu:BI (plus:SI
1245 (zero_extend:SI (match_dup 3))
1249 (ltu:BI (plus:SI (zero_extend:SI (match_dup 3)) (match_dup 1))
1253 s_addc_u32\t%0, %1, %2
1254 {v_addc_co_u32|v_add_co_ci_u32}\t%0, vcc, %2, %1, vcc"
1255 [(set_attr "type" "sop2,vop2")
1256 (set_attr "length" "8,4")])
; High-part add of just the carry-in (second addend known to be zero);
; still produces a carry-out for further chaining.
1258 (define_insn "addcsi3_scalar_zero"
1259 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1260 (plus:SI (zero_extend:SI
1261 (match_operand:BI 2 "register_operand" "=cs,cV"))
1262 (match_operand:SI 1 "gcn_alu_operand" "SgA, v")))
1264 (ltu:BI (plus:SI (zero_extend:SI (match_dup 2))
1269 s_addc_u32\t%0, %1, 0
1270 {v_addc_co_u32|v_add_co_ci_u32}\t%0, vcc, 0, %1, vcc"
1271 [(set_attr "type" "sop2,vop2")
1272 (set_attr "length" "4")])
1274 ; "addptr" is the same as "add" except that it must not write to VCC or SCC
1275 ; as a side-effect. Unfortunately GCN does not have a suitable instruction
1276 ; for this, so we use CC_SAVE_REG as a temp.
1277 ; Note that it is not safe to save/clobber/restore as separate insns because
1278 ; doing so will break data-flow analysis, so this must use multiple
1279 ; instructions in one insn.
1281 ; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
1282 ; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway.
1284 ; The SGPR alternative is preferred as it is typically used with mov_sgprbase.
; 64-bit pointer add that must leave VCC and SCC unchanged (see the comment
; above): the VGPR alternative routes the carry through CC_SAVE_REG, while
; the SGPR alternative saves SCC to CC_SAVE_REG and restores it afterwards
; via s_cmpk_lg_u32 — all within a single insn so data-flow stays correct.
1286 (define_insn "addptrdi3"
1287 [(set (match_operand:DI 0 "register_operand" "= v, Sg")
1289 (plus:DI (match_operand:DI 1 "register_operand" "^v0,Sg0")
1290 (match_operand:DI 2 "nonmemory_operand" "vDA,SgDB"))]
1294 if (which_alternative == 0)
1296 rtx new_operands[4] = { operands[0], operands[1], operands[2],
1297 gen_rtx_REG (DImode, CC_SAVE_REG) };
1299 output_asm_insn ("v_add_co_u32\t%L0, %3, %L2, %L1", new_operands);
1300 output_asm_insn ("{v_addc_co_u32|v_add_co_ci_u32}\t%H0, %3, %H2, %H1, %3",
1305 rtx new_operands[4] = { operands[0], operands[1], operands[2],
1306 gen_rtx_REG (BImode, CC_SAVE_REG) };
1308 output_asm_insn ("s_mov_b32\t%3, scc", new_operands);
1309 output_asm_insn ("s_add_u32\t%L0, %L1, %L2", new_operands);
1310 output_asm_insn ("s_addc_u32\t%H0, %H1, %H2", new_operands);
1311 output_asm_insn ("s_cmpk_lg_u32\t%3, 0", new_operands);
1316 [(set_attr "type" "vmult,mult")
1317 (set_attr "length" "16,24")])
1320 ;; {{{ ALU special cases: Minus
; 32-bit subtract.  Scalar alternatives clobber SCC; vector alternatives
; clobber VCC.  v_subrev is used when the constant/scalar source is the
; minuend so operand order matches the hardware encoding.
1322 (define_insn "subsi3"
1323 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v, v")
1324 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA, v,vBSv")
1325 (match_operand:SI 2 "gcn_alu_operand" "SgA, B, vBSv, v")))
1326 (clobber (match_scratch:BI 3 "=cs, cs, X, X"))
1327 (clobber (match_scratch:DI 4 "= X, X, cV, cV"))]
1330 s_sub_i32\t%0, %1, %2
1331 s_sub_i32\t%0, %1, %2
1332 v_subrev_co_u32\t%0, vcc, %2, %1
1333 v_sub_co_u32\t%0, vcc, %1, %2"
1334 [(set_attr "type" "sop2,sop2,vop2,vop2")
1335 (set_attr "length" "4,8,8,8")])
; 64-bit subtract (scalar registers only), split into a low-part subtract
; with borrow-out (subsi3_scalar_carry) and a high-part subtract with
; borrow-in (subcsi3_scalar / subcsi3_scalar_zero) — the mirror image of
; adddi3 above.
1337 (define_insn_and_split "subdi3"
1338 [(set (match_operand:DI 0 "register_operand" "=Sg, Sg")
1340 (match_operand:DI 1 "gcn_alu_operand" "SgA,SgB")
1341 (match_operand:DI 2 "gcn_alu_operand" "SgB,SgA")))
1342 (clobber (reg:BI SCC_REG))]
1348 emit_insn (gen_subsi3_scalar_carry
1349 (gcn_operand_part (DImode, operands[0], 0),
1350 gcn_operand_part (DImode, operands[1], 0),
1351 gcn_operand_part (DImode, operands[2], 0)));
1352 rtx val = gcn_operand_part (DImode, operands[2], 1);
1353 if (val != const0_rtx)
1354 emit_insn (gen_subcsi3_scalar
1355 (gcn_operand_part (DImode, operands[0], 1),
1356 gcn_operand_part (DImode, operands[1], 1),
1357 gcn_operand_part (DImode, operands[2], 1)));
1359 emit_insn (gen_subcsi3_scalar_zero
1360 (gcn_operand_part (DImode, operands[0], 1),
1361 gcn_operand_part (DImode, operands[1], 1)));
1364 [(set_attr "length" "8")])
; Low-part 32-bit subtract that sets SCC to the borrow-out (unsigned
; underflow: difference > minuend).
1366 (define_insn "subsi3_scalar_carry"
1367 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg")
1368 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB")
1369 (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
1370 (set (reg:BI SCC_REG)
1371 (gtu:BI (minus:SI (match_dup 1)
1375 "s_sub_u32\t%0, %1, %2"
1376 [(set_attr "type" "sop2")
1377 (set_attr "length" "8")])
; As subsi3_scalar_carry but for a constant subtrahend; the borrow-out is
; written as LEU against the negated constant (operand 3), enforced by the
; INTVAL (operands[2]) == -INTVAL (operands[3]) condition.
1379 (define_insn "subsi3_scalar_carry_cst"
1380 [(set (match_operand:SI 0 "register_operand" "=Sg")
1381 (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA")
1382 (match_operand:SI 2 "const_int_operand" " n")))
1383 (set (reg:BI SCC_REG)
1384 (leu:BI (minus:SI (match_dup 1)
1386 (match_operand:SI 3 "const_int_operand" " n")))]
1387 "INTVAL (operands[2]) == -INTVAL (operands[3])"
1388 "s_sub_u32\t%0, %1, %2"
1389 [(set_attr "type" "sop2")
1390 (set_attr "length" "4")])
; High-part subtract with borrow-in (read from SCC) and borrow-out (written
; back to SCC as the IOR of underflow from either subtraction step).
1392 (define_insn "subcsi3_scalar"
1393 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg")
1394 (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1395 (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB"))
1396 (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
1397 (set (reg:BI SCC_REG)
1398 (ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1402 (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1406 "s_subb_u32\t%0, %1, %2"
1407 [(set_attr "type" "sop2")
1408 (set_attr "length" "8")])
; High-part subtract of just the borrow-in (subtrahend known to be zero);
; still updates SCC with the borrow-out.
1410 (define_insn "subcsi3_scalar_zero"
1411 [(set (match_operand:SI 0 "register_operand" "=Sg")
1412 (minus:SI (zero_extend:SI (reg:BI SCC_REG))
1413 (match_operand:SI 1 "gcn_alu_operand" "SgA")))
1414 (set (reg:BI SCC_REG)
1415 (gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
1418 "s_subb_u32\t%0, %1, 0"
1419 [(set_attr "type" "sop2")
1420 (set_attr "length" "4")])
1425 ; Vector multiply has vop3a encoding, but no corresponding vop2a, so no long
1427 ; The "s_mulk_i32" variant sets SCC to indicate overflow (which we don't care
1428 ; about here, but we need to indicate the clobbering).
; 32-bit multiply.  The s_mulk_i32 (sopk) alternative overwrites operand 0
; in place and clobbers SCC for its overflow flag; the other alternatives
; leave the condition codes alone.
1429 (define_insn "mulsi3"
1430 [(set (match_operand:SI 0 "register_operand" "= Sg,Sg, Sg, v")
1431 (mult:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
1432 (match_operand:SI 2 "gcn_alu_operand" " SgA, J, B,vASv")))
1433 (clobber (match_scratch:BI 3 "=X,cs, X, X"))]
1436 s_mul_i32\t%0, %1, %2
1438 s_mul_i32\t%0, %1, %2
1439 v_mul_lo_u32\t%0, %1, %2"
1440 [(set_attr "type" "sop2,sopk,sop2,vop3a")
1441 (set_attr "length" "4,4,8,4")])
; Iterator and mnemonic-fragment attributes used by the widening-multiply
; patterns below to generate both signed and unsigned variants.
1443 (define_code_iterator any_extend [sign_extend zero_extend])
1444 (define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
1445 (define_code_attr su [(sign_extend "s") (zero_extend "u")])
1446 (define_code_attr u [(sign_extend "") (zero_extend "u")])
1447 (define_code_attr iu [(sign_extend "i") (zero_extend "u")])
1448 (define_code_attr e [(sign_extend "e") (zero_extend "")])
; High-part 32x32->32 multiply: dispatch to the _reg or _imm insn depending
; on whether operand 2 is a register.
1450 (define_expand "<su>mulsi3_highpart"
1451 [(set (match_operand:SI 0 "register_operand" "")
1456 (match_operand:SI 1 "register_operand" ""))
1458 (match_operand:SI 2 "gcn_alu_operand" "")))
1462 if (REG_P (operands[2]))
1463 emit_insn (gen_<su>mulsi3_highpart_reg (operands[0], operands[1],
1466 emit_insn (gen_<su>mulsi3_highpart_imm (operands[0], operands[1],
; High-part multiply with a register second operand; <sgnsuffix> selects the
; signed/unsigned hi-multiply mnemonic.
1472 (define_insn "<su>mulsi3_highpart_reg"
1473 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1478 (match_operand:SI 1 "register_operand" "%Sg, v"))
1480 (match_operand:SI 2 "register_operand" "Sg,vSv")))
1484 s_mul_hi<sgnsuffix>0\t%0, %1, %2
1485 v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1486 [(set_attr "type" "sop2,vop3a")
1487 (set_attr "length" "4,8")])
; High-part multiply with an immediate second operand (DImode so the
; 32-bit constant is treated with the proper extension).
1489 (define_insn "<su>mulsi3_highpart_imm"
1490 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg,v")
1495 (match_operand:SI 1 "register_operand" "Sg,Sg,v"))
1496 (match_operand:DI 2 "gcn_32bit_immediate_operand" "A, B,A"))
1500 s_mul_hi<sgnsuffix>0\t%0, %1, %2
1501 s_mul_hi<sgnsuffix>0\t%0, %1, %2
1502 v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1503 [(set_attr "type" "sop2,sop2,vop3a")
1504 (set_attr "length" "4,8,8")])
; Widening 32x32->64 multiply: dispatch to the _reg or _imm variant
; depending on whether operand 2 is a register.
1506 (define_expand "<su>mulsidi3"
1507 [(set (match_operand:DI 0 "register_operand" "")
1508 (mult:DI (any_extend:DI
1509 (match_operand:SI 1 "register_operand" ""))
1511 (match_operand:SI 2 "nonmemory_operand" ""))))]
1514 if (REG_P (operands[2]))
1515 emit_insn (gen_<su>mulsidi3_reg (operands[0], operands[1], operands[2]));
1517 emit_insn (gen_<su>mulsidi3_imm (operands[0], operands[1], operands[2]));
; Widening multiply (register form), split into a low-part mulsi3 plus a
; high-part <su>mulsi3_highpart.  The earlyclobber "&" keeps the DImode
; destination from overlapping the still-live inputs.
1522 (define_insn_and_split "<su>mulsidi3_reg"
1523 [(set (match_operand:DI 0 "register_operand" "=&Sg, &v")
1524 (mult:DI (any_extend:DI
1525 (match_operand:SI 1 "register_operand" "%Sg, v"))
1527 (match_operand:SI 2 "register_operand" "Sg,vSv"))))]
1533 rtx dstlo = gen_lowpart (SImode, operands[0]);
1534 rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
1535 emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
1536 emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
; Widening multiply (immediate form); same low/high split as the register
; variant, performed after reload.
1540 (define_insn_and_split "<su>mulsidi3_imm"
1541 [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg,&v")
1542 (mult:DI (any_extend:DI
1543 (match_operand:SI 1 "register_operand" "Sg, Sg, v"))
1544 (match_operand:DI 2 "gcn_32bit_immediate_operand"
1548 "&& reload_completed"
1551 rtx dstlo = gen_lowpart (SImode, operands[0]);
1552 rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
1553 emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
1554 emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
; Full 64-bit multiply via schoolbook decomposition:
;   dst = umulsidi3 (lo1, lo2)
;   dst.hi += lo1 * hi2
;   dst.hi += hi1 * lo2
; The cross-term adds are built by hand as PARALLELs so the matched SCC/VCC
; clobber scratches (operands 4 and 5) are reused for each addsi3.
1558 (define_insn_and_split "muldi3"
1559 [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg, &v,&v")
1560 (mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v")
1561 (match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A")))
1562 (clobber (match_scratch:SI 3 "=&Sg,&Sg,&v,&v"))
1563 (clobber (match_scratch:BI 4 "=cs, cs, X, X"))
1564 (clobber (match_scratch:DI 5 "=X, X,cV,cV"))]
1570 rtx tmp = operands[3];
1571 rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
1572 rtx op1lo = gcn_operand_part (DImode, operands[1], 0);
1573 rtx op1hi = gcn_operand_part (DImode, operands[1], 1);
1574 rtx op2lo = gcn_operand_part (DImode, operands[2], 0);
1575 rtx op2hi = gcn_operand_part (DImode, operands[2], 1);
1576 emit_insn (gen_umulsidi3 (operands[0], op1lo, op2lo));
1577 emit_insn (gen_mulsi3 (tmp, op1lo, op2hi));
1578 rtx add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
1579 rtx clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
1580 rtx clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
1581 add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
1583 emit_insn (gen_mulsi3 (tmp, op1hi, op2lo));
1584 add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
1585 clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
1586 clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
1587 add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
; Widening 16x16->32 multiply using the 24-bit multiplier with SDWA operand
; selection to pick out the low word of each source.
1592 (define_insn "<u>mulhisi3"
1593 [(set (match_operand:SI 0 "register_operand" "=v")
1595 (any_extend:SI (match_operand:HI 1 "register_operand" "%v"))
1596 (any_extend:SI (match_operand:HI 2 "register_operand" " v"))))]
1598 "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:WORD_0 src1_sel:WORD_0"
1599 [(set_attr "type" "vop_sdwa")
1600 (set_attr "length" "8")])
; Widening 8x8->16 multiply, analogous to <u>mulhisi3 but selecting the low
; byte of each source.
1602 (define_insn "<u>mulqihi3_scalar"
1603 [(set (match_operand:HI 0 "register_operand" "=v")
1605 (any_extend:HI (match_operand:QI 1 "register_operand" "%v"))
1606 (any_extend:HI (match_operand:QI 2 "register_operand" " v"))))]
1608 "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:BYTE_0 src1_sel:BYTE_0"
1609 [(set_attr "type" "vop_sdwa")
1610 (set_attr "length" "8")])
1613 ;; {{{ ALU: generic 32-bit unop
; Generic 32-bit unary ops (NOT, POPCOUNT).  The scalar alternative clobbers
; SCC; the popcount vector mnemonic needs a trailing ", 0" extra operand.
1615 (define_code_iterator bitunop [not popcount])
1616 (define_code_attr popcount_extra_op [(not "") (popcount ", 0")])
1618 (define_insn "<expander>si2"
1619 [(set (match_operand:SI 0 "register_operand" "=Sg, v")
1621 (match_operand:SI 1 "gcn_alu_operand" "SgB,vSvB")))
1622 (clobber (match_scratch:BI 2 "=cs, X"))]
1625 s_<s_mnemonic>0\t%0, %1
1626 v_<mnemonic>0\t%0, %1<popcount_extra_op>"
1627 [(set_attr "type" "sop1,vop1")
1628 (set_attr "length" "8")])
; Count leading/trailing zeros, scalar unit only.
1630 (define_code_iterator countzeros [clz ctz])
1632 (define_insn "<expander>si2"
1633 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
1635 (match_operand:SI 1 "gcn_alu_operand" "SgA, B")))]
1637 "s_<s_mnemonic>1\t%0, %1"
1638 [(set_attr "type" "sop1")
1639 (set_attr "length" "4,8")])
; DImode count-leading/trailing-zeros, truncated to an SImode result.
1641 ; The truncate ensures that a constant passed to operand 1 is treated as DImode
1642 (define_insn "<expander>di2"
1643 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
1646 (match_operand:DI 1 "gcn_alu_operand" "SgA, B"))))]
1648 "s_<s_mnemonic>1\t%0, %1"
1649 [(set_attr "type" "sop1")
1650 (set_attr "length" "4,8")])
; Find-last-bit (signed): emits s_flbit_i32 for SImode or s_flbit_i32_i64
; for DImode inputs.  Used by the clrsb expander below.
1652 (define_insn "gcn_flbit<mode>_int"
1653 [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
1654 (unspec:SI [(match_operand:SIDI 1 "gcn_alu_operand" "SgA, B")]
1658 if (<MODE>mode == SImode)
1659 return "s_flbit_i32\t%0, %1";
1661 return "s_flbit_i32_i64\t%0, %1";
1663 [(set_attr "type" "sop1")
1664 (set_attr "length" "4,8")])
; Count leading redundant sign bits, synthesized from FLBIT_I* as described
; in the inline comments: flbit, clamp with umin, then subtract one.
1666 (define_expand "clrsb<mode>2"
1667 [(set (match_operand:SI 0 "register_operand" "")
1668 (clrsb:SI (match_operand:SIDI 1 "gcn_alu_operand" "")))]
1671 rtx tmp = gen_reg_rtx (SImode);
1672 /* FLBIT_I* counts sign or zero bits at the most-significant end of the
1673 input register (and returns -1 for 0/-1 inputs). We want the number of
1674 *redundant* bits (i.e. that value minus one), and an answer of 31/63 for
1675 0/-1 inputs. We can do that in three instructions... */
1676 emit_insn (gen_gcn_flbit<mode>_int (tmp, operands[1]));
1677 emit_insn (gen_uminsi3 (tmp, tmp,
1678 gen_int_mode (GET_MODE_BITSIZE (<MODE>mode),
1680 /* If we put this last, it can potentially be folded into a subsequent
1681 arithmetic operation. */
1682 emit_insn (gen_subsi3 (operands[0], tmp, const1_rtx));
1687 ;; {{{ ALU: generic 32-bit binop
; Code iterators for the generic binary-op patterns, partitioned by whether
; the operation is commutative (affects constraint/reverse-mnemonic choice).
1689 ; No plus and mult - they have variant with 16bit immediate
1690 ; and thus are defined later.
1691 (define_code_iterator binop [and ior xor smin smax umin umax
1692 ashift lshiftrt ashiftrt])
1693 (define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax])
1694 (define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt])
; Generic commutative 32-bit binop: scalar (clobbers SCC), vector, or LDS
; ("ds_") alternatives.
1696 (define_insn "<expander>si3"
1697 [(set (match_operand:SI 0 "gcn_valu_dst_operand" "= Sg, v,RD")
1698 (vec_and_scalar_com:SI
1699 (match_operand:SI 1 "gcn_valu_src0_operand" "%SgA,vSvB, 0")
1700 (match_operand:SI 2 "gcn_alu_operand" " SgB, v, v")))
1701 (clobber (match_scratch:BI 3 "= cs, X, X"))]
1704 s_<mnemonic>0\t%0, %1, %2
1705 v_<mnemonic>0\t%0, %1, %2
1706 ds_<mnemonic>0\t%A0, %2%O0"
1707 [(set_attr "type" "sop2,vop2,ds")
1708 (set_attr "length" "8")])
; Generic non-commutative 32-bit binop (shifts).  The vector alternative
; uses the reversed-operand mnemonic so the shift amount can be the scalar
; source.
1710 (define_insn "<expander>si3"
1711 [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v")
1712 (vec_and_scalar_nocom:SI
1713 (match_operand:SI 1 "gcn_alu_operand" "SgB,SgA, v")
1714 (match_operand:SI 2 "gcn_alu_operand" "SgA,SgB,vSvB")))
1715 (clobber (match_scratch:BI 3 "=cs, cs, X"))]
1718 s_<mnemonic>0\t%0, %1, %2
1719 s_<mnemonic>0\t%0, %1, %2
1720 v_<revmnemonic>0\t%0, %2, %1"
1721 [(set_attr "type" "sop2,sop2,vop2")
1722 (set_attr "length" "8")])
; Variant of the generic binop with the SCC clobber made explicit.
1724 (define_expand "<expander>si3_scc"
1725 [(parallel [(set (match_operand:SI 0 "gcn_valu_dst_operand")
1727 (match_operand:SI 1 "gcn_valu_src0_operand")
1728 (match_operand:SI 2 "gcn_alu_operand")))
1729 (clobber (reg:BI SCC_REG))])]
1734 ;; {{{ ALU: generic 64-bit
; 64-bit bitwise NOT, split into two SImode NOTs over the low and high
; halves (the same SCC scratch is reused for both halves).
1736 (define_insn_and_split "one_cmpldi2"
1737 [(set (match_operand:DI 0 "register_operand" "=Sg, v")
1738 (not:DI (match_operand:DI 1 "gcn_alu_operand" "SgA,vSvDB")))
1739 (clobber (match_scratch:BI 2 "=cs, X"))]
1743 [(parallel [(set (match_dup 3) (not:SI (match_dup 4)))
1744 (clobber (match_dup 2))])
1745 (parallel [(set (match_dup 5) (not:SI (match_dup 6)))
1746 (clobber (match_dup 2))])]
1748 operands[3] = gcn_operand_part (DImode, operands[0], 0);
1749 operands[4] = gcn_operand_part (DImode, operands[1], 0);
1750 operands[5] = gcn_operand_part (DImode, operands[0], 1);
1751 operands[6] = gcn_operand_part (DImode, operands[1], 1);
1753 [(set_attr "type" "mult")]
; 64-bit AND/IOR/XOR.  The scalar unit has native 64-bit forms; for VGPRs
; the pattern splits after reload into two SImode operations over the low
; and high halves.
1756 (define_code_iterator vec_and_scalar64_com [and ior xor])
1758 (define_insn_and_split "<expander>di3"
1759 [(set (match_operand:DI 0 "register_operand" "= Sg, v")
1760 (vec_and_scalar64_com:DI
1761 (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB")
1762 (match_operand:DI 2 "gcn_alu_operand" " SgC, v")))
1763 (clobber (match_scratch:BI 3 "= cs, X"))]
1766 s_<mnemonic>0\t%0, %1, %2
1768 "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
1769 [(parallel [(set (match_dup 4)
1770 (vec_and_scalar64_com:SI (match_dup 5) (match_dup 6)))
1771 (clobber (match_dup 3))])
1772 (parallel [(set (match_dup 7)
1773 (vec_and_scalar64_com:SI (match_dup 8) (match_dup 9)))
1774 (clobber (match_dup 3))])]
1776 operands[4] = gcn_operand_part (DImode, operands[0], 0);
1777 operands[5] = gcn_operand_part (DImode, operands[1], 0);
1778 operands[6] = gcn_operand_part (DImode, operands[2], 0);
1779 operands[7] = gcn_operand_part (DImode, operands[0], 1);
1780 operands[8] = gcn_operand_part (DImode, operands[1], 1);
1781 operands[9] = gcn_operand_part (DImode, operands[2], 1);
1783 [(set_attr "type" "sop2,vop2")
1784 (set_attr "length" "8")])
; 64-bit shifts; the shift amount (operand 2) is SImode.  The vector
; alternative uses the reversed-operand mnemonic.
1786 (define_insn "<expander>di3"
1787 [(set (match_operand:DI 0 "register_operand" "=Sg, Sg, v")
1788 (vec_and_scalar_nocom:DI
1789 (match_operand:DI 1 "gcn_alu_operand" "SgC,SgA, v")
1790 (match_operand:SI 2 "gcn_alu_operand" "SgA,SgC,vSvC")))
1791 (clobber (match_scratch:BI 3 "=cs, cs, X"))]
1794 s_<mnemonic>0\t%0, %1, %2
1795 s_<mnemonic>0\t%0, %1, %2
1796 v_<revmnemonic>0\t%0, %2, %1"
1797 [(set_attr "type" "sop2,sop2,vop2")
1798 (set_attr "length" "8")])
1801 ;; {{{ ALU: generic 128-bit binop
1803 ; TImode shifts can't be synthesized by the middle-end
; 128-bit shifts synthesized from DImode shifts.  Three cases, selected at
; compile time when the amount is constant, or by runtime branches
; otherwise: ZERO (plain move), SMALL (<64: shift both halves and patch the
; carried-across bits), LARGE (64..127: shift within one half, fill the
; other with zeros or sign bits).  Shifts of 128+ bits are undefined.
1804 (define_expand "<expander>ti3"
1805 [(set (match_operand:TI 0 "register_operand")
1806 (vec_and_scalar_nocom:TI
1807 (match_operand:TI 1 "gcn_alu_operand")
1808 (match_operand:SI 2 "gcn_alu_operand")))]
1811 rtx dest = operands[0];
1812 rtx src = operands[1];
1813 rtx shift = operands[2];
1815 enum {ashr, lshr, ashl} shiftop = <expander>;
1816 rtx (*inverse_shift_fn) (rtx, rtx, rtx)
1817 = (shiftop == ashl ? gen_lshrdi3 : gen_ashldi3);
1818 rtx (*logical_shift_fn) (rtx, rtx, rtx)
1819 = (shiftop == ashl ? gen_ashldi3 : gen_lshrdi3);
1821 /* We shift "from" one subreg "to" the other, according to shiftop. */
1822 int from = (shiftop == ashl ? 0 : 8);
1823 int to = (shiftop == ashl ? 8 : 0);
1824 rtx destfrom = simplify_gen_subreg (DImode, dest, TImode, from);
1825 rtx destto = simplify_gen_subreg (DImode, dest, TImode, to);
1826 rtx srcfrom = simplify_gen_subreg (DImode, src, TImode, from);
1827 rtx srcto = simplify_gen_subreg (DImode, src, TImode, to);
1829 int shiftval = (CONST_INT_P (shift) ? INTVAL (shift) : -1);
1830 enum {RUNTIME, ZERO, SMALL, LARGE} shiftcomparison
1831 = (!CONST_INT_P (shift) ? RUNTIME
1832 : shiftval == 0 ? ZERO
1833 : shiftval < 64 ? SMALL
1836 rtx large_label, zero_label, exit_label;
1838 if (shiftcomparison == RUNTIME)
1840 zero_label = gen_label_rtx ();
1841 large_label = gen_label_rtx ();
1842 exit_label = gen_label_rtx ();
1844 rtx cond = gen_rtx_EQ (VOIDmode, shift, const0_rtx);
1845 emit_insn (gen_cbranchsi4 (cond, shift, const0_rtx, zero_label));
1847 rtx sixtyfour = GEN_INT (64);
1848 cond = gen_rtx_GE (VOIDmode, shift, sixtyfour);
1849 emit_insn (gen_cbranchsi4 (cond, shift, sixtyfour, large_label));
1852 if (shiftcomparison == SMALL || shiftcomparison == RUNTIME)
1854 /* Shift both parts by the same amount, then patch in the bits that
1856 This does *not* work for zero-length shifts. */
1857 rtx tmpto1 = gen_reg_rtx (DImode);
1858 rtx tmpto2 = gen_reg_rtx (DImode);
1859 emit_insn (gen_<expander>di3 (destfrom, srcfrom, shift));
1860 emit_insn (logical_shift_fn (tmpto1, srcto, shift));
1861 rtx lessershiftval = gen_reg_rtx (SImode);
1862 emit_insn (gen_subsi3 (lessershiftval, GEN_INT (64), shift));
1863 emit_insn (inverse_shift_fn (tmpto2, srcfrom, lessershiftval));
1864 emit_insn (gen_iordi3 (destto, tmpto1, tmpto2));
1867 if (shiftcomparison == RUNTIME)
1869 emit_jump_insn (gen_jump (exit_label));
1872 emit_label (zero_label);
1875 if (shiftcomparison == ZERO || shiftcomparison == RUNTIME)
1876 emit_move_insn (dest, src);
1878 if (shiftcomparison == RUNTIME)
1880 emit_jump_insn (gen_jump (exit_label));
1883 emit_label (large_label);
1886 if (shiftcomparison == LARGE || shiftcomparison == RUNTIME)
1888 /* Do the shift within one part, and set the other part appropriately.
1889 Shifts of 128+ bits are an error. */
1890 rtx lessershiftval = gen_reg_rtx (SImode);
1891 emit_insn (gen_subsi3 (lessershiftval, shift, GEN_INT (64)));
1892 emit_insn (gen_<expander>di3 (destto, srcfrom, lessershiftval));
1893 if (shiftop == ashr)
1894 emit_insn (gen_ashrdi3 (destfrom, srcfrom, GEN_INT (63)));
1896 emit_move_insn (destfrom, const0_rtx);
1899 if (shiftcomparison == RUNTIME)
1900 emit_label (exit_label);
1908 ; Each compute unit has it's own L1 cache. The L2 cache is shared between
1909 ; all the compute units. Any load or store instruction can skip L1 and
1910 ; access L2 directly using the "glc" flag. Atomic instructions also skip
1911 ; L1. The L1 cache can be flushed and invalidated using instructions.
1913 ; Therefore, in order for "acquire" and "release" atomic modes to work
1914 ; correctly across compute units we must flush before each "release"
1915 ; and invalidate the cache after each "acquire". It might seem like
1916 ; invalidation could be safely done before an "acquire", but since each
1917 ; compute unit can run up to 40 threads simultaneously, all reading values
1918 ; into the L1 cache, this is not actually safe.
1920 ; Additionally, scalar flat instructions access L2 via a different cache
1921 ; (the "constant cache"), so they have separate constrol instructions. We
1922 ; do not attempt to invalidate both caches at once; instead, atomics
1923 ; operating on scalar flat pointers will flush the constant cache, and
1924 ; atomics operating on flat or global pointers will flush L1. It is up to
1925 ; the programmer to get this right.
; Atomic RMW operations and the "_X2" mnemonic suffix for 64-bit variants.
1927 (define_code_iterator atomicops [plus minus and ior xor])
1928 (define_mode_attr X [(SI "") (DI "_X2")])
1930 ;; TODO compare_and_swap test_and_set inc dec
1931 ;; Hardware also supports min and max, but GCC does not.
; Memory barrier: a volatile BLKmode unspec over a scratch address, so the
; optimizers treat it as touching all memory.
1933 (define_expand "memory_barrier"
1935 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
1938 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1939 MEM_VOLATILE_P (operands[0]) = 1;
; Barrier implementation for targets with the L1 write-back/invalidate
; instruction (TARGET_WBINVL1_CACHE).
1942 (define_insn "*memory_barrier"
1943 [(set (match_operand:BLK 0)
1944 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
1945 "TARGET_WBINVL1_CACHE"
1946 "buffer_wbinvl1_vol"
1947 [(set_attr "type" "mubuf")
1948 (set_attr "length" "4")])
; Barrier implementation for targets using GL1/GL0 cache-invalidate
; instructions instead of buffer_wbinvl1_vol.
1950 (define_insn "*memory_barrier"
1951 [(set (match_operand:BLK 0)
1952 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
1954 "buffer_gl1_inv\;buffer_gl0_inv"
1955 [(set_attr "type" "mult")
1956 (set_attr "length" "8")])
1958 ; FIXME: These patterns have been disabled as they do not seem to work
1959 ; reliably - they can cause hangs or incorrect results.
1960 ; TODO: flush caches according to memory model
; Atomic fetch-and-op returning the old value ("glc"), with the appropriate
; wait-count after each form (scalar/flat/global).  See the FIXME above:
; currently considered unreliable.
1961 (define_insn "atomic_fetch_<bare_mnemonic><mode>"
1962 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
1963 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
1965 (unspec_volatile:SIDI
1968 (match_operand:SIDI 2 "register_operand" " Sm, v, v"))]
1970 (use (match_operand 3 "const_int_operand"))]
1973 s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
1974 flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
1975 global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
1976 [(set_attr "type" "smem,flat,flat")
1977 (set_attr "length" "12")])
1979 ; FIXME: These patterns are disabled because the instructions don't
1980 ; seem to work as advertised. Specifically, OMP "team distribute"
1981 ; reductions apparently "lose" some of the writes, similar to what
1982 ; you might expect from a concurrent non-atomic read-modify-write.
1983 ; TODO: flush caches according to memory model
; Atomic read-modify-write with no result value (no "glc").  See the FIXME
; above: disabled due to lost writes observed in OMP reductions.
1984 (define_insn "atomic_<bare_mnemonic><mode>"
1985 [(set (match_operand:SIDI 0 "memory_operand" "+RS,RF,RM")
1986 (unspec_volatile:SIDI
1989 (match_operand:SIDI 1 "register_operand" " Sm, v, v"))]
1991 (use (match_operand 2 "const_int_operand"))]
1994 s_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\tlgkmcnt(0)
1995 flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
1996 global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
1997 [(set_attr "type" "smem,flat,flat")
1998 (set_attr "length" "12")])
; Helper mode attributes: double-width mode, byte size, and bit size, used
; by the compare-and-swap patterns below.
2000 (define_mode_attr x2 [(SI "DI") (DI "TI")])
2001 (define_mode_attr size [(SI "4") (DI "8")])
2002 (define_mode_attr bitsize [(SI "32") (DI "64")])
; Compare-and-swap expander.  LDS memory uses a dedicated ds_cmpst insn;
; otherwise the swap and compare values are packed into one double-width
; register pair (new value in the low half, expected value in the high
; half) as the hardware cmpswap instruction requires.
2004 (define_expand "sync_compare_and_swap<mode>"
2005 [(match_operand:SIDI 0 "register_operand")
2006 (match_operand:SIDI 1 "memory_operand")
2007 (match_operand:SIDI 2 "register_operand")
2008 (match_operand:SIDI 3 "register_operand")]
2011 if (MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_LDS)
2013 emit_insn (gen_sync_compare_and_swap<mode>_lds_insn (operands[0],
2020 /* Operands 2 and 3 must be placed in consecutive registers, and passed
2021 as a combined value. */
2022 rtx src_cmp = gen_reg_rtx (<x2>mode);
2023 emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, 0), operands[3]);
2024 emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, <size>), operands[2]);
2025 emit_insn (gen_sync_compare_and_swap<mode>_insn (operands[0],
; Hardware compare-and-swap; operand 2 is the packed swap/compare pair in
; double-width mode.  "delayeduse" guards the flat/global forms.
2031 (define_insn "sync_compare_and_swap<mode>_insn"
2032 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
2033 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
2035 (unspec_volatile:SIDI
2036 [(match_operand:<x2> 2 "register_operand" " Sm, v, v")]
2040 s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
2041 flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
2042 global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
2043 [(set_attr "type" "smem,flat,flat")
2044 (set_attr "length" "12")
2045 (set_attr "delayeduse" "*,yes,yes")])
; LDS compare-and-swap.  Note the mnemonic and operand-order difference
; between ds_cmpstore_rtn (newer) and ds_cmpst_rtn (older) forms.
2047 (define_insn "sync_compare_and_swap<mode>_lds_insn"
2048 [(set (match_operand:SIDI 0 "register_operand" "= v")
2049 (unspec_volatile:SIDI
2050 [(match_operand:SIDI 1 "memory_operand" "+RL")]
2053 (unspec_volatile:SIDI
2054 [(match_operand:SIDI 2 "register_operand" " v")
2055 (match_operand:SIDI 3 "register_operand" " v")]
2060 return "ds_cmpstore_rtn_b<bitsize> %0, %1, %3, %2\;s_waitcnt\tlgkmcnt(0)";
2062 return "ds_cmpst_rtn_b<bitsize> %0, %1, %2, %3\;s_waitcnt\tlgkmcnt(0)";
2064 [(set_attr "type" "ds")
2065 (set_attr "length" "12")])
; Atomic load.  Operand 2 is the memory model: RELAXED emits a bare
; cache-bypassing ("glc", plus "dlc" on RDNA2) load; ACQUIRE adds a cache
; invalidate after the load; SEQ_CST additionally invalidates/writes back
; before the load.  The cache-control instructions differ per target
; generation (s_dcache_* / buffer_wbinvl1_vol / buffer_gl*_inv).
2067 (define_insn "atomic_load<mode>"
2068 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
2069 (unspec_volatile:SIDI
2070 [(match_operand:SIDI 1 "memory_operand" " RS,RF,RM")]
2072 (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
2075 /* FIXME: RDNA cache instructions may be too conservative? */
2076 switch (INTVAL (operands[2]))
2078 case MEMMODEL_RELAXED:
2079 switch (which_alternative)
2082 return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
2084 return (TARGET_RDNA2 /* Not GFX11. */
2085 ? "flat_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\t0"
2086 : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0");
2088 return (TARGET_RDNA2 /* Not GFX11. */
2089 ? "global_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\tvmcnt(0)"
2090 : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)");
2093 case MEMMODEL_CONSUME:
2094 case MEMMODEL_ACQUIRE:
2095 case MEMMODEL_SYNC_ACQUIRE:
2096 switch (which_alternative)
2099 return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
2102 return (TARGET_RDNA2
2103 ? "flat_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\t0\;"
2104 "buffer_gl1_inv\;buffer_gl0_inv"
2106 ? "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
2107 "buffer_gl1_inv\;buffer_gl0_inv"
2108 : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
2109 "buffer_wbinvl1_vol");
2111 return (TARGET_RDNA2
2112 ? "global_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\tvmcnt(0)\;"
2113 "buffer_gl1_inv\;buffer_gl0_inv"
2115 ? "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
2116 "buffer_gl1_inv\;buffer_gl0_inv"
2117 : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
2118 "buffer_wbinvl1_vol");
2121 case MEMMODEL_ACQ_REL:
2122 case MEMMODEL_SEQ_CST:
2123 case MEMMODEL_SYNC_SEQ_CST:
2124 switch (which_alternative)
2127 return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
2128 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
2130 return (TARGET_RDNA2
2131 ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc dlc\;"
2132 "s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
2134 ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc\;"
2135 "s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
2136 : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
2137 "s_waitcnt\t0\;buffer_wbinvl1_vol");
2139 return (TARGET_RDNA2
2140 ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc dlc\;"
2141 "s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
2143 ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc\;"
2144 "s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
2145 : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
2146 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
2152 [(set_attr "type" "smem,flat,flat")
2153 (set_attr "length" "28")
2154 (set_attr "rdna" "no,*,*")])
;; Atomic store for SImode/DImode: write register operand 1 to memory
;; operand 0.  Three alternatives: scalar memory (s_store), flat, and
;; global address spaces.  Operand 2 is the C/C++ memory model constant;
;; the output template wraps the store in the cache write-back /
;; invalidate sequence that model requires on the selected cache
;; architecture (GLn for RDNA, WBINVL1 for GCN).
2156 (define_insn "atomic_store<mode>"
2157 [(set (match_operand:SIDI 0 "memory_operand" "=RS,RF,RM")
2158 (unspec_volatile:SIDI
2159 [(match_operand:SIDI 1 "register_operand" " Sm, v, v")]
2161 (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
2164 switch (INTVAL (operands[2]))
/* Relaxed: plain store, only wait for the store to complete.  */
2166 case MEMMODEL_RELAXED:
2167 switch (which_alternative)
2170 return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
2172 return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
2174 return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
/* Release: invalidate/write-back caches before the store so earlier
   writes are visible to other agents first.  */
2177 case MEMMODEL_RELEASE:
2178 case MEMMODEL_SYNC_RELEASE:
2179 switch (which_alternative)
2182 return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
2184 return (TARGET_GLn_CACHE
2185 ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc"
2186 : TARGET_WBINVL1_CACHE
2187 ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc"
2188 : "error: cache architecture unspecified");
2190 return (TARGET_GLn_CACHE
2191 ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc"
2192 : TARGET_WBINVL1_CACHE
2193 ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc"
2194 : "error: cache architecture unspecified");
/* Sequentially consistent: cache maintenance both before and after
   the store (release-before plus acquire-after).  */
2197 case MEMMODEL_ACQ_REL:
2198 case MEMMODEL_SEQ_CST:
2199 case MEMMODEL_SYNC_SEQ_CST:
2200 switch (which_alternative)
2203 return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
2204 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
2206 return (TARGET_GLn_CACHE
2207 ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc\;"
2208 "s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
2209 : TARGET_WBINVL1_CACHE
2210 ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
2211 "s_waitcnt\t0\;buffer_wbinvl1_vol"
2212 : "error: cache architecture unspecified");
2214 return (TARGET_GLn_CACHE
2215 ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc\;"
2216 "s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
2217 : TARGET_WBINVL1_CACHE
2218 ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
2219 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
2220 : "error: cache architecture unspecified");
2226 [(set_attr "type" "smem,flat,flat")
2227 (set_attr "length" "28")
2228 (set_attr "rdna" "no,*,*")])
;; Atomic exchange: store register operand 2 into memory operand 1 and
;; return the previous memory value in operand 0.  Alternatives cover
;; scalar (SMEM), flat and global address spaces.  Operand 3 is the
;; C/C++ memory model constant and selects the cache write-back /
;; invalidate instructions emitted around the swap.
2230 (define_insn "atomic_exchange<mode>"
2231 [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
2232 (match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
2234 (unspec_volatile:SIDI
2235 [(match_operand:SIDI 2 "register_operand" " Sm, v, v")]
2237 (use (match_operand 3 "immediate_operand"))]
2240 switch (INTVAL (operands[3]))
/* Relaxed: bare atomic swap; only wait for the result to land.  */
2242 case MEMMODEL_RELAXED:
2243 switch (which_alternative)
2246 return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
2248 return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
2250 return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2251 "s_waitcnt\tvmcnt(0)";
/* Acquire: swap first, then invalidate caches so subsequent loads
   observe other agents' writes.  */
2254 case MEMMODEL_CONSUME:
2255 case MEMMODEL_ACQUIRE:
2256 case MEMMODEL_SYNC_ACQUIRE:
2257 switch (which_alternative)
2260 return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
2261 "s_dcache_wb_vol\;s_dcache_inv_vol";
2263 return (TARGET_GLn_CACHE
2264 ? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
2265 "buffer_gl1_inv\;buffer_gl0_inv"
2266 : TARGET_WBINVL1_CACHE
2267 ? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
2268 "buffer_wbinvl1_vol"
2269 : "error: cache architecture unspecified");
2271 return (TARGET_GLn_CACHE
2272 ? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2273 "s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
2274 : TARGET_WBINVL1_CACHE
2275 ? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2276 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
2277 : "error: cache architecture unspecified");
/* Release: cache maintenance before the swap so earlier writes are
   visible to other agents first.  */
2280 case MEMMODEL_RELEASE:
2281 case MEMMODEL_SYNC_RELEASE:
2282 switch (which_alternative)
2285 return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
2286 "s_waitcnt\tlgkmcnt(0)";
2288 return (TARGET_GLn_CACHE
2289 ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
2291 : TARGET_WBINVL1_CACHE
2292 ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
2294 : "error: cache architecture unspecified");
2296 return (TARGET_GLn_CACHE
2297 ? "buffer_gl1_inv\;buffer_gl0_inv\;"
2298 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2299 "s_waitcnt\tvmcnt(0)"
2300 : TARGET_WBINVL1_CACHE
2301 ? "buffer_wbinvl1_vol\;"
2302 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2303 "s_waitcnt\tvmcnt(0)"
2304 : "error: cache architecture unspecified");
/* Sequentially consistent: maintenance both before and after the swap
   (release-before plus acquire-after).  */
2307 case MEMMODEL_ACQ_REL:
2308 case MEMMODEL_SEQ_CST:
2309 case MEMMODEL_SYNC_SEQ_CST:
2310 switch (which_alternative)
2313 return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
2314 "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
2316 return (TARGET_GLn_CACHE
2317 ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
2318 "s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
2319 : TARGET_WBINVL1_CACHE
2320 ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
2321 "s_waitcnt\t0\;buffer_wbinvl1_vol"
2322 : "error: cache architecture unspecified");
2324 return (TARGET_GLn_CACHE
2325 ? "buffer_gl1_inv\;buffer_gl0_inv\;"
2326 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2327 "s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
2328 : TARGET_WBINVL1_CACHE
2329 ? "buffer_wbinvl1_vol\;"
2330 "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
2331 "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
2332 : "error: cache architecture unspecified");
2338 [(set_attr "type" "smem,flat,flat")
2339 (set_attr "length" "28")
2340 (set_attr "rdna" "no,*,*")])
2343 ;; {{{ OpenACC / OpenMP
;; OpenACC dimension size: expand to a move of the value computed by
;; gcn_oacc_dim_size for the constant dimension number in operand 1
;; (truncated to SImode) into register operand 0.
2345 (define_expand "oacc_dim_size"
2346 [(match_operand:SI 0 "register_operand")
2347 (match_operand:SI 1 "const_int_operand")]
2350 rtx tmp = gcn_oacc_dim_size (INTVAL (operands[1]));
2351 emit_move_insn (operands[0], gen_lowpart (SImode, tmp));
;; OpenACC dimension position: move the value returned by
;; gcn_oacc_dim_pos for the constant dimension number in operand 1
;; into register operand 0.
2355 (define_expand "oacc_dim_pos"
2356 [(match_operand:SI 0 "register_operand")
2357 (match_operand:SI 1 "const_int_operand")]
2360 emit_move_insn (operands[0], gcn_oacc_dim_pos (INTVAL (operands[1])));
;; Wavefront barrier.  The preparation code builds a volatile BLKmode
;; MEM on a scratch address for operand 0, so the UNSPECV_BARRIER insn
;; also acts as a scheduling barrier for all memory accesses.
2364 (define_expand "gcn_wavefront_barrier"
2366 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
2369 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2370 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for gcn_wavefront_barrier; classified as a scalar
;; program (sopp) instruction.  NOTE(review): the output template line
;; is not visible in this chunk — presumably s_barrier; confirm in the
;; full file.
2373 (define_insn "*gcn_wavefront_barrier"
2374 [(set (match_operand:BLK 0 "")
2375 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
2378 [(set_attr "type" "sopp")])
;; OpenACC fork marker.  Present only because oacc_fork/oacc_join must
;; exist as a named-pattern pair; this expansion emits nothing useful.
2380 (define_expand "oacc_fork"
2381 [(set (match_operand:SI 0 "")
2382 (match_operand:SI 1 ""))
2383 (use (match_operand:SI 2 ""))]
2386 /* We need to have oacc_fork/oacc_join named patterns as a pair,
2387 but the fork isn't actually used. */
;; OpenACC join: emit a wavefront barrier at the join point so all
;; lanes synchronize before continuing.
2391 (define_expand "oacc_join"
2392 [(set (match_operand:SI 0 "")
2393 (match_operand:SI 1 ""))
2394 (use (match_operand:SI 2 ""))]
2397 emit_insn (gen_gcn_wavefront_barrier ());
2403 (include "gcn-valu.md")