1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2018 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
33 UNSPEC_FPINT_NEARBYINT
53 (define_c_enum "unspecv" [
;; Insn attribute "subregs_ok" takes the value "false" or "true" and
;; defaults to "false"; individual patterns below override it.
73 (define_attr "subregs_ok" "false,true"
74 (const_string "false"))
;; Insn attribute "atomic" takes the value "false" or "true" and defaults
;; to "false".
76 (define_attr "atomic" "false,true"
77 (const_string "false"))
79 ;; The nvptx operand predicates, in general, don't permit subregs and
80 ;; accept only literal constants.  They differ from the generic ones, which
81 ;; permit subregs and symbolic constants (as appropriate).
;; Register predicate: the visible test defers to the generic
;; register_operand for the requested mode.
82 (define_predicate "nvptx_register_operand"
85 return register_operand (op, mode);
;; Accepts MEM or REG rtxes only: a REG must satisfy register_operand,
;; anything else must satisfy memory_operand.
88 (define_predicate "nvptx_nonimmediate_operand"
89 (match_code "mem,reg")
91 return (REG_P (op) ? register_operand (op, mode)
92 : memory_operand (op, mode));
;; Accepts REG, CONST_INT or CONST_DOUBLE rtxes only: a REG must satisfy
;; register_operand, a constant must satisfy immediate_operand.
95 (define_predicate "nvptx_nonmemory_operand"
96 (match_code "reg,const_int,const_double")
98 return (REG_P (op) ? register_operand (op, mode)
99 : immediate_operand (op, mode));
;; Matches only a CONST_INT that is the shared zero rtx, const0_rtx.
102 (define_predicate "const0_operand"
103 (and (match_code "const_int")
104 (match_test "op == const0_rtx")))
106 ;; True if this operator is valid for predication.
;; Only the EQ and NE comparison codes qualify.
107 (define_predicate "predicate_operator"
108 (match_code "eq,ne"))
110 (define_predicate "ne_operator"
;; Comparison codes accepted by the integer compare, branch, store and trap
;; patterns: equality plus the signed and unsigned orderings.
113 (define_predicate "nvptx_comparison_operator"
114 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
;; Comparison codes accepted by the floating-point patterns: equality, the
;; ordered relations, their UN* counterparts, and ORDERED / UNORDERED.
116 (define_predicate "nvptx_float_comparison_operator"
117 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
119 ;; Test for a valid operand for a call instruction.
;; Any REG is accepted; a SYMBOL_REF is accepted only when
;; SYMBOL_REF_FUNCTION_P holds for it.
120 (define_predicate "call_insn_operand"
121 (match_code "symbol_ref,reg")
123 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
126 ;; Return true if OP is a call with parallel USEs of the argument
;; The visible loop starts at element 1 of the PARALLEL (element 0 is not
;; examined by it) and checks that each element is a USE whose operand is
;; a REG.
;; NOTE(review): the action taken for a failing element is not visible in
;; this excerpt -- presumably the predicate rejects OP; confirm.
128 (define_predicate "call_operation"
129 (match_code "parallel")
131 int arg_end = XVECLEN (op, 0);
133 for (int i = 1; i < arg_end; i++)
135 rtx elt = XVECEXP (op, 0, i);
137 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
;; Insn attribute "predicable" takes the value "false" or "true" and
;; defaults to "true"; several patterns below override it to "false".
143 (define_attr "predicable" "false,true"
144 (const_string "true"))
147 [(match_operator 0 "predicate_operator"
148 [(match_operand:BI 1 "nvptx_register_operand" "")
149 (match_operand:BI 2 "const0_operand" "")])]
;; Constraints that each match one specific CONST_INT value: P0 matches 0,
;; P1 matches 1 and Pn matches -1.
154 (define_constraint "P0"
155 "An integer with the value 0."
156 (and (match_code "const_int")
157 (match_test "ival == 0")))
159 (define_constraint "P1"
160 "An integer with the value 1."
161 (and (match_code "const_int")
162 (match_test "ival == 1")))
164 (define_constraint "Pn"
165 "An integer with the value -1."
166 (and (match_code "const_int")
167 (match_test "ival == -1")))
169 (define_constraint "R"
;; Matches every CONST_INT: the accompanying match_test is always true, so
;; the value itself is not restricted.
173 (define_constraint "Ia"
174 "Any integer constant."
175 (and (match_code "const_int") (match_test "true")))
;; Mode iterators.  A pattern written with one of these names is
;; instantiated once per listed machine mode.  The names abbreviate their
;; contents: Q/H/S/D for QI/HI/SI/DI, B for BI, SF/DF for the float modes;
;; SDCM lists the complex modes SC and DC, BITS and BITD pair the integer
;; and float mode named in each list, and VECIM lists V2SI and V2DI.
177 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
178 (define_mode_iterator QHSDIM [QI HI SI DI])
179 (define_mode_iterator HSDIM [HI SI DI])
180 (define_mode_iterator BHSDIM [BI HI SI DI])
181 (define_mode_iterator SDIM [SI DI])
182 (define_mode_iterator SDISDFM [SI DI SF DF])
183 (define_mode_iterator QHIM [QI HI])
184 (define_mode_iterator QHSIM [QI HI SI])
185 (define_mode_iterator SDFM [SF DF])
186 (define_mode_iterator SDCM [SC DC])
187 (define_mode_iterator BITS [SI SF])
188 (define_mode_iterator BITD [DI DF])
189 (define_mode_iterator VECIM [V2SI V2DI])
191 ;; This mode iterator allows :P to be used for patterns that operate on
192 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
;; Each alternative is guarded by a comparison of Pmode against the mode it
;; names (SImode or DImode).
193 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
195 ;; We should get away with not defining memory alternatives, since we don't
196 ;; get variables in this mode and pseudos are never spilled.
198 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
199 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
202 %.\\tmov%t0\\t%0, %1;
203 %.\\tsetp.eq.u32\\t%0, 1, 0;
204 %.\\tsetp.eq.u32\\t%0, 1, 1;")
;; Move insn for the vector integer modes (VECIM).  The insn condition
;; rejects a memory destination unless the source is a register.
;; Alternative 1 (memory source) is output as an "ld" and alternative 2
;; (memory destination) as an "st"; alternative 0 is delegated to
;; nvptx_output_mov_insn.  Sets "subregs_ok" to "true".
206 (define_insn "*mov<mode>_insn"
207 [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
208 (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
209 "!MEM_P (operands[0]) || REG_P (operands[1])"
211 if (which_alternative == 1)
212 return "%.\\tld%A1%u1\\t%0, %1;";
213 if (which_alternative == 2)
214 return "%.\\tst%A0%u0\\t%0, %1;";
216 return nvptx_output_mov_insn (operands[0], operands[1]);
218 [(set_attr "subregs_ok" "true")])
;; Move insn for the scalar integer modes (QHSDIM).  The insn condition
;; rejects a memory destination unless the source is a register.
;; Alternative 1 (memory source) is output as an "ld" and alternative 2
;; (memory destination) as an "st"; alternative 0 is delegated to
;; nvptx_output_mov_insn.  Sets "subregs_ok" to "true".
220 (define_insn "*mov<mode>_insn"
221 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
222 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
223 "!MEM_P (operands[0]) || REG_P (operands[1])"
225 if (which_alternative == 1)
226 return "%.\\tld%A1%u1\\t%0, %1;";
227 if (which_alternative == 2)
228 return "%.\\tst%A0%u0\\t%0, %1;";
230 return nvptx_output_mov_insn (operands[0], operands[1]);
232 [(set_attr "subregs_ok" "true")])
;; Move insn for the floating-point modes (SDFM); constants use the "F"
;; constraint instead of "i".  The insn condition rejects a memory
;; destination unless the source is a register.  Alternative 1 (memory
;; source) is output as an "ld", alternative 2 (memory destination) as an
;; "st", and alternative 0 is delegated to nvptx_output_mov_insn.
;; NOTE(review): unlike the integer and vector move insns, the load here
;; takes its %u modifier from operand 0 and the store from operand 1 (the
;; register side rather than the memory side) -- confirm this is intended.
234 (define_insn "*mov<mode>_insn"
235 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
236 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
237 "!MEM_P (operands[0]) || REG_P (operands[1])"
239 if (which_alternative == 1)
240 return "%.\\tld%A1%u0\\t%0, %1;";
241 if (which_alternative == 2)
242 return "%.\\tst%A0%u1\\t%0, %1;";
244 return nvptx_output_mov_insn (operands[0], operands[1]);
246 [(set_attr "subregs_ok" "true")])
;; Copy the register spelled "%ar<N>" in the output, where N is the
;; constant integer operand 1, into register operand 0.  For QI and HI
;; destinations the template is a "cvt" from .u32.
;; NOTE(review): the unspec code naming this operation is not visible in
;; this excerpt.
248 (define_insn "load_arg_reg<mode>"
249 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
250 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
253 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
;; Same operation for the SI, DI, SF and DF modes, where the template is a
;; plain "mov".
255 (define_insn "load_arg_reg<mode>"
256 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
257 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
260 "%.\\tmov%t0\\t%0, %%ar%1;")
;; Move expander for the vector integer modes.  When the destination is
;; memory and the source is not a register, the source is first moved into
;; a fresh pseudo of the same mode and the pseudo is then stored.
262 (define_expand "mov<mode>"
263 [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
264 (match_operand:VECIM 1 "general_operand" ""))]
267 if (MEM_P (operands[0]) && !REG_P (operands[1]))
269 rtx tmp = gen_reg_rtx (<MODE>mode);
270 emit_move_insn (tmp, operands[1]);
271 emit_move_insn (operands[0], tmp);
;; Move expander for the scalar integer and floating-point modes.  When the
;; destination is memory and the source is not a register, the source is
;; first moved into a fresh pseudo of the same mode and the pseudo is then
;; stored.  A LABEL_REF source is diagnosed with sorry ().
276 (define_expand "mov<mode>"
277 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
278 (match_operand:QHSDISDFM 1 "general_operand" ""))]
281 if (MEM_P (operands[0]) && !REG_P (operands[1]))
283 rtx tmp = gen_reg_rtx (<MODE>mode);
284 emit_move_insn (tmp, operands[1]);
285 emit_move_insn (operands[0], tmp);
289 if (GET_CODE (operands[1]) == LABEL_REF)
290 sorry ("target cannot support label values");
;; Zero-extend QI to HI.  Alternative 0 (register source) is a "cvt";
;; alternative 1 (memory source) is an "ld" with a .u8 type.
293 (define_insn "zero_extendqihi2"
294 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
295 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
298 %.\\tcvt.u16.u%T1\\t%0, %1;
299 %.\\tld%A1.u8\\t%0, %1;"
300 [(set_attr "subregs_ok" "true")])
;; Zero-extend QI or HI to SI: "cvt" from a register, "ld" from memory.
302 (define_insn "zero_extend<mode>si2"
303 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
304 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
307 %.\\tcvt.u32.u%T1\\t%0, %1;
308 %.\\tld%A1.u%T1\\t%0, %1;"
309 [(set_attr "subregs_ok" "true")])
;; Zero-extend QI, HI or SI to DI: "cvt" from a register, "ld" from memory.
311 (define_insn "zero_extend<mode>di2"
312 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
313 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
316 %.\\tcvt.u64.u%T1\\t%0, %1;
317 %.\\tld%A1%u1\\t%0, %1;"
318 [(set_attr "subregs_ok" "true")])
;; Sign-extend QI or HI to SI.  Alternative 0 (register source) is a "cvt"
;; between signed types; alternative 1 (memory source) is a signed "ld".
320 (define_insn "extend<mode>si2"
321 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
322 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
325 %.\\tcvt.s32.s%T1\\t%0, %1;
326 %.\\tld%A1.s%T1\\t%0, %1;"
327 [(set_attr "subregs_ok" "true")])
;; Sign-extend QI, HI or SI to DI, with the same two alternatives.
329 (define_insn "extend<mode>di2"
330 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
331 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
334 %.\\tcvt.s64.s%T1\\t%0, %1;
335 %.\\tld%A1.s%T1\\t%0, %1;"
336 [(set_attr "subregs_ok" "true")])
;; Truncate HI to QI.  Alternative 0 (register destination) is a "cvt";
;; alternative 1 (memory destination) is an "st" with a .u8 type.
338 (define_insn "trunchiqi2"
339 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
340 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
343 %.\\tcvt%t0.u16\\t%0, %1;
344 %.\\tst%A0.u8\\t%0, %1;"
345 [(set_attr "subregs_ok" "true")])
;; Truncate SI to QI or HI: "cvt" into a register, "st" into memory.
347 (define_insn "truncsi<mode>2"
348 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
349 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
352 %.\\tcvt%t0.u32\\t%0, %1;
353 %.\\tst%A0.u%T0\\t%0, %1;"
354 [(set_attr "subregs_ok" "true")])
;; Truncate DI to QI, HI or SI: "cvt" into a register, "st" into memory.
356 (define_insn "truncdi<mode>2"
357 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
358 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
361 %.\\tcvt%t0.u64\\t%0, %1;
362 %.\\tst%A0.u%T0\\t%0, %1;"
363 [(set_attr "subregs_ok" "true")])
365 ;; Integer arithmetic
;; Integer addition for HI, SI and DI: operand 0 = operand 1 + operand 2.
;; Operand 2 may be a register or an immediate.
367 (define_insn "add<mode>3"
368 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
369 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
370 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
372 "%.\\tadd%t0\\t%0, %1, %2;")
;; Integer subtraction: operand 0 = operand 1 - operand 2.  Both inputs
;; must be registers.
374 (define_insn "sub<mode>3"
375 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
376 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
377 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
379 "%.\\tsub%t0\\t%0, %1, %2;")
;; Integer multiplication, output as "mul.lo".
381 (define_insn "mul<mode>3"
382 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
383 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
384 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
386 "%.\\tmul.lo%t0\\t%0, %1, %2;")
;; Multiply-add: operand 0 = operand 1 * operand 2 + operand 3, output as a
;; single "mad.lo".
388 (define_insn "*mad<mode>3"
389 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
390 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
391 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
392 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
394 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
;; Signed integer division, output as "div.s"; the divisor may be a register
;; or an immediate.
396 (define_insn "div<mode>3"
397 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
398 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
399 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
401 "%.\\tdiv.s%T0\\t%0, %1, %2;")
;; Unsigned integer division, output as "div.u".
403 (define_insn "udiv<mode>3"
404 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
405 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
406 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
408 "%.\\tdiv.u%T0\\t%0, %1, %2;")
;; Signed integer remainder for HI, SI and DI: operand 0 = operand 1 mod
;; operand 2, output as "rem.s".  Operand 2 may be a register or an
;; immediate.  Operand 1 uses the register-only constraint "R": its
;; predicate, nvptx_register_operand, only accepts what register_operand
;; accepts, so an immediate alternative could never match.  This also
;; agrees with div<mode>3.
410 (define_insn "mod<mode>3"
411 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
412 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
413 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
415 "%.\\trem.s%T0\\t%0, %1, %2;")
;; Unsigned integer remainder for HI, SI and DI: operand 0 = operand 1 mod
;; operand 2, output as "rem.u".  Operand 2 may be a register or an
;; immediate.  Operand 1 uses the register-only constraint "R": its
;; predicate, nvptx_register_operand, only accepts what register_operand
;; accepts, so an immediate alternative could never match.  This also
;; agrees with udiv<mode>3.
417 (define_insn "umod<mode>3"
418 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
419 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
420 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
422 "%.\\trem.u%T0\\t%0, %1, %2;")
;; Signed integer minimum, output as "min.s".
424 (define_insn "smin<mode>3"
425 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
426 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
427 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
429 "%.\\tmin.s%T0\\t%0, %1, %2;")
;; Unsigned integer minimum, output as "min.u".
431 (define_insn "umin<mode>3"
432 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
433 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
434 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
436 "%.\\tmin.u%T0\\t%0, %1, %2;")
;; Signed integer maximum, output as "max.s".
438 (define_insn "smax<mode>3"
439 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
440 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
441 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
443 "%.\\tmax.s%T0\\t%0, %1, %2;")
;; Unsigned integer maximum, output as "max.u".
445 (define_insn "umax<mode>3"
446 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
447 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
448 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
450 "%.\\tmax.u%T0\\t%0, %1, %2;")
;; Integer absolute value of register operand 1, output as "abs.s".
452 (define_insn "abs<mode>2"
453 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
454 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
456 "%.\\tabs.s%T0\\t%0, %1;")
;; Integer negation of register operand 1, output as "neg.s".
458 (define_insn "neg<mode>2"
459 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
460 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
462 "%.\\tneg.s%T0\\t%0, %1;")
;; Bitwise complement of register operand 1, output as "not.b".
464 (define_insn "one_cmpl<mode>2"
465 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
466 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
468 "%.\\tnot.b%T0\\t%0, %1;")
;; SI/DI operation expressed as an unspec and output as "brev.b".
;; NOTE(review): the unspec code is not visible in this excerpt; the name
;; suggests bit reversal -- confirm.
470 (define_insn "bitrev<mode>2"
471 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
472 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
475 "%.\\tbrev.b%T0\\t%0, %1;")
;; Count leading zeros of an SI or DI register.  The result (operand 0) is
;; always SI; the type in the output template is taken from operand 1.
477 (define_insn "clz<mode>2"
478 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
479 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
481 "%.\\tclz.b%T1\\t%0, %1;")
;; Count trailing zeros, expanded as two insns: bitrev<mode>2 of operand 1
;; into a fresh pseudo, then clz<mode>2 of that pseudo into operand 0.
483 (define_expand "ctz<mode>2"
484 [(set (match_operand:SI 0 "nvptx_register_operand" "")
485 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
488 rtx tmpreg = gen_reg_rtx (<MODE>mode);
489 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
490 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
;; Left shift of an SI or DI register, output as "shl.b".  The shift count
;; (operand 2) is always SI and may be a register or an immediate.
496 (define_insn "ashl<mode>3"
497 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
498 (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
499 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
501 "%.\\tshl.b%T0\\t%0, %1, %2;")
;; Arithmetic right shift, output as "shr.s".
503 (define_insn "ashr<mode>3"
504 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
505 (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
506 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
508 "%.\\tshr.s%T0\\t%0, %1, %2;")
;; Logical right shift, output as "shr.u".
510 (define_insn "lshr<mode>3"
511 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
512 (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
513 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
515 "%.\\tshr.u%T0\\t%0, %1, %2;")
517 ;; Logical operations
;; Bitwise AND for BI, HI, SI and DI, output as "and.b".
519 (define_insn "and<mode>3"
520 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
521 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
522 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
524 "%.\\tand.b%T0\\t%0, %1, %2;")
;; Bitwise inclusive OR for BI, HI, SI and DI, output as "or.b".
526 (define_insn "ior<mode>3"
527 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
528 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
529 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
531 "%.\\tor.b%T0\\t%0, %1, %2;")
;; Bitwise exclusive OR for BI, HI, SI and DI, output as "xor.b".
533 (define_insn "xor<mode>3"
534 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
535 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
536 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
538 "%.\\txor.b%T0\\t%0, %1, %2;")
540 ;; Comparisons and branches
;; Integer comparison producing a BI result in register operand 0.  The
;; comparison operator is operand 1 and its two arms are operands 2 and 3;
;; the output is a "setp" whose suffix comes from the operator (%c1).
542 (define_insn "*cmp<mode>"
543 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
544 (match_operator:BI 1 "nvptx_comparison_operator"
545 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
546 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
548 "%.\\tsetp%c1\\t%0, %2, %3;")
;; Floating-point counterpart, using the float comparison operators and the
;; "F" constraint for constant operands.
550 (define_insn "*cmp<mode>"
551 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
552 (match_operator:BI 1 "nvptx_float_comparison_operator"
553 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
554 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
556 "%.\\tsetp%c1\\t%0, %2, %3;")
560 (label_ref (match_operand 0 "" "")))]
;; Conditional branch to the label in operand 1, controlled by an NE test
;; on BI register operand 0.  Marked not predicable.
;; NOTE(review): the value compared against and the output template are not
;; visible in this excerpt.
564 (define_insn "br_true"
566 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
568 (label_ref (match_operand 1 "" ""))
572 [(set_attr "predicable" "false")])
;; As br_true, but controlled by an EQ test on operand 0.
574 (define_insn "br_false"
576 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
578 (label_ref (match_operand 1 "" ""))
582 [(set_attr "predicable" "false")])
584 ;; unified conditional branch
;; The condition operand is wrapped in UNSPEC_BR_UNIFIED and compared with
;; zero using NE; the output is "bra.uni" to label operand 1, prefixed by
;; %j0.  Marked not predicable.
585 (define_insn "br_true_uni"
586 [(set (pc) (if_then_else
587 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
588 UNSPEC_BR_UNIFIED) (const_int 0))
589 (label_ref (match_operand 1 "" "")) (pc)))]
591 "%j0\\tbra.uni\\t%l1;"
592 [(set_attr "predicable" "false")])
;; EQ variant of br_true_uni; the output prefix is %J0 instead of %j0.
594 (define_insn "br_false_uni"
595 [(set (pc) (if_then_else
596 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
597 UNSPEC_BR_UNIFIED) (const_int 0))
598 (label_ref (match_operand 1 "" "")) (pc)))]
600 "%J0\\tbra.uni\\t%l1;"
601 [(set_attr "predicable" "false")])
;; Compare-and-branch expander for HI, SI and DI.  The comparison in
;; operand 0 is passed to nvptx_expand_compare, and operands 1 and 2 are
;; replaced by the two arms (XEXP 0 and XEXP 1) of the rtx it returns.
;; The branch target is label operand 3.
603 (define_expand "cbranch<mode>4"
605 (if_then_else (match_operator 0 "nvptx_comparison_operator"
606 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
607 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
608 (label_ref (match_operand 3 "" ""))
612 rtx t = nvptx_expand_compare (operands[0]);
614 operands[1] = XEXP (t, 0);
615 operands[2] = XEXP (t, 1);
;; Floating-point counterpart of the expander above, for SF and DF.
618 (define_expand "cbranch<mode>4"
620 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
621 [(match_operand:SDFM 1 "nvptx_register_operand" "")
622 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
623 (label_ref (match_operand 3 "" ""))
627 rtx t = nvptx_expand_compare (operands[0]);
629 operands[1] = XEXP (t, 0);
630 operands[2] = XEXP (t, 1);
;; Branch on a BI register compared with zero using a predicate_operator
;; (EQ or NE).  No preparation statements are visible for this expander.
633 (define_expand "cbranchbi4"
635 (if_then_else (match_operator 0 "predicate_operator"
636 [(match_operand:BI 1 "nvptx_register_operand" "")
637 (match_operand:BI 2 "const0_operand" "")])
638 (label_ref (match_operand 3 "" ""))
643 ;; Conditional stores
;; Materialise a BI register (operand 1) as an SI value using "selp": the
;; template selects between the constants -1 and 0 according to operand 1.
645 (define_insn "setcc_from_bi"
646 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
647 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
650 "%.\\tselp%t0 %0,-1,0,%1;")
;; Conditional select for HI, SI and DI, controlled by BI register operand 1
;; tested NE against zero.  The "selp" template lists operand 2 first and
;; operand 3 second.
652 (define_insn "sel_true<mode>"
653 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
655 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
656 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
657 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
659 "%.\\tselp%t0\\t%0, %2, %3, %1;")
;; Floating-point counterpart of sel_true.
661 (define_insn "sel_true<mode>"
662 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
664 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
665 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
666 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
668 "%.\\tselp%t0\\t%0, %2, %3, %1;")
;; Conditional select with the controlling test inverted (EQ against zero).
;; The same "selp" is used with operands 3 and 2 exchanged in the template.
670 (define_insn "sel_false<mode>"
671 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
673 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
674 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
675 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
677 "%.\\tselp%t0\\t%0, %3, %2, %1;")
;; Floating-point counterpart of sel_false.
679 (define_insn "sel_false<mode>"
680 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
682 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
683 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
684 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
686 "%.\\tselp%t0\\t%0, %3, %2, %1;")
;; Store the result of an integer comparison (operator 1 on operands 2 and
;; 3) directly into an SI register, output as "set".
688 (define_insn "setcc_int<mode>"
689 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
690 (match_operator:SI 1 "nvptx_comparison_operator"
691 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
692 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
694 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; As above for a floating-point comparison; the result is still SI.
696 (define_insn "setcc_int<mode>"
697 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
698 (match_operator:SI 1 "nvptx_float_comparison_operator"
699 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
700 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
702 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Store the result of an integer comparison into an SF register.
704 (define_insn "setcc_float<mode>"
705 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
706 (match_operator:SF 1 "nvptx_comparison_operator"
707 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
708 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
710 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Store the result of a floating-point comparison into an SF register.
712 (define_insn "setcc_float<mode>"
713 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
714 (match_operator:SF 1 "nvptx_float_comparison_operator"
715 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
716 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
718 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Store-flag expander for a BI register compared with zero; only operators
;; accepted by ne_operator match.  The result is SI.
720 (define_expand "cstorebi4"
721 [(set (match_operand:SI 0 "nvptx_register_operand")
722 (match_operator:SI 1 "ne_operator"
723 [(match_operand:BI 2 "nvptx_register_operand")
724 (match_operand:BI 3 "const0_operand")]))]
;; Store-flag expander for integer comparisons on HI, SI and DI.
728 (define_expand "cstore<mode>4"
729 [(set (match_operand:SI 0 "nvptx_register_operand")
730 (match_operator:SI 1 "nvptx_comparison_operator"
731 [(match_operand:HSDIM 2 "nvptx_register_operand")
732 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
;; Store-flag expander for floating-point comparisons on SF and DF.
736 (define_expand "cstore<mode>4"
737 [(set (match_operand:SI 0 "nvptx_register_operand")
738 (match_operator:SI 1 "nvptx_float_comparison_operator"
739 [(match_operand:SDFM 2 "nvptx_register_operand")
740 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
;; Call without a result.  The whole insn is a PARALLEL accepted by
;; call_operation; operand 0 is the callee.  Output is produced by
;; nvptx_output_call_insn with NULL_RTX passed as the result.
746 (define_insn "call_insn"
747 [(match_parallel 2 "call_operation"
748 [(call (mem:QI (match_operand 0 "call_insn_operand" "Rs"))
749 (match_operand 1))])]
752 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
;; Call with a result: operand 0 receives the value and operand 1 is the
;; callee; both are handed to nvptx_output_call_insn.
755 (define_insn "call_value_insn"
756 [(match_parallel 3 "call_operation"
757 [(set (match_operand 0 "nvptx_register_operand" "=R")
758 (call (mem:QI (match_operand 1 "call_insn_operand" "Rs"))
759 (match_operand 2)))])]
762 return nvptx_output_call_insn (insn, operands[0], operands[1]);
;; Expander for a call without a result: delegates to nvptx_expand_call,
;; passing NULL_RTX as the result and operand 0 as the call address.
765 (define_expand "call"
766 [(match_operand 0 "" "")]
769 nvptx_expand_call (NULL_RTX, operands[0]);
;; Expander for a call with a result: operand 0 is the result and operand 1
;; the call address, both forwarded to nvptx_expand_call.
773 (define_expand "call_value"
774 [(match_operand 0 "" "")
775 (match_operand 1 "" "")]
778 nvptx_expand_call (operands[0], operands[1]);
782 ;; Floating point arithmetic.
;; Floating-point addition for SF and DF; operand 2 may be a register or a
;; floating constant ("F").
784 (define_insn "add<mode>3"
785 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
786 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
787 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
789 "%.\\tadd%t0\\t%0, %1, %2;")
;; Floating-point subtraction; both inputs must be registers.
791 (define_insn "sub<mode>3"
792 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
793 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
794 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
796 "%.\\tsub%t0\\t%0, %1, %2;")
;; Floating-point multiplication.
798 (define_insn "mul<mode>3"
799 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
800 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
801 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
803 "%.\\tmul%t0\\t%0, %1, %2;")
;; Fused multiply-add on operands 1, 2 and 3, output as "fma" with the %#
;; modifier inserted before the type.
805 (define_insn "fma<mode>4"
806 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
807 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
808 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
809 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
811 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
;; Floating-point division, output as "div" with the %# modifier.
813 (define_insn "div<mode>3"
814 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
815 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
816 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
818 "%.\\tdiv%#%t0\\t%0, %1, %2;")
;; copysign for SF and DF, expressed as an unspec over two register
;; operands.  Note that the output template lists operand 2 before
;; operand 1.
820 (define_insn "copysign<mode>3"
821 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
822 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
823 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
826 "%.\\tcopysign%t0\\t%0, %2, %1;")
;; Floating-point minimum, output as "min".
828 (define_insn "smin<mode>3"
829 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
830 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
831 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
833 "%.\\tmin%t0\\t%0, %1, %2;")
;; Floating-point maximum, output as "max".
835 (define_insn "smax<mode>3"
836 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
837 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
838 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
840 "%.\\tmax%t0\\t%0, %1, %2;")
;; Floating-point absolute value.
842 (define_insn "abs<mode>2"
843 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
844 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
846 "%.\\tabs%t0\\t%0, %1;")
;; Floating-point negation.
848 (define_insn "neg<mode>2"
849 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
850 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
852 "%.\\tneg%t0\\t%0, %1;")
;; Floating-point square root, output as "sqrt" with the %# modifier.
854 (define_insn "sqrt<mode>2"
855 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
856 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
858 "%.\\tsqrt%#%t0\\t%0, %1;")
;; Combined expander producing two SF results (operands 0 and 1) from the
;; single SF input operand 2; the second set uses UNSPEC_SIN.  Enabled only
;; under flag_unsafe_math_optimizations.  The input is passed through
;; make_safe_from against operand 0 before the sets are emitted.
;; NOTE(review): the unspec code of the first set is not visible in this
;; excerpt.
860 (define_expand "sincossf3"
861 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
862 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
864 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
865 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
866 "flag_unsafe_math_optimizations"
868 operands[2] = make_safe_from (operands[2], operands[0]);
;; Single-precision sine, output as "sin.approx"; available only under
;; flag_unsafe_math_optimizations.
871 (define_insn "sinsf2"
872 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
873 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
875 "flag_unsafe_math_optimizations"
876 "%.\\tsin.approx%t0\\t%0, %1;")
;; Single-precision cosine, output as "cos.approx"; same enabling condition.
878 (define_insn "cossf2"
879 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
880 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
882 "flag_unsafe_math_optimizations"
883 "%.\\tcos.approx%t0\\t%0, %1;")
;; Single-precision base-2 logarithm, output as "lg2.approx"; same enabling
;; condition.
885 (define_insn "log2sf2"
886 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
887 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
889 "flag_unsafe_math_optimizations"
890 "%.\\tlg2.approx%t0\\t%0, %1;")
;; Single-precision base-2 exponential, output as "ex2.approx"; same
;; enabling condition.
892 (define_insn "exp2sf2"
893 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
894 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
896 "flag_unsafe_math_optimizations"
897 "%.\\tex2.approx%t0\\t%0, %1;")
899 ;; Conversions involving floating point
;; Widen SF to DF with a "cvt" typed by destination then source.
901 (define_insn "extendsfdf2"
902 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
903 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
905 "%.\\tcvt%t0%t1\\t%0, %1;")
;; Narrow DF to SF; the "cvt" additionally carries the %# modifier.
907 (define_insn "truncdfsf2"
908 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
909 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
911 "%.\\tcvt%#%t0%t1\\t%0, %1;")
;; Convert an unsigned SI register to SF or DF ("cvt" from a .u source).
913 (define_insn "floatunssi<mode>2"
914 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
915 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
917 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
;; Convert a signed SI register to SF or DF ("cvt" from a .s source).
919 (define_insn "floatsi<mode>2"
920 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
921 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
923 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
;; Convert an unsigned DI register to SF or DF.
925 (define_insn "floatunsdi<mode>2"
926 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
927 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
929 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
;; Convert a signed DI register to SF or DF.
931 (define_insn "floatdi<mode>2"
932 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
933 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
935 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
;; Convert SF or DF to unsigned SI using "cvt.rzi" with a .u destination.
937 (define_insn "fixuns_trunc<mode>si2"
938 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
939 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
941 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
;; Convert SF or DF to signed SI using "cvt.rzi" with a .s destination.
943 (define_insn "fix_trunc<mode>si2"
944 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
945 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
947 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; Convert SF or DF to unsigned DI.
949 (define_insn "fixuns_trunc<mode>di2"
950 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
951 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
953 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
;; Convert SF or DF to signed DI.
955 (define_insn "fix_trunc<mode>di2"
956 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
957 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
959 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; Float-to-integral-float rounding family.  FPINT iterates over four unspec
;; codes; fpint_name supplies the pattern-name stem for each, and
;; fpint_roundingmode the text placed after "cvt" in the output
;; (".rmi", ".rzi", ".rpi", or "%#i" for nearbyint).
961 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
962 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
963 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
964 (UNSPEC_FPINT_BTRUNC "btrunc")
965 (UNSPEC_FPINT_CEIL "ceil")
966 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
967 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
968 (UNSPEC_FPINT_BTRUNC ".rzi")
969 (UNSPEC_FPINT_CEIL ".rpi")
970 (UNSPEC_FPINT_NEARBYINT "%#i")])
;; One insn per (FPINT code, SF/DF mode) pair; source and result have the
;; same floating-point mode.
972 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
973 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
974 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
977 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
;; Float-to-integer rounding family (lfloor, lceil).  FPINT2 iterates over
;; the floor and ceil unspec codes; fpint2_roundingmode gives the matching
;; "cvt" suffix (".rmi" or ".rpi").
979 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
980 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
981 (UNSPEC_FPINT_CEIL "lceil")])
982 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
983 (UNSPEC_FPINT_CEIL ".rpi")])
;; One insn per (FPINT2 code, SF/DF source mode, SI/DI result mode); the
;; result is output with a signed (.s) integer type.
985 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
986 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
987 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
990 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
;; Placeholder insn.  The visible output text declares two .u32 registers,
;; %nop_src and %nop_dst, and moves one into the other.
;; NOTE(review): the RTL pattern and surrounding template lines are not
;; visible in this excerpt.
1004 (define_insn "fake_nop"
1008 .reg .u32 %%nop_src;
1009 .reg .u32 %%nop_dst;
1010 mov.u32 %%nop_dst, %%nop_src;
;; Function return.  The output text is produced by nvptx_output_return ();
;; the insn is marked not predicable.
1013 (define_insn "return"
1017 return nvptx_output_return ();
1019 [(set_attr "predicable" "false")])
;; Function epilogue.  With TARGET_SOFT_STACK, a set_softstack_insn is
;; emitted on the Pmode register SOFTSTACK_PREV_REGNUM; a return jump insn
;; is then emitted.
1021 (define_expand "epilogue"
1022 [(clobber (const_int 0))]
1025 if (TARGET_SOFT_STACK)
1026 emit_insn (gen_set_softstack_insn (gen_rtx_REG (Pmode,
1027 SOFTSTACK_PREV_REGNUM)));
1028 emit_jump_insn (gen_return ());
;; Nonlocal goto is not available on this target: expanding the pattern
;; reports that through sorry () and then emits a nop.  The four operands
;; are accepted but not used by the visible statements.
;; The diagnostic text has no trailing period, in line with GNU message
;; style and the "label values" diagnostic in the mov<mode> expander.
1032 (define_expand "nonlocal_goto"
1033 [(match_operand 0 "" "")
1034 (match_operand 1 "" "")
1035 (match_operand 2 "" "")
1036 (match_operand 3 "" "")]
1039 sorry ("target cannot support nonlocal goto");
1040 emit_insn (gen_nop ());
;; Receiver side of a nonlocal goto, likewise unsupported: expanding it
;; reports the limitation through sorry ().
;; The diagnostic text has no trailing period, in line with GNU message
;; style and the "label values" diagnostic in the mov<mode> expander.
1044 (define_expand "nonlocal_goto_receiver"
1048 sorry ("target cannot support nonlocal goto");
;; Dynamic stack allocation.  Visible behaviour: with TARGET_SOFT_STACK the
;; stack pointer is lowered by operand 1, set_softstack_insn is emitted on
;; the stack pointer, and operand 0 receives virtual_stack_dynamic_rtx.
;; The remaining statements diagnose alloca as unsupported and emit a nop.
;; NOTE(review): the braces and early exit separating the two paths are not
;; visible in this excerpt -- confirm the control flow in the full file.
;; The diagnostic text has no trailing period, in line with GNU message
;; style and the "label values" diagnostic in the mov<mode> expander.
1051 (define_expand "allocate_stack"
1052 [(match_operand 0 "nvptx_register_operand")
1053 (match_operand 1 "nvptx_register_operand")]
1056 if (TARGET_SOFT_STACK)
1058 emit_move_insn (stack_pointer_rtx,
1059 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1060 emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
1061 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1064 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1065 only) but notes it is not implemented. The assembler emits a
1066 confused error message. Issue a blunt one now instead. */
1067 sorry ("target cannot support alloca");
1068 emit_insn (gen_nop ());
;; UNSPEC_SET_SOFTSTACK applied to a register operand.  The output text is
;; produced by nvptx_output_set_softstack from that register's number.
1072 (define_insn "set_softstack_insn"
1073 [(unspec [(match_operand 0 "nvptx_register_operand" "R")]
1074 UNSPEC_SET_SOFTSTACK)]
1077 return nvptx_output_set_softstack (REGNO (operands[0]));
;; Restore a saved stack pointer at block level.  With TARGET_SOFT_STACK,
;; operand 1 is copied into operand 0 and set_softstack_insn is then
;; emitted on operand 0.
1080 (define_expand "restore_stack_block"
1081 [(match_operand 0 "register_operand" "")
1082 (match_operand 1 "register_operand" "")]
1085 if (TARGET_SOFT_STACK)
1087 emit_move_insn (operands[0], operands[1]);
1088 emit_insn (gen_set_softstack_insn (operands[0]));
;; Function-level counterpart taking the same two register operands.
;; NOTE(review): its expansion body is not visible in this excerpt.
1093 (define_expand "restore_stack_function"
1094 [(match_operand 0 "register_operand" "")
1095 (match_operand 1 "register_operand" "")]
1102 [(trap_if (const_int 1) (const_int 0))]
;; Conditional trap controlled by an NE test on BI register operand 0.  The
;; output is "trap" followed by "exit", each prefixed with %j0.  Marked not
;; predicable.
1106 (define_insn "trap_if_true"
1107 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1111 "%j0 trap; %j0 exit;"
1112 [(set_attr "predicable" "false")])
;; EQ variant; both output instructions are prefixed with %J0 instead.
1114 (define_insn "trap_if_false"
1115 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1119 "%J0 trap; %J0 exit;"
1120 [(set_attr "predicable" "false")])
;; Compare-and-trap expander for SI and DI.  The comparison in operand 0 is
;; passed to nvptx_expand_compare and the rtx it returns is handed to
;; trap_if_true.  Operand 3 must be the constant zero.
1122 (define_expand "ctrap<mode>4"
1123 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1124 [(match_operand:SDIM 1 "nvptx_register_operand")
1125 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1126 (match_operand 3 "const0_operand"))]
1129 rtx t = nvptx_expand_compare (operands[0]);
1130 emit_insn (gen_trap_if_true (t));
;; Insn "oacc_dim_size": sets SImode register operand 0 from an unspec of
;; the constant operand 1.  Operand 1 is used directly as an index into
;; the asms[] table to choose the output template:
;;   gang   -> mov.u32 from %nctaid.x
;;   worker -> mov.u32 from %ntid.y
;;   vector -> mov.u32 from %ntid.x
;; No range check on the index is visible here, so operand 1 must be a
;; valid table index.
1134 (define_insn "oacc_dim_size"
1135 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1136 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1140 static const char *const asms[] =
1141 { /* Must match oacc_loop_levels ordering. */
1142 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1143 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1144 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1146 return asms[INTVAL (operands[1])];
;; Insn "oacc_dim_pos": sets SImode register operand 0 from an
;; unspec_volatile of the constant operand 1.  Operand 1 is used directly
;; as an index into the asms[] table to choose the output template:
;;   gang   -> mov.u32 from %ctaid.x
;;   worker -> mov.u32 from %tid.y
;;   vector -> mov.u32 from %tid.x
;; No range check on the index is visible here, so operand 1 must be a
;; valid table index.
1149 (define_insn "oacc_dim_pos"
1150 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1151 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1155 static const char *const asms[] =
1156 { /* Must match oacc_loop_levels ordering. */
1157 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1158 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1159 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1161 return asms[INTVAL (operands[1])];
;; Insn "nvptx_fork": an SImode unspec_volatile over the constant
;; operand 0, marked as not predicable.
;; NOTE(review): the unspec code and the output template are not visible
;; in this excerpt.
1164 (define_insn "nvptx_fork"
1165 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1169 [(set_attr "predicable" "false")])
;; Insn "nvptx_forked": an SImode unspec_volatile over the constant
;; operand 0, marked as not predicable.
;; NOTE(review): the unspec code and the output template are not visible
;; in this excerpt.
1171 (define_insn "nvptx_forked"
1172 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1176 [(set_attr "predicable" "false")])
;; Insn "nvptx_joining": an SImode unspec_volatile over the constant
;; operand 0, marked as not predicable.
;; NOTE(review): the unspec code and the output template are not visible
;; in this excerpt.
1178 (define_insn "nvptx_joining"
1179 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1183 [(set_attr "predicable" "false")])
;; Insn "nvptx_join": an SImode unspec_volatile over the constant
;; operand 0, marked as not predicable.
;; NOTE(review): the unspec code and the output template are not visible
;; in this excerpt.
1185 (define_insn "nvptx_join"
1186 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1190 [(set_attr "predicable" "false")])
;; Expander "oacc_fork".
;;   operand 0: destination (register or constant)
;;   operand 1: source value
;;   operand 2: constant integer, passed on to nvptx_expand_oacc_fork
;; Operand 1 is copied into operand 0 unless operand 0 is const0_rtx;
;; nvptx_expand_oacc_fork is then called with the value of operand 2.
1192 (define_expand "oacc_fork"
1193 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1194 (match_operand:SI 1 "general_operand" ""))
1195 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1199 if (operands[0] != const0_rtx)
1200 emit_move_insn (operands[0], operands[1]);
1201 nvptx_expand_oacc_fork (INTVAL (operands[2]));
;; Expander "oacc_join".
;;   operand 0: destination (register or constant)
;;   operand 1: source value
;;   operand 2: constant integer, passed on to nvptx_expand_oacc_join
;; Operand 1 is copied into operand 0 unless operand 0 is const0_rtx;
;; nvptx_expand_oacc_join is then called with the value of operand 2.
1205 (define_expand "oacc_join"
1206 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1207 (match_operand:SI 1 "general_operand" ""))
1208 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1212 if (operands[0] != const0_rtx)
1213 emit_move_insn (operands[0], operands[1]);
1214 nvptx_expand_oacc_join (INTVAL (operands[2]));
1218 ;; only 32-bit shuffles exist.
;; Insn "nvptx_shuffle<mode>" for the BITS modes.
;;   operand 0: destination register
;;   operand 1: source register
;;   operand 2: SImode register or immediate
;;   operand 3: SImode constant, printed through %S3 as part of the
;;              shfl mnemonic
;; The output template always emits the .b32 form of shfl and passes the
;; literal 31 as its last argument.
;; NOTE(review): %S3 presumably selects the shuffle kind suffix -- confirm
;; against the target's operand printer.
1219 (define_insn "nvptx_shuffle<mode>"
1220 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1222 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1223 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1224 (match_operand:SI 3 "const_int_operand" "n")]
1227 "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
;; Insn "nvptx_vote_ballot": sets SImode register operand 0 to an
;; UNSPEC_VOTE_BALLOT unspec of BImode register operand 1, emitted as a
;; vote.ballot.b32 instruction.
1229 (define_insn "nvptx_vote_ballot"
1230 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1231 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1232 UNSPEC_VOTE_BALLOT))]
1234 "%.\\tvote.ballot.b32\\t%0, %1;")
1236 ;; Patterns for OpenMP SIMD-via-SIMT lowering
;; Insn "omp_simt_enter_insn": sets register operand 0 from an
;; UNSPECV_SIMT_ENTER unspec_volatile of operands 1 and 2 (each a
;; register or constant).  The assembly text is produced by
;; nvptx_output_simt_enter, which receives all three operands.
1238 (define_insn "omp_simt_enter_insn"
1239 [(set (match_operand 0 "nvptx_register_operand" "=R")
1240 (unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri")
1241 (match_operand 2 "nvptx_nonmemory_operand" "Ri")]
1242 UNSPECV_SIMT_ENTER))]
1245 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
;; Expander "omp_simt_enter".
;;   operand 0: destination register
;;   operand 1: register or constant, used to update simt_stack_size
;;   operand 2: constant integer, used to update simt_stack_align
;; Bookkeeping done on cfun->machine before the insn is emitted:
;;   - simt_stack_size is set to HOST_WIDE_INT_M1U when operand 1 is not
;;     a CONST_INT, and is raised to at least the value of operand 1 by
;;     the MAX update.
;;   - simt_stack_align is raised to at least the value of operand 2.
;;   - has_simtreg is set to true.
;; Finally omp_simt_enter_insn is emitted with the same three operands.
;; NOTE(review): the line between the two simt_stack_size assignments is
;; not visible in this excerpt; presumably it is an "else" -- confirm.
1248 (define_expand "omp_simt_enter"
1249 [(match_operand 0 "nvptx_register_operand" "=R")
1250 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1251 (match_operand 2 "const_int_operand" "n")]
1254 if (!CONST_INT_P (operands[1]))
1255 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1257 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1258 cfun->machine->simt_stack_size);
1259 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1260 cfun->machine->simt_stack_align);
1261 cfun->machine->has_simtreg = true;
1262 emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
;; Insn "omp_simt_exit": an unspec_volatile over register operand 0.  The
;; assembly text is produced by nvptx_output_simt_exit, which receives
;; operand 0.
;; NOTE(review): the unspec code is not visible in this excerpt.
1266 (define_insn "omp_simt_exit"
1267 [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")]
1271 return nvptx_output_simt_exit (operands[0]);
1274 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; The value is modelled as an UNSPEC_LANEID unspec and is emitted as a
;; mov.u32 from the %laneid special register.
1275 (define_insn "omp_simt_lane"
1276 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1277 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1279 "%.\\tmov.u32\\t%0, %%laneid;")
1281 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1282 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
;; The barrier is the nvptx_nounroll insn, emitted right after the move.
1283 (define_expand "omp_simt_ordered"
1284 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1285 (match_operand:SI 1 "nvptx_register_operand" "R")]
1288 emit_move_insn (operands[0], operands[1]);
1289 emit_insn (gen_nvptx_nounroll ());
1293 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;;   operand 0: destination register
;;   operand 1: source register
;;   operand 2: SImode register or constant
;; The expansion emits the insn(s) returned by nvptx_gen_shuffle for
;; these three operands.
;; NOTE(review): the final argument to nvptx_gen_shuffle (the shuffle
;; kind) is not visible in this excerpt.
1295 (define_expand "omp_simt_xchg_bfly"
1296 [(match_operand 0 "nvptx_register_operand" "=R")
1297 (match_operand 1 "nvptx_register_operand" "R")
1298 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1301 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1306 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1307 ;; from lane given by index in operand 2 to operand 0 in all lanes
;; The expansion emits the insn(s) returned by nvptx_gen_shuffle for
;; operands 0, 1 and 2.
;; NOTE(review): the final argument to nvptx_gen_shuffle (the shuffle
;; kind) is not visible in this excerpt.
1308 (define_expand "omp_simt_xchg_idx"
1309 [(match_operand 0 "nvptx_register_operand" "=R")
1310 (match_operand 1 "nvptx_register_operand" "R")
1311 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1314 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1319 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1320 ;; set operand 0 to zero iff all lanes supply zero in operand 1
;; Steps: compute a fresh BImode predicate "operand 1 != 0", then emit
;; nvptx_vote_ballot on that predicate with operand 0 as the destination.
1321 (define_expand "omp_simt_vote_any"
1322 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1323 (match_operand:SI 1 "nvptx_register_operand" "R")]
1326 rtx pred = gen_reg_rtx (BImode);
1327 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1328 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
1332 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
1333 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; Steps: compute a fresh BImode predicate "operand 1 != 0", collect it
;; with nvptx_vote_ballot into an SImode temporary, then apply ctzsi2 to
;; the temporary and store the result in operand 0.
1334 (define_expand "omp_simt_last_lane"
1335 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1336 (match_operand:SI 1 "nvptx_register_operand" "R")]
1339 rtx pred = gen_reg_rtx (BImode);
1340 rtx tmp = gen_reg_rtx (SImode);
1341 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1342 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
1343 emit_insn (gen_ctzsi2 (operands[0], tmp));
1347 ;; extract parts of a 64 bit object into 2 32-bit ints
;;   operands 0 and 1: SImode destination registers
;;   operand 2: BITD-mode source register
;; Each destination is an UNSPEC_BIT_CONV unspec of operand 2, tagged
;; with (const_int 0) for operand 0 and (const_int 1) for operand 1.
;; Both are produced by a single mov.b64 into the vector {%0,%1}.
1348 (define_insn "unpack<mode>si2"
1349 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1350 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1351 (const_int 0)] UNSPEC_BIT_CONV))
1352 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1353 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1355 "%.\\tmov.b64\\t{%0,%1}, %2;")
1357 ;; pack 2 32-bit ints into a 64 bit object
;;   operand 0: BITD-mode destination register
;;   operands 1 and 2: SImode source registers
;; Emitted as a single mov.b64 from the vector {%1,%2} into %0.
1358 (define_insn "packsi<mode>2"
1359 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1360 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1361 (match_operand:SI 2 "nvptx_register_operand" "R")]
1364 "%.\\tmov.b64\\t%0, {%1,%2};")
;; Expander "atomic_compare_and_swap<mode>" for the SDIM modes.
;; It first emits atomic_compare_and_swap<mode>_1 with the oldval output,
;; memory, expected value, new value and success model (operands 1, 2, 3,
;; 4 and 6).  It then derives the boolean result: a BImode register is
;; set to "oldval == expected", and sel_truesi stores 1 or 0 into
;; operand 0 depending on that condition.
;; The visible code does not use operand 5 (is_weak) or operand 7
;; (failure model).
1368 (define_expand "atomic_compare_and_swap<mode>"
1369 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
1370 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
1371 (match_operand:SDIM 2 "memory_operand") ;; memory
1372 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
1373 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
1374 (match_operand:SI 5 "const_int_operand") ;; is_weak
1375 (match_operand:SI 6 "const_int_operand") ;; success model
1376 (match_operand:SI 7 "const_int_operand")] ;; failure model
1379 emit_insn (gen_atomic_compare_and_swap<mode>_1
1380 (operands[1], operands[2], operands[3], operands[4], operands[6]));
1382 rtx cond = gen_reg_rtx (BImode);
1383 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
1384 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
;; Insn "atomic_compare_and_swap<mode>_1" for the SDIM modes.
;;   operand 0: result register
;;   operand 1: memory location (read and written, "+m")
;;   operand 2: register or immediate, first value passed to the cas
;;   operand 3: register or immediate, second value passed to the cas
;;   operand 4: constant integer; it does not appear in the output
;;              template
;; Emitted as an atom ... .cas instruction with arguments %0, %1, %2, %3
;; and tagged with the "atomic" attribute.
;; NOTE(review): %A1 and %T0 are target-specific operand modifiers;
;; presumably they print the address-space and bit-size parts of the
;; mnemonic -- confirm against the target's operand printer.
1388 (define_insn "atomic_compare_and_swap<mode>_1"
1389 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1390 (unspec_volatile:SDIM
1391 [(match_operand:SDIM 1 "memory_operand" "+m")
1392 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
1393 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
1394 (match_operand:SI 4 "const_int_operand")]
1397 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
1399 "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
1400 [(set_attr "atomic" "true")])
;; Insn "atomic_exchange<mode>" for the SDIM modes.  Operand roles are
;; given by the inline comments (output, memory, model, input).  Emitted
;; as an atom ... .exch instruction with arguments %0, %1, %2 and tagged
;; with the "atomic" attribute.  Operand 3 (model) does not appear in the
;; output template.
1402 (define_insn "atomic_exchange<mode>"
1403 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
1404 (unspec_volatile:SDIM
1405 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
1406 (match_operand:SI 3 "const_int_operand")] ;; model
1409 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
1411 "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
1412 [(set_attr "atomic" "true")])
;; Insn "atomic_fetch_add<mode>" for the SDIM modes.
;;   operand 0: result register
;;   operand 1: memory location (read and written, "+m")
;;   operand 2: register or immediate addend
;;   operand 3: memory model constant; it does not appear in the output
;;              template
;; Emitted as an atom ... .add instruction with arguments %0, %1, %2 and
;; tagged with the "atomic" attribute.
;; NOTE(review): the source expression of the set of operand 0 is not
;; visible in this excerpt.
1414 (define_insn "atomic_fetch_add<mode>"
1415 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1416 (unspec_volatile:SDIM
1417 [(plus:SDIM (match_dup 1)
1418 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1419 (match_operand:SI 3 "const_int_operand")] ;; model
1421 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1424 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1425 [(set_attr "atomic" "true")])
;; Insn "atomic_fetch_addsf": the SFmode counterpart of the
;; atomic_fetch_add<mode> pattern.
;;   operand 0: result register
;;   operand 1: memory location (read and written, "+m")
;;   operand 2: addend, constraint "RF"
;;   operand 3: memory model constant; it does not appear in the output
;;              template
;; Emitted as an atom ... .add instruction with arguments %0, %1, %2 and
;; tagged with the "atomic" attribute.
;; NOTE(review): the source expression of the set of operand 0 is not
;; visible in this excerpt.
1427 (define_insn "atomic_fetch_addsf"
1428 [(set (match_operand:SF 1 "memory_operand" "+m")
1430 [(plus:SF (match_dup 1)
1431 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
1432 (match_operand:SI 3 "const_int_operand")] ;; model
1434 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
1437 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1438 [(set_attr "atomic" "true")])
;; Code iterator over the three bitwise RTL codes and, ior, xor, together
;; with the code attribute "logic" that maps each code to the string
;; "and", "or" or "xor".  <logic> is substituted into pattern names and
;; output templates that use any_logic.
1440 (define_code_iterator any_logic [and ior xor])
1441 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
;; Insn "atomic_fetch_<logic><mode>": one pattern per any_logic code
;; (and, ior, xor) and per SDIM mode.
;;   operand 0: result register
;;   operand 1: memory location (read and written, "+m")
;;   operand 2: register or immediate second input of the logic operation
;;   operand 3: memory model constant; it does not appear in the output
;;              template
;; The insn condition enables the SImode variant unconditionally; every
;; other mode additionally requires TARGET_SM35.
;; Emitted as an atom ... .<logic> instruction with arguments %0, %1, %2
;; and tagged with the "atomic" attribute.
;; NOTE(review): the source expression of the set of operand 0 is not
;; visible in this excerpt.
1443 (define_insn "atomic_fetch_<logic><mode>"
1444 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1445 (unspec_volatile:SDIM
1446 [(any_logic:SDIM (match_dup 1)
1447 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1448 (match_operand:SI 3 "const_int_operand")] ;; model
1450 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1452 "<MODE>mode == SImode || TARGET_SM35"
1453 "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
1454 [(set_attr "atomic" "true")])
;; Insn "nvptx_barsync": an unspec_volatile over the SImode constant
;; operand 0, marked as not predicable.
;; NOTE(review): the unspec code and the output template are not visible
;; in this excerpt.
1456 (define_insn "nvptx_barsync"
1457 [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
1461 [(set_attr "predicable" "false")])
;; Expander "memory_barrier".  The preparation code creates operand 0 as
;; a BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile.
;; The template wraps that operand in an UNSPECV_MEMBAR unspec_volatile.
1463 (define_expand "memory_barrier"
1465 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1468 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1469 MEM_VOLATILE_P (operands[0]) = 1;
1472 ;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
1473 ;; (corresponding to cuda functions threadfence_block, threadfence and
1474 ;; threadfence_system). For the insn memory_barrier we use membar.sys. This
1475 ;; may be overconservative, but before using membar.gl instead we'll need to
1476 ;; explain in detail why it's safe to use. For now, use membar.sys.
;; This pattern matches a set of a BLKmode operand from an UNSPECV_MEMBAR
;; unspec_volatile of that same operand, and is marked as not predicable.
;; NOTE(review): the output template line is not visible in this excerpt.
1477 (define_insn "*memory_barrier"
1478 [(set (match_operand:BLK 0 "" "")
1479 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1482 [(set_attr "predicable" "false")])
;; Expander "nvptx_membar_cta".  The preparation code creates operand 0
;; as a BLKmode MEM whose address is a Pmode SCRATCH and marks it
;; volatile.  The template wraps that operand in an UNSPECV_MEMBAR_CTA
;; unspec_volatile.
1484 (define_expand "nvptx_membar_cta"
1486 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1489 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1490 MEM_VOLATILE_P (operands[0]) = 1;
;; This pattern matches a set of a BLKmode operand from an
;; UNSPECV_MEMBAR_CTA unspec_volatile of that same operand, and is marked
;; as not predicable.
;; NOTE(review): the output template line is not visible in this excerpt;
;; presumably it emits membar.cta -- confirm.
1493 (define_insn "*nvptx_membar_cta"
1494 [(set (match_operand:BLK 0 "" "")
1495 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1498 [(set_attr "predicable" "false")])
;; Insn "nvptx_nounroll": an UNSPECV_NOUNROLL unspec_volatile with no
;; real operands.  It emits a .pragma "nounroll" directive into the
;; output and is marked as not predicable.
1500 (define_insn "nvptx_nounroll"
1501 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
1503 "\\t.pragma \\\"nounroll\\\";"
1504 [(set_attr "predicable" "false")])