1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2024 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; C enum "unspec": names used as unspec codes by patterns in this file
;; (only part of the member list is visible here).
21 (define_c_enum "unspec" [
35 UNSPEC_FPINT_NEARBYINT
;; C enum "unspecv": a second, separate list of operation codes
;; (only part of the member list is visible here).
53 (define_c_enum "unspecv" [
64 UNSPECV_UNIFORM_WARP_CHECK
;; Insn attribute "subregs_ok" with values false/true.  The default is
;; false; patterns opt in with (set_attr "subregs_ok" "true").
83 (define_attr "subregs_ok" "false,true"
84 (const_string "false"))
;; Insn attribute "atomic" with values false/true, defaulting to false.
86 (define_attr "atomic" "false,true"
87 (const_string "false"))
89 ;; The nvptx operand predicates, in general, don't permit subregs and
90 ;; accept only literal constants.  This differs from the generic ones, which
91 ;; permit subregs and symbolic constants (as appropriate).
;; Register predicate for nvptx.  The visible body defers to the generic
;; register_operand (op, mode).
;; NOTE(review): the match_code list that narrows the accepted codes is not
;; visible in this view.
92 (define_predicate "nvptx_register_operand"
95 return register_operand (op, mode);
;; Accepts either a REG that satisfies nvptx_register_operand, or a CONCAT
;; in DCmode or CDImode whose two halves both satisfy
;; nvptx_register_operand.
98 (define_predicate "nvptx_register_or_complex_di_df_register_operand"
99 (ior (match_code "reg")
100 (match_code "concat"))
102 if (GET_CODE (op) == CONCAT)
103 return ((GET_MODE (op) == DCmode || GET_MODE (op) == CDImode)
104 && nvptx_register_operand (XEXP (op, 0), mode)
105 && nvptx_register_operand (XEXP (op, 1), mode));
107 return nvptx_register_operand (op, mode);
;; Accepts only MEM and REG codes.  A REG must pass register_operand;
;; anything else (i.e. a MEM) must pass memory_operand.
110 (define_predicate "nvptx_nonimmediate_operand"
111 (match_code "mem,reg")
113 return (REG_P (op) ? register_operand (op, mode)
114 : memory_operand (op, mode));
;; Accepts only REG, CONST_INT and CONST_DOUBLE codes.  A REG must pass
;; register_operand; the constants must pass immediate_operand.
117 (define_predicate "nvptx_nonmemory_operand"
118 (match_code "reg,const_int,const_double")
120 return (REG_P (op) ? register_operand (op, mode)
121 : immediate_operand (op, mode));
;; Matches exactly the CONST_INT zero, tested by pointer identity with
;; const0_rtx.
124 (define_predicate "const0_operand"
125 (and (match_code "const_int")
126 (match_test "op == const0_rtx")))
128 ;; True if this operator is valid for predication.
;; Only the EQ and NE comparison codes are accepted.
129 (define_predicate "predicate_operator"
130 (match_code "eq,ne"))
132 (define_predicate "ne_operator"
;; Comparison codes accepted by the integer compare/branch/store patterns:
;; equality, signed orderings and unsigned orderings.
135 (define_predicate "nvptx_comparison_operator"
136 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
;; Comparison codes accepted by the floating-point compare/branch/store
;; patterns: the ordered comparisons, their UN* counterparts, and the
;; UNORDERED / ORDERED tests.
138 (define_predicate "nvptx_float_comparison_operator"
139 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
;; A CONST_INT whose value, taken via UINTVAL, is below 4.
141 (define_predicate "nvptx_vector_index_operand"
142 (and (match_code "const_int")
143 (match_test "UINTVAL (op) < 4")))
145 ;; Test for a valid operand for a call instruction.
;; Accepts a REG, or a SYMBOL_REF for which SYMBOL_REF_FUNCTION_P holds
;; (data symbols are rejected).
146 (define_predicate "call_insn_operand"
147 (match_code "symbol_ref,reg")
149 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
152 ;; Return true if OP is a call with parallel USEs of the argument registers.
;; Predicate over a PARALLEL.  The loop starts at index 1, so element 0 is
;; not inspected here; every later element is tested for being a USE whose
;; operand is a REG.
;; NOTE(review): the action taken when that test fails, and the final
;; return, are not visible in this view.
154 (define_predicate "call_operation"
155 (match_code "parallel")
157 int arg_end = XVECLEN (op, 0);
159 for (int i = 1; i < arg_end; i++)
161 rtx elt = XVECEXP (op, 0, i);
163 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
169 ;; Test for a function symbol ref operand
;; A SYMBOL_REF for which SYMBOL_REF_FUNCTION_P holds.
170 (define_predicate "symbol_ref_function_operand"
171 (match_code "symbol_ref")
173 return SYMBOL_REF_FUNCTION_P (op);
;; Insn attribute "predicable" with values no/yes.  The default is yes;
;; patterns that must not be predicated set it to "no" explicitly (the
;; conditional branch patterns in this file do so).
176 (define_attr "predicable" "no,yes"
177 (const_string "yes"))
180 [(match_operator 0 "predicate_operator"
181 [(match_operand:BI 1 "nvptx_register_operand" "")
182 (match_operand:BI 2 "const0_operand" "")])]
;; Constraint "P0": CONST_INT equal to 0.
187 (define_constraint "P0"
188 "An integer with the value 0."
189 (and (match_code "const_int")
190 (match_test "ival == 0")))
;; Constraint "P1": CONST_INT equal to 1.
192 (define_constraint "P1"
193 "An integer with the value 1."
194 (and (match_code "const_int")
195 (match_test "ival == 1")))
;; Constraint "Pn": CONST_INT equal to -1.
197 (define_constraint "Pn"
198 "An integer with the value -1."
199 (and (match_code "const_int")
200 (match_test "ival == -1")))
202 (define_constraint "R"
;; Constraint "Ia": any CONST_INT.  The (match_test "true") arm never
;; rejects, so the code check alone decides.
206 (define_constraint "Ia"
207 "Any integer constant."
208 (and (match_code "const_int") (match_test "true")))
;; Mode iterators.  A pattern written with one of these names in place of a
;; machine mode is instantiated once per listed mode; <mode>/<MODE> in the
;; pattern then stands for the current mode.  Each name lists its members:
;; integer sets (QHSDIM, HSDIM, BHSDIM, SDIM, QHIM, QHSIM), float sets
;; (SDFM, HSFM), mixed integer/float sets (QHSDISDFM, SDISDFM), complex
;; modes (SDCM), same-width int/float pairs (BITS, BITD) and the two-element
;; integer vectors (VECIM).
210 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
211 (define_mode_iterator QHSDIM [QI HI SI DI])
212 (define_mode_iterator HSDIM [HI SI DI])
213 (define_mode_iterator BHSDIM [BI HI SI DI])
214 (define_mode_iterator SDIM [SI DI])
215 (define_mode_iterator SDISDFM [SI DI SF DF])
216 (define_mode_iterator QHIM [QI HI])
217 (define_mode_iterator QHSIM [QI HI SI])
218 (define_mode_iterator SDFM [SF DF])
219 (define_mode_iterator HSFM [HF SF])
220 (define_mode_iterator SDCM [SC DC])
221 (define_mode_iterator BITS [SI SF])
222 (define_mode_iterator BITD [DI DF])
223 (define_mode_iterator VECIM [V2SI V2DI])
225 ;; This mode iterator allows :P to be used for patterns that operate on
226 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
;; Each member carries a condition comparing Pmode with that member, so a
;; :P pattern is enabled only for the mode that equals Pmode.
227 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
229 ;; Define element mode for each vector mode.
;; VECELEM yields the element mode in upper case, Vecelem the same name in
;; lower case, for each vector mode of the VECIM iterator.
230 (define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
231 (define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])
233 ;; We should get away with not defining memory alternatives, since we don't
234 ;; get variables in this mode and pseudos are never spilled.
236 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
237 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))]
240 %.\\tmov%t0\\t%0, %1;
241 %.\\tsetp.eq.u32\\t%0, 1, 0;
242 %.\\tsetp.eq.u32\\t%0, 1, 1;")
;; Move for the V2SI/V2DI vector modes.  The insn condition rejects a memory
;; destination unless the source is a register.  Alternatives:
;;   0: register or immediate -> register, text from nvptx_output_mov_insn
;;   1: memory -> register, emitted as an "ld"
;;   2: register -> memory, emitted as an "st"
244 (define_insn "*mov<mode>_insn"
245 [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
246 (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
247 "!MEM_P (operands[0]) || REG_P (operands[1])"
249 if (which_alternative == 1)
250 return "%.\\tld%A1%u1\\t%0, %1;";
251 if (which_alternative == 2)
252 return "%.\\tst%A0%u0\\t%0, %1;";
254 return nvptx_output_mov_insn (operands[0], operands[1]);
256 [(set_attr "subregs_ok" "true")])
;; Move for the integer modes QI/HI/SI/DI.  The insn condition rejects a
;; memory destination unless the source is a register.  Alternatives:
;;   0: register or immediate -> register, text from nvptx_output_mov_insn
;;   1: memory -> register, emitted as an "ld"
;;   2: register -> memory, emitted as an "st"
258 (define_insn "*mov<mode>_insn"
259 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
260 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
261 "!MEM_P (operands[0]) || REG_P (operands[1])"
263 if (which_alternative == 1)
264 return "%.\\tld%A1%u1\\t%0, %1;";
265 if (which_alternative == 2)
266 return "%.\\tst%A0%u0\\t%0, %1;";
268 return nvptx_output_mov_insn (operands[0], operands[1]);
270 [(set_attr "subregs_ok" "true")])
272 ;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up.
274 [(set (match_operand:DI 0 "nvptx_register_operand")
275 (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand")
276 (match_operand 2 "const_int_operand"))))]
278 [(set (match_dup 0) (match_dup 1))
279 (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
;; Move for the floating-point modes SF/DF.  Same three alternatives as the
;; integer move (register/constant -> register, load, store); the constant
;; alternative uses constraint "F" instead of "i".
;; NOTE(review): the load prints %u0 and the store %u1, the reverse of the
;; integer and vector move patterns (%u1 / %u0).  Both operands have the
;; same mode here, so the output is presumably identical - confirm.
283 (define_insn "*mov<mode>_insn"
284 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
285 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
286 "!MEM_P (operands[0]) || REG_P (operands[1])"
288 if (which_alternative == 1)
289 return "%.\\tld%A1%u0\\t%0, %1;";
290 if (which_alternative == 2)
291 return "%.\\tst%A0%u1\\t%0, %1;";
293 return nvptx_output_mov_insn (operands[0], operands[1]);
295 [(set_attr "subregs_ok" "true")])
;; HFmode move, emitted as an untyped 16-bit mov / ld / st (".b16").  The
;; source predicate is nonimmediate_operand, so there is no constant
;; alternative; the insn condition forbids memory-to-memory moves.
297 (define_insn "*movhf_insn"
298 [(set (match_operand:HF 0 "nonimmediate_operand" "=R,R,m")
299 (match_operand:HF 1 "nonimmediate_operand" "R,m,R"))]
300 "!MEM_P (operands[0]) || REG_P (operands[1])"
302 %.\\tmov.b16\\t%0, %1;
303 %.\\tld.b16\\t%0, %1;
304 %.\\tst.b16\\t%0, %1;"
305 [(set_attr "subregs_ok" "true")])
;; HFmode move expander.
;;  * CONST_DOUBLE source: the value is converted to SFmode, moved into a
;;    fresh SF register and narrowed with truncsfhf2.  When the destination
;;    is not a REG the truncation targets a fresh HF register that is then
;;    moved to the destination; otherwise it targets the destination.
;;  * MEM destination with a non-register source: the source is first
;;    copied into a fresh HF register.
;; NOTE(review): the braces and DONE/else lines are not visible here, so the
;; exact fall-through between these cases cannot be confirmed from this view.
307 (define_expand "movhf"
308 [(set (match_operand:HF 0 "nonimmediate_operand" "")
309 (match_operand:HF 1 "nonimmediate_operand" ""))]
312 /* Load HFmode constants as SFmode with an explicit FLOAT_TRUNCATE. */
313 if (CONST_DOUBLE_P (operands[1]))
315 rtx tmp1 = gen_reg_rtx (SFmode);
316 REAL_VALUE_TYPE d = *CONST_DOUBLE_REAL_VALUE (operands[1]);
317 real_convert (&d, SFmode, &d);
318 emit_move_insn (tmp1, const_double_from_real_value (d, SFmode));
320 if (!REG_P (operands[0]))
322 rtx tmp2 = gen_reg_rtx (HFmode);
323 emit_insn (gen_truncsfhf2 (tmp2, tmp1));
324 emit_move_insn (operands[0], tmp2);
327 emit_insn (gen_truncsfhf2 (operands[0], tmp1));
331 if (MEM_P (operands[0]) && !REG_P (operands[1]))
333 rtx tmp = gen_reg_rtx (HFmode);
334 emit_move_insn (tmp, operands[1]);
335 emit_move_insn (operands[0], tmp);
;; Reads the incoming argument register printed as %ar<N>, where N is the
;; constant operand 1, into a QI/HI register.  Uses "cvt" from .u32.
340 (define_insn "load_arg_reg<mode>"
341 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
342 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
345 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
;; Reads the incoming argument register printed as %ar<N>, where N is the
;; constant operand 1, into an SI/DI/SF/DF register with a plain "mov".
347 (define_insn "load_arg_reg<mode>"
348 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
349 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
352 "%.\\tmov%t0\\t%0, %%ar%1;")
;; Vector move expander.  A store of anything other than a register is
;; split in two: the source is first moved into a fresh register of the same
;; mode, then that register is stored.
354 (define_expand "mov<mode>"
355 [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
356 (match_operand:VECIM 1 "general_operand" ""))]
359 if (MEM_P (operands[0]) && !REG_P (operands[1]))
361 rtx tmp = gen_reg_rtx (<MODE>mode);
362 emit_move_insn (tmp, operands[1]);
363 emit_move_insn (operands[0], tmp);
;; Scalar integer/float move expander.  A store of anything other than a
;; register goes through a fresh register of the same mode.  A LABEL_REF
;; source is reported with sorry (), as label values are unsupported.
368 (define_expand "mov<mode>"
369 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
370 (match_operand:QHSDISDFM 1 "general_operand" ""))]
373 if (MEM_P (operands[0]) && !REG_P (operands[1]))
375 rtx tmp = gen_reg_rtx (<MODE>mode);
376 emit_move_insn (tmp, operands[1]);
377 emit_move_insn (operands[0], tmp);
381 if (GET_CODE (operands[1]) == LABEL_REF)
382 sorry ("target cannot support label values");
;; Zero-extend QI to HI.  A register source is converted with cvt.u16; a
;; memory source is loaded directly with an unsigned 8-bit load (ld ... .u8).
385 (define_insn "zero_extendqihi2"
386 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
387 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
390 %.\\tcvt.u16.u%T1\\t%0, %1;
391 %.\\tld%A1.u8\\t%0, %1;"
392 [(set_attr "subregs_ok" "true")])
;; Zero-extend QI/HI to SI.  A register source uses cvt.u32; a memory source
;; is folded into an unsigned load sized by the source operand (%T1).
394 (define_insn "zero_extend<mode>si2"
395 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
396 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
399 %.\\tcvt.u32.u%T1\\t%0, %1;
400 %.\\tld%A1.u%T1\\t%0, %1;"
401 [(set_attr "subregs_ok" "true")])
;; Zero-extend QI/HI/SI to DI.  A register source uses cvt.u64; a memory
;; source is folded into a load whose type suffix comes from %u1.
403 (define_insn "zero_extend<mode>di2"
404 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
405 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
408 %.\\tcvt.u64.u%T1\\t%0, %1;
409 %.\\tld%A1%u1\\t%0, %1;"
410 [(set_attr "subregs_ok" "true")])
;; Sign-extend QI to HI, register to register only, via cvt.s16.s8.
412 (define_insn "extendqihi2"
413 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
414 (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
416 "%.\\tcvt.s16.s8\\t%0, %1;"
417 [(set_attr "subregs_ok" "true")])
;; Sign-extend QI/HI to SI.  A register source uses cvt.s32; a memory source
;; is folded into a signed load sized by the source operand (%T1).
419 (define_insn "extend<mode>si2"
420 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
421 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
424 %.\\tcvt.s32.s%T1\\t%0, %1;
425 %.\\tld%A1.s%T1\\t%0, %1;"
426 [(set_attr "subregs_ok" "true")])
;; Sign-extend QI/HI/SI to DI.  A register source uses cvt.s64; a memory
;; source is folded into a signed load sized by the source operand (%T1).
428 (define_insn "extend<mode>di2"
429 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
430 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
433 %.\\tcvt.s64.s%T1\\t%0, %1;
434 %.\\tld%A1.s%T1\\t%0, %1;"
435 [(set_attr "subregs_ok" "true")])
;; Truncate HI to QI.  A register destination uses cvt from .u16; a memory
;; destination is folded into an 8-bit store (st ... .u8).
437 (define_insn "trunchiqi2"
438 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
439 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
442 %.\\tcvt%t0.u16\\t%0, %1;
443 %.\\tst%A0.u8\\t%0, %1;"
444 [(set_attr "subregs_ok" "true")])
;; Truncate SI to QI/HI.  A memory destination becomes a narrow store.  For
;; a register destination, QImode is emitted as a "mov" while the remaining
;; mode (HI) is emitted as a "cvt" from .u32.
446 (define_insn "truncsi<mode>2"
447 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
448 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
451 if (which_alternative == 1)
452 return "%.\\tst%A0.u%T0\\t%0, %1;";
453 if (GET_MODE (operands[0]) == QImode)
454 return "%.\\tmov%t0\\t%0, %1;";
455 return "%.\\tcvt%t0.u32\\t%0, %1;";
457 [(set_attr "subregs_ok" "true")])
;; Truncate DI to QI/HI/SI.  A register destination uses cvt from .u64; a
;; memory destination is folded into a narrow store sized by operand 0.
459 (define_insn "truncdi<mode>2"
460 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
461 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
464 %.\\tcvt%t0.u64\\t%0, %1;
465 %.\\tst%A0.u%T0\\t%0, %1;"
466 [(set_attr "subregs_ok" "true")])
468 ;; Sign-extensions of truncations
;; Truncate an HI/SI/DI register to QI and widen the result back to the
;; same mode in one instruction, emitted as cvt.s<T0>.s8.
470 (define_insn "*extend_trunc_<mode>2_qi"
471 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
473 (truncate:QI (match_operand:HSDIM 1 "nvptx_register_operand" "R"))))]
475 "%.\\tcvt.s%T0.s8\\t%0, %1;"
476 [(set_attr "subregs_ok" "true")])
;; Truncate an SI/DI register to HI and widen the result back to the same
;; mode in one instruction, emitted as cvt.s<T0>.s16.
478 (define_insn "*extend_trunc_<mode>2_hi"
479 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
481 (truncate:HI (match_operand:SDIM 1 "nvptx_register_operand" "R"))))]
483 "%.\\tcvt.s%T0.s16\\t%0, %1;"
484 [(set_attr "subregs_ok" "true")])
;; Truncate a DI register to SI and widen the result back to DI in one
;; instruction, emitted as cvt.s64.s32.
486 (define_insn "*extend_trunc_di2_si"
487 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
489 (truncate:SI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
491 "%.\\tcvt.s64.s32\\t%0, %1;"
492 [(set_attr "subregs_ok" "true")])
494 ;; Integer arithmetic
;; Integer addition for HI/SI/DI: register plus register-or-immediate,
;; emitted as "add".
496 (define_insn "add<mode>3"
497 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
498 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
499 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
501 "%.\\tadd%t0\\t%0, %1, %2;")
;; Three-register SImode sum (op1 + op2) + op3 in a single "vadd ... .add"
;; instruction.
503 (define_insn "*vadd_addsi4"
504 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
505 (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
506 (match_operand:SI 2 "nvptx_register_operand" "R"))
507 (match_operand:SI 3 "nvptx_register_operand" "R")))]
509 "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")
;; SImode (op1 - op2) + op3 on three registers in a single "vsub ... .add"
;; instruction.
511 (define_insn "*vsub_addsi4"
512 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
513 (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
514 (match_operand:SI 2 "nvptx_register_operand" "R"))
515 (match_operand:SI 3 "nvptx_register_operand" "R")))]
517 "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")
;; Integer subtraction for HI/SI/DI; both inputs must be registers.  HImode
;; is emitted with an explicit "sub.s16" as a workaround for the NVIDIA bug
;; referenced in the body; other modes take their suffix from %t0.
519 (define_insn "sub<mode>3"
520 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
521 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
522 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
525 if (GET_MODE (operands[0]) == HImode)
526 /* Workaround https://developer.nvidia.com/nvidia_bug/3527713.
528 return "%.\\tsub.s16\\t%0, %1, %2;";
530 return "%.\\tsub%t0\\t%0, %1, %2;";
;; Same-width integer multiply for HI/SI/DI, emitted as "mul.lo".
533 (define_insn "mul<mode>3"
534 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
535 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
536 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
538 "%.\\tmul.lo%t0\\t%0, %1, %2;")
;; Multiply-add (op1 * op2) + op3 for HI/SI/DI, emitted as "mad.lo".
;; Operands 2 and 3 may be registers or immediates.
540 (define_insn "*mad<mode>3"
541 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
542 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
543 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
544 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
546 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
;; Signed integer division for HI/SI/DI, emitted as "div.s<T0>".
548 (define_insn "div<mode>3"
549 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
550 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
551 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
553 "%.\\tdiv.s%T0\\t%0, %1, %2;")
;; Unsigned integer division for HI/SI/DI, emitted as "div.u<T0>".
555 (define_insn "udiv<mode>3"
556 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
557 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
558 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
560 "%.\\tdiv.u%T0\\t%0, %1, %2;")
;; Signed integer remainder for HI/SI/DI, emitted as "rem.s<T0>".
;; NOTE(review): operand 1 has constraint "Ri" although its predicate
;; (nvptx_register_operand) only admits registers; div/udiv use "R" for the
;; same position - confirm whether the extra "i" is intentional.
562 (define_insn "mod<mode>3"
563 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
564 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
565 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
567 "%.\\trem.s%T0\\t%0, %1, %2;")
;; Unsigned integer remainder for HI/SI/DI, emitted as "rem.u<T0>".
;; NOTE(review): operand 1 has constraint "Ri" although its predicate
;; (nvptx_register_operand) only admits registers; div/udiv use "R" for the
;; same position - confirm whether the extra "i" is intentional.
569 (define_insn "umod<mode>3"
570 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
571 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
572 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
574 "%.\\trem.u%T0\\t%0, %1, %2;")
;; Integer minimum/maximum for HI/SI/DI.  All four patterns share one shape
;; (register, register-or-immediate) and differ only in the RTL code and in
;; the emitted mnemonic/signedness: min.s, min.u, max.s, max.u.

;; Signed minimum.
576 (define_insn "smin<mode>3"
577 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
578 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
579 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
581 "%.\\tmin.s%T0\\t%0, %1, %2;")
;; Unsigned minimum.
583 (define_insn "umin<mode>3"
584 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
585 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
586 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
588 "%.\\tmin.u%T0\\t%0, %1, %2;")
;; Signed maximum.
590 (define_insn "smax<mode>3"
591 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
592 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
593 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
595 "%.\\tmax.s%T0\\t%0, %1, %2;")
;; Unsigned maximum.
597 (define_insn "umax<mode>3"
598 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
599 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
600 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
602 "%.\\tmax.u%T0\\t%0, %1, %2;")
;; Integer absolute value for HI/SI/DI, emitted as "abs.s<T0>".
604 (define_insn "abs<mode>2"
605 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
606 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
608 "%.\\tabs.s%T0\\t%0, %1;")
;; Integer negation for HI/SI/DI, emitted as "neg.s<T0>".
610 (define_insn "neg<mode>2"
611 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
612 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
614 "%.\\tneg.s%T0\\t%0, %1;")
;; Bitwise complement for HI/SI/DI, emitted as "not.b<T0>".
616 (define_insn "one_cmpl<mode>2"
617 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
618 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
620 "%.\\tnot.b%T0\\t%0, %1;")
;; Complement of a BImode register, emitted as "not.pred".
622 (define_insn "one_cmplbi2"
623 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
624 (not:BI (match_operand:BI 1 "nvptx_register_operand" "R")))]
626 "%.\\tnot.pred\\t%0, %1;")
;; An EQ test on an HI/SI/DI register whose result has the same mode,
;; emitted as "cnot.b<T0>".
;; NOTE(review): the second comparison operand is not visible in this view;
;; presumably (const_int 0) - confirm.
628 (define_insn "*cnot<mode>2"
629 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
630 (eq:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
633 "%.\\tcnot.b%T0\\t%0, %1;")
;; Bit reversal for SI/DI, emitted as "brev.b<T0>".
635 (define_insn "bitrev<mode>2"
636 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
637 (bitreverse:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
639 "%.\\tbrev.b%T0\\t%0, %1;")
;; Count leading zeros of an SI/DI register.  The result is always SImode;
;; the instruction width follows the source operand (%T1).
641 (define_insn "clz<mode>2"
642 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
643 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
645 "%.\\tclz.b%T1\\t%0, %1;")
;; Count trailing zeros, expanded as clz applied to the bit-reversed input:
;; bitrev<mode>2 writes a fresh temporary, then clz<mode>2 writes operand 0.
647 (define_expand "ctz<mode>2"
648 [(set (match_operand:SI 0 "nvptx_register_operand" "")
649 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
652 rtx tmpreg = gen_reg_rtx (<MODE>mode);
653 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
654 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
;; Population count of an SImode register, emitted as "popc.b32".
658 (define_insn "popcountsi2"
659 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
660 (popcount:SI (match_operand:SI 1 "nvptx_register_operand" "R")))]
662 "%.\\tpopc.b32\\t%0, %1;")
;; Population count of a DImode register, emitted as "popc.b64".  The
;; destination is an SImode register.
;; NOTE(review): the RTL that wraps the DImode popcount to give an SImode
;; value is not visible in this view.
664 (define_insn "popcountdi2"
665 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
667 (popcount:DI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
669 "%.\\tpopc.b64\\t%0, %1;")
671 ;; Multiplication variants
;; Widening multiplies: two narrow registers give a result of twice the
;; width, emitted as "mul.wide" with a signed (.s16/.s32) or unsigned
;; (.u16/.u32) source type.
;; NOTE(review): the extension wrapped around operand 2 is not visible in
;; this view; presumably the same as for operand 1.

;; Signed HI x HI -> SI.
673 (define_insn "mulhisi3"
674 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
675 (mult:SI (sign_extend:SI
676 (match_operand:HI 1 "nvptx_register_operand" "R"))
678 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
680 "%.\\tmul.wide.s16\\t%0, %1, %2;")
;; Signed SI x SI -> DI.
682 (define_insn "mulsidi3"
683 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
684 (mult:DI (sign_extend:DI
685 (match_operand:SI 1 "nvptx_register_operand" "R"))
687 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
689 "%.\\tmul.wide.s32\\t%0, %1, %2;")
;; Unsigned HI x HI -> SI.
691 (define_insn "umulhisi3"
692 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
693 (mult:SI (zero_extend:SI
694 (match_operand:HI 1 "nvptx_register_operand" "R"))
696 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
698 "%.\\tmul.wide.u16\\t%0, %1, %2;")
;; Unsigned SI x SI -> DI.
700 (define_insn "umulsidi3"
701 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
702 (mult:DI (zero_extend:DI
703 (match_operand:SI 1 "nvptx_register_operand" "R"))
705 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
707 "%.\\tmul.wide.u32\\t%0, %1, %2;")
;; Signed DI x DI -> TI multiply, built from two DImode multiplies: the
;; high half comes from smuldi3_highpart and the low half from muldi3.  Each
;; goes into a fresh register and is then moved into the matching half of
;; operand 0.
709 (define_expand "mulditi3"
710 [(set (match_operand:TI 0 "nvptx_register_operand")
711 (mult:TI (sign_extend:TI
712 (match_operand:DI 1 "nvptx_register_operand"))
714 (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
717 rtx hi = gen_reg_rtx (DImode);
718 rtx lo = gen_reg_rtx (DImode);
719 emit_insn (gen_smuldi3_highpart (hi, operands[1], operands[2]));
720 emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
721 emit_move_insn (gen_highpart (DImode, operands[0]), hi);
722 emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
;; Unsigned DI x DI -> TI multiply, built from two DImode multiplies: the
;; high half comes from umuldi3_highpart and the low half from muldi3.  Each
;; goes into a fresh register and is then moved into the matching half of
;; operand 0.
726 (define_expand "umulditi3"
727 [(set (match_operand:TI 0 "nvptx_register_operand")
728 (mult:TI (zero_extend:TI
729 (match_operand:DI 1 "nvptx_register_operand"))
731 (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
734 rtx hi = gen_reg_rtx (DImode);
735 rtx lo = gen_reg_rtx (DImode);
736 emit_insn (gen_umuldi3_highpart (hi, operands[1], operands[2]));
737 emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
738 emit_move_insn (gen_highpart (DImode, operands[0]), hi);
739 emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
;; Signed high-part multiply for HI/SI/DI, emitted as "mul.hi.s<T0>".
;; NOTE(review): the RTL code that wraps operands 1 and 2 is not visible in
;; this view.
743 (define_insn "smul<mode>3_highpart"
744 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
746 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
747 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
749 "%.\\tmul.hi.s%T0\\t%0, %1, %2;")
;; Unsigned high-part multiply for HI/SI/DI, emitted as "mul.hi.u<T0>".
;; NOTE(review): the RTL code that wraps operands 1 and 2 is not visible in
;; this view.
751 (define_insn "umul<mode>3_highpart"
752 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
754 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
755 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
757 "%.\\tmul.hi.u%T0\\t%0, %1, %2;")
;; Alternative high-part multiply patterns whose RTL contains an explicit
;; widening multiply of two extended narrow registers.  They emit the same
;; "mul.hi" instruction as the named high-part patterns.
;; NOTE(review): the RTL that selects the high half of the product is not
;; visible in this view.

;; Signed, HImode result.
759 (define_insn "*smulhi3_highpart_2"
760 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
763 (mult:SI (sign_extend:SI
764 (match_operand:HI 1 "nvptx_register_operand" "R"))
766 (match_operand:HI 2 "nvptx_register_operand" "R")))
769 "%.\\tmul.hi.s16\\t%0, %1, %2;")
;; Signed, SImode result.
771 (define_insn "*smulsi3_highpart_2"
772 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
775 (mult:DI (sign_extend:DI
776 (match_operand:SI 1 "nvptx_register_operand" "R"))
778 (match_operand:SI 2 "nvptx_register_operand" "R")))
781 "%.\\tmul.hi.s32\\t%0, %1, %2;")
;; Unsigned, HImode result.
783 (define_insn "*umulhi3_highpart_2"
784 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
787 (mult:SI (zero_extend:SI
788 (match_operand:HI 1 "nvptx_register_operand" "R"))
790 (match_operand:HI 2 "nvptx_register_operand" "R")))
793 "%.\\tmul.hi.u16\\t%0, %1, %2;")
;; Unsigned, SImode result.
795 (define_insn "*umulsi3_highpart_2"
796 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
799 (mult:DI (zero_extend:DI
800 (match_operand:SI 1 "nvptx_register_operand" "R"))
802 (match_operand:SI 2 "nvptx_register_operand" "R")))
805 "%.\\tmul.hi.u32\\t%0, %1, %2;")
;; Shifts for HI/SI/DI.  The shift count (operand 2) is always SImode,
;; whatever the mode of the shifted value, and may be a register or an
;; immediate.

;; Left shift, emitted as "shl.b<T0>".
809 (define_insn "ashl<mode>3"
810 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
811 (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
812 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
814 "%.\\tshl.b%T0\\t%0, %1, %2;")
;; Arithmetic right shift, emitted as "shr.s<T0>".
816 (define_insn "ashr<mode>3"
817 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
818 (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
819 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
821 "%.\\tshr.s%T0\\t%0, %1, %2;")
;; Logical right shift, emitted as "shr.u<T0>".
823 (define_insn "lshr<mode>3"
824 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
825 (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
826 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
828 "%.\\tshr.u%T0\\t%0, %1, %2;")
;; SImode rotates, emitted as a funnel shift ("shf.{l,r}.wrap.b32") with
;; operand 1 supplied as both data inputs.  In the RTL the rotate count is
;; wrapped in an AND.
;; NOTE(review): the AND mask constant and the insn condition are not
;; visible in this view.

;; Rotate left.
830 (define_insn "rotlsi3"
831 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
832 (rotate:SI (match_operand:SI 1 "nvptx_register_operand" "R")
833 (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
836 "%.\\tshf.l.wrap.b32\\t%0, %1, %1, %2;")
;; Rotate right.
838 (define_insn "rotrsi3"
839 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
840 (rotatert:SI (match_operand:SI 1 "nvptx_register_operand" "R")
841 (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
844 "%.\\tshf.r.wrap.b32\\t%0, %1, %1, %2;")
846 ;; Logical operations
;; any_logic iterates over AND, IOR and XOR.  Two spellings are attached to
;; each code: <logic> is used in the emitted instruction text (IOR becomes
;; "or") and <ilogic> in the pattern names (IOR stays "ior").
848 (define_code_iterator any_logic [and ior xor])
849 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
850 (define_code_attr ilogic [(and "and") (ior "ior") (xor "xor")])
;; Bitwise logic on HI/SI/DI (register, register-or-immediate), emitted as
;; "<logic>.b<T0>".  The pattern name uses <ilogic>.
;; NOTE(review): the RTL code wrapping operands 1 and 2 is not visible in
;; this view; presumably any_logic - confirm.
852 (define_insn "<ilogic><mode>3"
853 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
855 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
856 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
858 "%.\\t<logic>.b%T0\\t%0, %1, %2;")
;; AND/IOR/XOR of two BImode registers, emitted as "<logic>.pred".
860 (define_insn "<ilogic>bi3"
861 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
862 (any_logic:BI (match_operand:BI 1 "nvptx_register_operand" "R")
863 (match_operand:BI 2 "nvptx_register_operand" "R")))]
865 "%.\\t<logic>.pred\\t%0, %1, %2;")
868 [(set (match_operand:HSDIM 0 "nvptx_register_operand")
870 (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand")
872 (ne:HSDIM (match_operand:BI 2 "nvptx_register_operand")
874 "can_create_pseudo_p ()"
875 [(set (match_dup 3) (any_logic:BI (match_dup 1) (match_dup 2)))
876 (set (match_dup 0) (ne:HSDIM (match_dup 3) (const_int 0)))]
878 operands[3] = gen_reg_rtx (BImode);
881 ;; Comparisons and branches
;; Integer comparison of an HI/SI/DI register with a register or immediate.
;; The result goes to a BImode register and is emitted as "setp"; the
;; comparison operator is printed through %c1.
883 (define_insn "cmp<mode>"
884 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
885 (match_operator:BI 1 "nvptx_comparison_operator"
886 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
887 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
889 "%.\\tsetp%c1\\t%0, %2, %3;")
;; SF/DF comparison of a register with a register or floating constant.
;; The result goes to a BImode register and is emitted as "setp".
891 (define_insn "*cmp<mode>"
892 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
893 (match_operator:BI 1 "nvptx_float_comparison_operator"
894 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
895 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
897 "%.\\tsetp%c1\\t%0, %2, %3;")
;; HFmode comparison of a register with a register or floating constant.
;; The result goes to a BImode register and is emitted as "setp".
;; NOTE(review): the insn condition is not visible in this view.
899 (define_insn "*cmphf"
900 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
901 (match_operator:BI 1 "nvptx_float_comparison_operator"
902 [(match_operand:HF 2 "nvptx_register_operand" "R")
903 (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))]
905 "%.\\tsetp%c1\\t%0, %2, %3;")
909 (label_ref (match_operand 0 "" "")))]
;; Conditional jump to label operand 1, taken when BImode register
;; operand 0 tests NE.  Marked as not predicable.
;; NOTE(review): the compared constant, the fall-through arm and the output
;; template are not visible in this view.
913 (define_insn "br_true"
915 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
917 (label_ref (match_operand 1 "" ""))
921 [(set_attr "predicable" "no")])
;; Conditional jump to label operand 1, taken when BImode register
;; operand 0 tests EQ.  Marked as not predicable.
;; NOTE(review): the compared constant, the fall-through arm and the output
;; template are not visible in this view.
923 (define_insn "br_false"
925 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
927 (label_ref (match_operand 1 "" ""))
931 [(set_attr "predicable" "no")])
933 ;; unified conditional branch
;; "bra.uni" to label operand 1, taken when the predicate register is
;; non-zero.  The predicate is wrapped in UNSPEC_BR_UNIFIED so this pattern
;; stays distinct from the ordinary conditional branch.  The guard is
;; printed through %j0.  Not predicable.
934 (define_insn "br_true_uni"
935 [(set (pc) (if_then_else
936 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
937 UNSPEC_BR_UNIFIED) (const_int 0))
938 (label_ref (match_operand 1 "" "")) (pc)))]
940 "%j0\\tbra.uni\\t%l1;"
941 [(set_attr "predicable" "no")])
;; "bra.uni" to label operand 1, taken when the predicate register is zero.
;; The predicate is wrapped in UNSPEC_BR_UNIFIED.  The guard is printed
;; through %J0 (upper case, unlike br_true_uni).  Not predicable.
943 (define_insn "br_false_uni"
944 [(set (pc) (if_then_else
945 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
946 UNSPEC_BR_UNIFIED) (const_int 0))
947 (label_ref (match_operand 1 "" "")) (pc)))]
949 "%J0\\tbra.uni\\t%l1;"
950 [(set_attr "predicable" "no")])
;; Compare-and-branch expander for HI/SI/DI.  The comparison (operand 0) is
;; passed to nvptx_expand_compare; operands 1 and 2 are then replaced by the
;; two sub-expressions of the rtx it returns.
;; NOTE(review): the statement between these steps (embedded line 962) is
;; not visible in this view.
952 (define_expand "cbranch<mode>4"
954 (if_then_else (match_operator 0 "nvptx_comparison_operator"
955 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
956 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
957 (label_ref (match_operand 3 "" ""))
961 rtx t = nvptx_expand_compare (operands[0]);
963 operands[1] = XEXP (t, 0);
964 operands[2] = XEXP (t, 1);
;; Compare-and-branch expander for SF/DF.  The comparison (operand 0) is
;; passed to nvptx_expand_compare; operands 1 and 2 are then replaced by the
;; two sub-expressions of the rtx it returns.
;; NOTE(review): the statement between these steps (embedded line 977) is
;; not visible in this view.
967 (define_expand "cbranch<mode>4"
969 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
970 [(match_operand:SDFM 1 "nvptx_register_operand" "")
971 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
972 (label_ref (match_operand 3 "" ""))
976 rtx t = nvptx_expand_compare (operands[0]);
978 operands[1] = XEXP (t, 0);
979 operands[2] = XEXP (t, 1);
;; Compare-and-branch expander for a BImode register tested EQ/NE against
;; constant zero.  No preparation code is visible for it in this view.
982 (define_expand "cbranchbi4"
984 (if_then_else (match_operator 0 "predicate_operator"
985 [(match_operand:BI 1 "nvptx_register_operand" "")
986 (match_operand:BI 2 "const0_operand" "")])
987 (label_ref (match_operand 3 "" ""))
992 ;; Conditional stores
;; Turns a BImode register into a QI/HI/SI/DI value through an NE test:
;; "selp" picks 1 when the predicate is set and 0 otherwise.
994 (define_insn "setcc<mode>_from_bi"
995 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
996 (ne:QHSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
999 "%.\\tselp%t0\\t%0, 1, 0, %1;")
;; Inverse of setcc<mode>_from_bi (EQ test) for HI/SI/DI: "selp" picks 0
;; when the predicate is set and 1 otherwise.
1001 (define_insn "*setcc<mode>_from_not_bi"
1002 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1003 (eq:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
1006 "%.\\tselp%t0\\t%0, 0, 1, %1;")
;; Sign-extension of a BImode register to QI/HI/SI/DI (per the pattern
;; name): "selp" picks -1 when the predicate is set and 0 otherwise.
1008 (define_insn "extendbi<mode>2"
1009 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
1011 (match_operand:BI 1 "nvptx_register_operand" "R")))]
1013 "%.\\tselp%t0\\t%0, -1, 0, %1;")
;; Zero-extension of a BImode register to QI/HI/SI/DI (per the pattern
;; name): "selp" picks 1 when the predicate is set and 0 otherwise.
1015 (define_insn "zero_extendbi<mode>2"
1016 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
1018 (match_operand:BI 1 "nvptx_register_operand" "R")))]
1020 "%.\\tselp%t0\\t%0, 1, 0, %1;")
;; Conditional selects driven by a BImode register, emitted as "selp".
;; sel_true tests the predicate NE 0 and prints the data operands in RTL
;; order (%2, %3).  sel_false tests EQ 0 and prints them swapped (%3, %2).
;; Each exists once for HI/SI/DI and once for SF/DF.
;; NOTE(review): the RTL code wrapping the three operands is not visible in
;; this view; presumably if_then_else - confirm.

;; Integer select, predicate NE 0.
1022 (define_insn "sel_true<mode>"
1023 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1025 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1026 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
1027 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
1029 "%.\\tselp%t0\\t%0, %2, %3, %1;")
;; Floating-point select, predicate NE 0.
1031 (define_insn "sel_true<mode>"
1032 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1034 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1035 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1036 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1038 "%.\\tselp%t0\\t%0, %2, %3, %1;")
;; Integer select, predicate EQ 0.
1040 (define_insn "sel_false<mode>"
1041 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1043 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1044 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
1045 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
1047 "%.\\tselp%t0\\t%0, %3, %2, %1;")
;; Floating-point select, predicate EQ 0.
1049 (define_insn "sel_false<mode>"
1050 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1052 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1053 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1054 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1056 "%.\\tselp%t0\\t%0, %3, %2, %1;")
;; Code iterator over EQ and NE; patterns that use it are instantiated once
;; per code, with <code> giving the lower-case name.
1058 (define_code_iterator eqne [eq ne])
1060 ;; Split negation of a predicate into a conditional move.
;; Matches NEG applied to an EQ/NE test of a BImode register, producing an
;; HI/SI/DI result.  The replacement keeps the same EQ/NE test of operand 1
;; against zero.
;; NOTE(review): the split condition and the selected values are not
;; visible in this view.
1061 (define_insn_and_split "*selp<mode>_neg_<code>"
1062 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1063 (neg:HSDIM (eqne:HSDIM
1064 (match_operand:BI 1 "nvptx_register_operand" "R")
1071 (eqne (match_dup 1) (const_int 0))
1075 ;; Split bitwise not of a predicate into a conditional move.
;; Matches NOT applied to an EQ/NE test of a BImode register, producing an
;; HI/SI/DI result.  The replacement keeps the same EQ/NE test of operand 1
;; against zero.
;; NOTE(review): the split condition and the selected values are not
;; visible in this view.
1076 (define_insn_and_split "*selp<mode>_not_<code>"
1077 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1078 (not:HSDIM (eqne:HSDIM
1079 (match_operand:BI 1 "nvptx_register_operand" "R")
1086 (eqne (match_dup 1) (const_int 0))
;; Comparison whose result is written directly to an SImode register with
;; "set" (no BImode predicate involved).  One pattern handles HI/SI/DI
;; inputs, the other SF/DF inputs.
;; NOTE(review): the RTL code wrapped around the comparison operator is not
;; visible in this view.

;; Integer inputs.
1090 (define_insn "*setcc_int<mode>"
1091 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1093 (match_operator:SI 1 "nvptx_comparison_operator"
1094 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
1095 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))]
1097 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Floating-point inputs.
1099 (define_insn "*setcc_int<mode>"
1100 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1102 (match_operator:SI 1 "nvptx_float_comparison_operator"
1103 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
1104 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))]
1106 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Comparison whose result is written to an SFmode register with "set".
;; One pattern handles HI/SI/DI inputs, the other SF/DF inputs.

;; Integer inputs.
1108 (define_insn "setcc_float<mode>"
1109 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1110 (match_operator:SF 1 "nvptx_comparison_operator"
1111 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
1112 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
1114 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Floating-point inputs.
1116 (define_insn "setcc_float<mode>"
1117 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1118 (match_operator:SF 1 "nvptx_float_comparison_operator"
1119 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
1120 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
1122 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Expander for storing the result of an integer comparison (operator 1
;; applied to HSDIM-mode operands 2 and 3) in SImode operand 0.  The
;; comparison is rebuilt in BImode with the same code, moved into a fresh
;; BImode pseudo, and that pseudo is handed to gen_setccsi_from_bi together
;; with operand 0.
1124 (define_expand "cstore<mode>4"
1125 [(set (match_operand:SI 0 "nvptx_register_operand")
1126 (match_operator:SI 1 "nvptx_comparison_operator"
1127 [(match_operand:HSDIM 2 "nvptx_register_operand")
1128 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
1131 rtx reg = gen_reg_rtx (BImode);
1132 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1133 operands[2], operands[3]);
1134 emit_move_insn (reg, cmp);
1135 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; Floating-point variant of cstore<mode>4: operands 2 and 3 are SDFM-mode
;; and operator 1 must satisfy nvptx_float_comparison_operator.  As in the
;; integer variant, the comparison is evaluated into a fresh BImode pseudo
;; which is then passed to gen_setccsi_from_bi with operand 0.
1139 (define_expand "cstore<mode>4"
1140 [(set (match_operand:SI 0 "nvptx_register_operand")
1141 (match_operator:SI 1 "nvptx_float_comparison_operator"
1142 [(match_operand:SDFM 2 "nvptx_register_operand")
1143 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
1146 rtx reg = gen_reg_rtx (BImode);
1147 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1148 operands[2], operands[3]);
1149 emit_move_insn (reg, cmp);
1150 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; HFmode variant of the cstore expanders: compares HFmode operands 2 and 3
;; into a fresh BImode pseudo (same comparison code as operator 1) and
;; passes that pseudo to gen_setccsi_from_bi with SImode operand 0.
1154 (define_expand "cstorehf4"
1155 [(set (match_operand:SI 0 "nvptx_register_operand")
1156 (match_operator:SI 1 "nvptx_float_comparison_operator"
1157 [(match_operand:HF 2 "nvptx_register_operand")
1158 (match_operand:HF 3 "nvptx_nonmemory_operand")]))]
1161 rtx reg = gen_reg_rtx (BImode);
1162 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1163 operands[2], operands[3]);
1164 emit_move_insn (reg, cmp);
1165 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; Call without a result.  The whole insn is a parallel accepted by
;; "call_operation" (operand 2); operand 0 is the P-mode callee address.
;; The assembly is produced by nvptx_output_call_insn, which is passed
;; NULL_RTX as the result and operand 0 as the callee.
1171 (define_insn "call_insn_<mode>"
1172 [(match_parallel 2 "call_operation"
1173 [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
1174 (match_operand 1))])]
1177 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
;; Call whose result is stored in register operand 0.  The parallel is
;; operand 3, the P-mode callee address is operand 1.  The assembly is
;; produced by nvptx_output_call_insn with operand 0 as the result and
;; operand 1 as the callee.
1180 (define_insn "call_value_insn_<mode>"
1181 [(match_parallel 3 "call_operation"
1182 [(set (match_operand 0 "nvptx_register_operand" "=R")
1183 (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
1184 (match_operand 2)))])]
1187 return nvptx_output_call_insn (insn, operands[0], operands[1]);
;; Expander for a call with no result; all work is delegated to
;; nvptx_expand_call, with NULL_RTX as its first argument and operand 0 as
;; its second.
1190 (define_expand "call"
1191 [(match_operand 0 "" "")]
1194 nvptx_expand_call (NULL_RTX, operands[0]);
;; Expander for a call with a result; delegated to nvptx_expand_call with
;; operand 0 as its first argument and operand 1 as its second.
1198 (define_expand "call_value"
1199 [(match_operand 0 "" "")
1200 (match_operand 1 "" "")]
1203 nvptx_expand_call (operands[0], operands[1]);
1207 ;; Floating point arithmetic.
;; SDFM-mode addition: operand 0 = operand 1 + operand 2, emitted as PTX
;; `add'.  Operand 2 may be a register or a floating constant ("RF").
1209 (define_insn "add<mode>3"
1210 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1211 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1212 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1214 "%.\\tadd%t0\\t%0, %1, %2;")
;; SDFM-mode subtraction: operand 0 = operand 1 - operand 2, emitted as
;; PTX `sub'.  Unlike add<mode>3, both inputs must be registers.
1216 (define_insn "sub<mode>3"
1217 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1218 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1219 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
1221 "%.\\tsub%t0\\t%0, %1, %2;")
;; SDFM-mode multiplication: operand 0 = operand 1 * operand 2, emitted as
;; PTX `mul'.  Operand 2 may be a register or a floating constant.
1223 (define_insn "mul<mode>3"
1224 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1225 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1226 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1228 "%.\\tmul%t0\\t%0, %1, %2;")
;; SDFM-mode fused multiply-add of operands 1, 2 and 3 into operand 0,
;; emitted as PTX `fma'.  Operands 2 and 3 may be floating constants.
;; NOTE(review): %# presumably prints the rounding-mode suffix -- confirm
;; against the operand printer.
1230 (define_insn "fma<mode>4"
1231 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1232 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1233 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1234 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1236 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
;; Reciprocal.  The pattern only matches when constant operand 2 is a
;; CONST_DOUBLE identical to 1.0 (dconst1); in that case only register
;; operand 1 appears in the output, as the source of a PTX `rcp'.
1238 (define_insn "*recip<mode>2"
1239 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1241 (match_operand:SDFM 2 "const_double_operand" "F")
1242 (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1243 "CONST_DOUBLE_P (operands[2])
1244 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
1245 "%.\\trcp%#%t0\\t%0, %1;")
;; SDFM-mode division: operand 0 = operand 1 / operand 2, emitted as PTX
;; `div'.  The divisor may be a register or a floating constant.
1247 (define_insn "div<mode>3"
1248 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1249 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1250 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1252 "%.\\tdiv%#%t0\\t%0, %1, %2;")
;; SDFM-mode copysign, represented as an unspec of operands 1 and 2.
;; Note that the output template lists the sources in swapped order
;; (%2 before %1) relative to the RTL operand numbering.
1254 (define_insn "copysign<mode>3"
1255 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1256 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF")
1257 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")]
1260 "%.\\tcopysign%t0\\t%0, %2, %1;")
;; SDFM-mode minimum of operands 1 and 2, emitted as PTX `min'.
;; Operand 2 may be a register or a floating constant.
1262 (define_insn "smin<mode>3"
1263 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1264 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1265 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1267 "%.\\tmin%t0\\t%0, %1, %2;")
;; SDFM-mode maximum of operands 1 and 2, emitted as PTX `max'.
;; Operand 2 may be a register or a floating constant.
1269 (define_insn "smax<mode>3"
1270 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1271 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1272 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1274 "%.\\tmax%t0\\t%0, %1, %2;")
;; SDFM-mode absolute value of register operand 1, emitted as PTX `abs'.
1276 (define_insn "abs<mode>2"
1277 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1278 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1280 "%.\\tabs%t0\\t%0, %1;")
;; SDFM-mode negation of register operand 1, emitted as PTX `neg'.
1282 (define_insn "neg<mode>2"
1283 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1284 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1286 "%.\\tneg%t0\\t%0, %1;")
;; SDFM-mode square root of register operand 1, emitted as PTX `sqrt'.
1288 (define_insn "sqrt<mode>2"
1289 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1290 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1292 "%.\\tsqrt%#%t0\\t%0, %1;")
;; Combined SFmode sine/cosine expander, enabled only with
;; flag_unsafe_math_optimizations.  It expands to two sets that both read
;; operand 2; the second one stores UNSPEC_SIN of operand 2 in operand 1.
;; Before expansion, operand 2 is passed through make_safe_from against
;; operand 0, so the first set cannot clobber the input of the second.
1294 (define_expand "sincossf3"
1295 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1296 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
1298 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
1299 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
1300 "flag_unsafe_math_optimizations"
1302 operands[2] = make_safe_from (operands[2], operands[0]);
;; SFmode sine via the approximate PTX instruction `sin.approx'; only
;; available with flag_unsafe_math_optimizations.
1305 (define_insn "sinsf2"
1306 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1307 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1309 "flag_unsafe_math_optimizations"
1310 "%.\\tsin.approx%t0\\t%0, %1;")
;; SFmode cosine via the approximate PTX instruction `cos.approx'; only
;; available with flag_unsafe_math_optimizations.
1312 (define_insn "cossf2"
1313 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1314 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1316 "flag_unsafe_math_optimizations"
1317 "%.\\tcos.approx%t0\\t%0, %1;")
;; SFmode base-2 logarithm via the approximate PTX instruction
;; `lg2.approx'; only available with flag_unsafe_math_optimizations.
1319 (define_insn "log2sf2"
1320 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1321 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1323 "flag_unsafe_math_optimizations"
1324 "%.\\tlg2.approx%t0\\t%0, %1;")
;; SFmode base-2 exponential via the approximate PTX instruction
;; `ex2.approx'; only available with flag_unsafe_math_optimizations.
1326 (define_insn "exp2sf2"
1327 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1328 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1330 "flag_unsafe_math_optimizations"
1331 "%.\\tex2.approx%t0\\t%0, %1;")
;; Sets BImode predicate operand 0 from a PTX `testp.infinite' test of
;; SDFM-mode register operand 1.  The type suffix is taken from the input
;; operand (%t1), not from the predicate destination.
1333 (define_insn "setcc_isinf<mode>"
1334 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
1335 (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1338 "%.\\ttestp.infinite%t1\\t%0, %1;")
;; isinf expander producing an SImode result.  It emits setcc_isinf<mode>
;; into a fresh BImode pseudo and then passes that predicate to
;; gen_setccsi_from_bi together with operand 0.
1340 (define_expand "isinf<mode>2"
1341 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1342 (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1346 rtx pred = gen_reg_rtx (BImode);
1347 emit_insn (gen_setcc_isinf<mode> (pred, operands[1]));
1348 emit_insn (gen_setccsi_from_bi (operands[0], pred));
1352 ;; HFmode floating point arithmetic.
;; HFmode addition, emitted as `add.f16'.  Both inputs must be registers.
1354 (define_insn "addhf3"
1355 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1356 (plus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1357 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1359 "%.\\tadd.f16\\t%0, %1, %2;")
;; HFmode subtraction, emitted as `sub.f16'.  Both inputs must be
;; registers.
1361 (define_insn "subhf3"
1362 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1363 (minus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1364 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1366 "%.\\tsub.f16\\t%0, %1, %2;")
;; HFmode multiplication, emitted as `mul.f16'.  Both inputs must be
;; registers.
1368 (define_insn "mulhf3"
1369 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1370 (mult:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1371 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1373 "%.\\tmul.f16\\t%0, %1, %2;")
;; HFmode fused multiply-add, emitted as `fma' with an explicit .f16
;; suffix.  Operands 2 and 3 may be registers or floating constants.
1375 (define_insn "fmahf4"
1376 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1377 (fma:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1378 (match_operand:HF 2 "nvptx_nonmemory_operand" "RF")
1379 (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")))]
1381 "%.\\tfma%#.f16\\t%0, %1, %2, %3;")
;; HFmode negation implemented as a 16-bit integer operation: XOR with
;; -32768 (0x8000) flips bit 15 of the value and leaves the rest intact.
1383 (define_insn "neghf2"
1384 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1385 (neg:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
1387 "%.\\txor.b16\\t%0, %1, -32768;")
;; HFmode absolute value implemented as a 16-bit integer operation: AND
;; with 32767 (0x7fff) clears bit 15 of the value.
1389 (define_insn "abshf2"
1390 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1391 (abs:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
1393 "%.\\tand.b16\\t%0, %1, 32767;")
;; HFmode base-2 exponential via `ex2.approx.f16'.  Requires both
;; TARGET_SM75 and flag_unsafe_math_optimizations.
1395 (define_insn "exp2hf2"
1396 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1397 (unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")]
1399 "TARGET_SM75 && flag_unsafe_math_optimizations"
1400 "%.\\tex2.approx.f16\\t%0, %1;")
;; Hyperbolic tangent for the HSFM modes via `tanh.approx'.  Requires both
;; TARGET_SM75 and flag_unsafe_math_optimizations.
1402 (define_insn "tanh<mode>2"
1403 [(set (match_operand:HSFM 0 "nvptx_register_operand" "=R")
1404 (unspec:HSFM [(match_operand:HSFM 1 "nvptx_register_operand" "R")]
1406 "TARGET_SM75 && flag_unsafe_math_optimizations"
1407 "%.\\ttanh.approx%t0\\t%0, %1;")
1409 ;; HFmode floating point arithmetic.
;; HFmode minimum of two register operands, emitted as `min.f16'.
1411 (define_insn "sminhf3"
1412 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1413 (smin:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1414 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1416 "%.\\tmin.f16\\t%0, %1, %2;")
;; HFmode maximum of two register operands, emitted as `max.f16'.
1418 (define_insn "smaxhf3"
1419 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1420 (smax:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1421 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1423 "%.\\tmax.f16\\t%0, %1, %2;")
1425 ;; Conversions involving floating point
;; SFmode to DFmode extension, emitted as PTX `cvt' with the destination
;; type suffix (%t0) followed by the source type suffix (%t1).
1427 (define_insn "extendsfdf2"
1428 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
1429 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
1431 "%.\\tcvt%t0%t1\\t%0, %1;")
;; DFmode to SFmode truncation, emitted as PTX `cvt'.  Unlike the
;; extension pattern, the template includes the %# modifier.
1433 (define_insn "truncdfsf2"
1434 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1435 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
1437 "%.\\tcvt%#%t0%t1\\t%0, %1;")
;; Unsigned SImode integer to SDFM-mode float conversion (`cvt' with a
;; `.u' source type).
1439 (define_insn "floatunssi<mode>2"
1440 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1441 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1443 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
;; Signed SImode integer to SDFM-mode float conversion (`cvt' with a `.s'
;; source type).
1445 (define_insn "floatsi<mode>2"
1446 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1447 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1449 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
;; Unsigned DImode integer to SDFM-mode float conversion (`cvt' with a
;; `.u' source type).
1451 (define_insn "floatunsdi<mode>2"
1452 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1453 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1455 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
;; Signed DImode integer to SDFM-mode float conversion (`cvt' with a `.s'
;; source type).
1457 (define_insn "floatdi<mode>2"
1458 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1459 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1461 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
;; SDFM-mode float to unsigned SImode integer conversion, emitted as `cvt'
;; with the fixed `.rzi' rounding modifier and a `.u' destination type.
1463 (define_insn "fixuns_trunc<mode>si2"
1464 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1465 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1467 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
;; SDFM-mode float to signed SImode integer conversion, emitted as `cvt'
;; with the fixed `.rzi' rounding modifier and a `.s' destination type.
1469 (define_insn "fix_trunc<mode>si2"
1470 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1471 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1473 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; SDFM-mode float to unsigned DImode integer conversion, emitted as `cvt'
;; with the fixed `.rzi' rounding modifier and a `.u' destination type.
1475 (define_insn "fixuns_trunc<mode>di2"
1476 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1477 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1479 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
;; SDFM-mode float to signed DImode integer conversion, emitted as `cvt'
;; with the fixed `.rzi' rounding modifier and a `.s' destination type.
1481 (define_insn "fix_trunc<mode>di2"
1482 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1483 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1485 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; Iterator over the four float-to-integral-float rounding unspecs, plus
;; two attributes keyed on it: fpint_name gives the pattern name stem
;; (floor, btrunc, ceil, nearbyint) and fpint_roundingmode the text spliced
;; after `cvt' in the output template (.rmi, .rzi, .rpi; nearbyint uses the
;; %# modifier followed by `i' instead of a fixed mode).
1487 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
1488 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
1489 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
1490 (UNSPEC_FPINT_BTRUNC "btrunc")
1491 (UNSPEC_FPINT_CEIL "ceil")
1492 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
1493 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1494 (UNSPEC_FPINT_BTRUNC ".rzi")
1495 (UNSPEC_FPINT_CEIL ".rpi")
1496 (UNSPEC_FPINT_NEARBYINT "%#i")])
;; floor/btrunc/ceil/nearbyint with an SDFM-mode input and result of the
;; same mode.  Emitted as `cvt' with the rounding text selected by
;; fpint_roundingmode and identical destination and source type suffixes.
1498 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
1499 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1500 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1503 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
;; Iterator over the floor and ceil unspecs only, used for conversions
;; whose result is an integer.  fpint2_name gives the pattern name stem
;; (lfloor, lceil); fpint2_roundingmode gives the `cvt' rounding modifier
;; (.rmi for floor, .rpi for ceil).
1505 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
1506 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
1507 (UNSPEC_FPINT_CEIL "lceil")])
1508 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1509 (UNSPEC_FPINT_CEIL ".rpi")])
;; lfloor/lceil: SDFM-mode float input, SDIM-mode integer result.  Emitted
;; as `cvt' with the rounding modifier from fpint2_roundingmode and a
;; signed (`.s') destination type.
1511 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
1512 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1513 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1516 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
;; HFmode to SDFM-mode extension, emitted as PTX `cvt'.
1518 (define_insn "extendhf<mode>2"
1519 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1520 (float_extend:SDFM (match_operand:HF 1 "nvptx_register_operand" "R")))]
1522 "%.\\tcvt%t0%t1\\t%0, %1;")
;; SDFM-mode to HFmode truncation, emitted as PTX `cvt' with the %#
;; modifier in the template.
1524 (define_insn "trunc<mode>hf2"
1525 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1526 (float_truncate:HF (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1528 "%.\\tcvt%#%t0%t1\\t%0, %1;")
1530 ;; Vector operations
;; Stores scalar operand 1 into the `.x' component of vector register
;; operand 0 with a `mov' typed after the element (%t1).
1532 (define_insn "*vec_set<mode>_0"
1533 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1535 (vec_duplicate:VECIM
1536 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1540 "%.\\tmov%t1\\t%0.x, %1;")
;; Stores scalar operand 1 into the `.y' component of vector register
;; operand 0 with a `mov' typed after the element (%t1).
1542 (define_insn "*vec_set<mode>_1"
1543 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1545 (vec_duplicate:VECIM
1546 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1550 "%.\\tmov%t1\\t%0.y, %1;")
;; Stores scalar operand 1 into the `.z' component of vector register
;; operand 0 with a `mov' typed after the element (%t1).
1552 (define_insn "*vec_set<mode>_2"
1553 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1555 (vec_duplicate:VECIM
1556 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1560 "%.\\tmov%t1\\t%0.z, %1;")
;; Stores scalar operand 1 into the `.w' component of vector register
;; operand 0 with a `mov' typed after the element (%t1).
1562 (define_insn "*vec_set<mode>_3"
1563 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1565 (vec_duplicate:VECIM
1566 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1570 "%.\\tmov%t1\\t%0.w, %1;")
;; Vector element store expander.  Operand 2 is a constant element index;
;; it is turned into a one-hot mask (1 << index) and the store is emitted
;; as a SET of operand 0 to a VEC_MERGE of a VEC_DUPLICATE of scalar
;; operand 1 with the old value of operand 0, selected by that mask.
1572 (define_expand "vec_set<mode>"
1573 [(match_operand:VECIM 0 "nvptx_register_operand")
1574 (match_operand:<VECELEM> 1 "nvptx_register_operand")
1575 (match_operand:SI 2 "nvptx_vector_index_operand")]
1578 enum machine_mode mode = GET_MODE (operands[0]);
1579 int mask = 1 << INTVAL (operands[2]);
1580 rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
1581 tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
1582 emit_insn (gen_rtx_SET (operands[0], tmp));
;; Vector element extraction.  Constant index operand 2 selects one of
;; four `mov' templates that read the .x, .y, .z or .w component of vector
;; register operand 1 into scalar register operand 0.  The table is indexed
;; directly by the operand value, so the predicate must keep it in 0..3.
1586 (define_insn "vec_extract<mode><Vecelem>"
1587 [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
1588 (vec_select:<VECELEM>
1589 (match_operand:VECIM 1 "nvptx_register_operand" "R")
1590 (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
1593 static const char *const asms[4] = {
1594 "%.\\tmov%t0\\t%0, %1.x;",
1595 "%.\\tmov%t0\\t%0, %1.y;",
1596 "%.\\tmov%t0\\t%0, %1.z;",
1597 "%.\\tmov%t0\\t%0, %1.w;"
1599 return asms[INTVAL (operands[2])];
;; Placeholder instruction: its output declares two .u32 registers,
;; nop_src and nop_dst, and emits a mov.u32 from the former to the latter.
1614 (define_insn "fake_nop"
1618 .reg .u32 %%nop_src;
1619 .reg .u32 %%nop_dst;
1620 mov.u32 %%nop_dst, %%nop_src;
;; Function return.  The text comes from nvptx_output_return; the insn is
;; marked as not predicable.
1623 (define_insn "return"
1627 return nvptx_output_return ();
1629 [(set_attr "predicable" "no")])
;; Function epilogue.  With TARGET_SOFT_STACK it first emits a
;; set_softstack insn on the Pmode register SOFTSTACK_PREV_REGNUM; in all
;; cases it then emits the `return' jump insn.
1631 (define_expand "epilogue"
1632 [(clobber (const_int 0))]
1635 if (TARGET_SOFT_STACK)
1636 emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode,
1637 SOFTSTACK_PREV_REGNUM)));
1638 emit_jump_insn (gen_return ());
;; Nonlocal goto is not supported: the expander reports that through
;; sorry () and emits a nop so that expansion still yields an insn.
1642 (define_expand "nonlocal_goto"
1643 [(match_operand 0 "" "")
1644 (match_operand 1 "" "")
1645 (match_operand 2 "" "")
1646 (match_operand 3 "" "")]
1649 sorry ("target cannot support nonlocal goto");
1650 emit_insn (gen_nop ());
;; Receiver side of a nonlocal goto; like nonlocal_goto itself it is
;; unsupported and only reports that through sorry ().
1654 (define_expand "nonlocal_goto_receiver"
1658 sorry ("target cannot support nonlocal goto");
;; Dynamic stack allocation.  Operand 1 is the size, operand 0 receives the
;; address of the new block.  With TARGET_SOFT_STACK the stack pointer is
;; decremented by operand 1, the new value is published with a
;; set_softstack insn, and operand 0 is loaded from
;; virtual_stack_dynamic_rtx.  The other visible path reports alloca as
;; unsupported through sorry () and emits a nop.
1661 (define_expand "allocate_stack"
1662 [(match_operand 0 "nvptx_register_operand")
1663 (match_operand 1 "nvptx_register_operand")]
1666 if (TARGET_SOFT_STACK)
1668 emit_move_insn (stack_pointer_rtx,
1669 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1670 emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
1671 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1674 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1675 only) but notes it is not implemented. The assembler emits a
1676 confused error message. Issue a blunt one now instead. */
1677 sorry ("target cannot support alloca");
1678 emit_insn (gen_nop ());
;; Parameterized (@) insn taking a P-mode register, operand 0.  The output
;; is produced by nvptx_output_set_softstack from that register's number.
;; Callers in this file invoke it as gen_set_softstack (Pmode, reg).
1682 (define_insn "@set_softstack_<mode>"
1683 [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
1684 UNSPEC_SET_SOFTSTACK)]
1687 return nvptx_output_set_softstack (REGNO (operands[0]));
;; Block-scope stack restore.  With TARGET_SOFT_STACK, operand 1 is copied
;; into operand 0 and a set_softstack insn is emitted on operand 0.
1690 (define_expand "restore_stack_block"
1691 [(match_operand 0 "register_operand" "")
1692 (match_operand 1 "register_operand" "")]
1695 if (TARGET_SOFT_STACK)
1697 emit_move_insn (operands[0], operands[1]);
1698 emit_insn (gen_set_softstack (Pmode, operands[0]));
;; Function-scope stack restore.  Takes two operands checked with the
;; generic register_operand predicate.
1703 (define_expand "restore_stack_function"
1704 [(match_operand 0 "register_operand" "")
1705 (match_operand 1 "register_operand" "")]
;; Unconditional trap: a trap_if whose condition is the constant 1.
1712 [(trap_if (const_int 1) (const_int 0))]
;; Conditional trap on BImode predicate operand 0, matched through an NE
;; test.  The output is a `trap' followed by an `exit', each prefixed by
;; the %j0 modifier; the insn itself is marked as not predicable.
1716 (define_insn "trap_if_true"
1717 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1721 "%j0 trap; %j0 exit;"
1722 [(set_attr "predicable" "no")])
;; Conditional trap on BImode predicate operand 0, matched through an EQ
;; test.  Same output as trap_if_true but with the %J0 modifier on both
;; instructions; marked as not predicable.
1724 (define_insn "trap_if_false"
1725 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1729 "%J0 trap; %J0 exit;"
1730 [(set_attr "predicable" "no")])
;; Compare-and-trap expander for SDIM-mode operands 1 and 2.  The
;; comparison (operator 0) is evaluated by nvptx_expand_compare and its
;; result is used as the predicate of a trap_if_true insn.  Operand 3 must
;; be the constant zero.
1732 (define_expand "ctrap<mode>4"
1733 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1734 [(match_operand:SDIM 1 "nvptx_register_operand")
1735 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1736 (match_operand 3 "const0_operand"))]
1739 rtx t = nvptx_expand_compare (operands[0]);
1740 emit_insn (gen_trap_if_true (t));
;; Reads the size of an OpenACC dimension into SImode register operand 0.
;; Constant operand 1 indexes the template table: gang reads %nctaid.x,
;; worker reads %ntid.y and vector reads %ntid.x.  The table is indexed by
;; the operand value without a range check.
1744 (define_insn "oacc_dim_size"
1745 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1746 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1750 static const char *const asms[] =
1751 { /* Must match oacc_loop_levels ordering. */
1752 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1753 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1754 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1756 return asms[INTVAL (operands[1])];
;; Reads the position within an OpenACC dimension into SImode register
;; operand 0.  Constant operand 1 indexes the template table: gang reads
;; %ctaid.x, worker reads %tid.y and vector reads %tid.x.  Unlike
;; oacc_dim_size this is an unspec_volatile.
1759 (define_insn "oacc_dim_pos"
1760 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1761 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1765 static const char *const asms[] =
1766 { /* Must match oacc_loop_levels ordering. */
1767 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1768 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1769 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1771 return asms[INTVAL (operands[1])];
;; Volatile unspec insn carrying a single constant-integer operand; marked
;; as not predicable.
1774 (define_insn "nvptx_fork"
1775 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1779 [(set_attr "predicable" "no")])
;; Volatile unspec insn carrying a single constant-integer operand; marked
;; as not predicable.
1781 (define_insn "nvptx_forked"
1782 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1786 [(set_attr "predicable" "no")])
;; Volatile unspec insn carrying a single constant-integer operand; marked
;; as not predicable.
1788 (define_insn "nvptx_joining"
1789 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1793 [(set_attr "predicable" "no")])
;; Volatile unspec insn carrying a single constant-integer operand; marked
;; as not predicable.
1795 (define_insn "nvptx_join"
1796 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1800 [(set_attr "predicable" "no")])
;; OpenACC fork expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0; the fork itself is then expanded by
;; nvptx_expand_oacc_fork using the integer value of operand 2.
1802 (define_expand "oacc_fork"
1803 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1804 (match_operand:SI 1 "general_operand" ""))
1805 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1809 if (operands[0] != const0_rtx)
1810 emit_move_insn (operands[0], operands[1]);
1811 nvptx_expand_oacc_fork (INTVAL (operands[2]));
;; OpenACC join expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0; the join itself is then expanded by
;; nvptx_expand_oacc_join using the integer value of operand 2.
1815 (define_expand "oacc_join"
1816 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1817 (match_operand:SI 1 "general_operand" ""))
1818 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1822 if (operands[0] != const0_rtx)
1823 emit_move_insn (operands[0], operands[1]);
1824 nvptx_expand_oacc_join (INTVAL (operands[2]));
1828 ;; Only 32-bit shuffles exist.
;; Warp shuffle of BITS-mode register operand 1 into operand 0.  Operand 2
;; is the lane argument (register or immediate) and constant operand 3
;; selects the shuffle kind printed by %S3.  Two output forms exist: a
;; `shfl.sync' form with an explicit 0xffffffff member mask and a plain
;; `shfl' form; both pass 31 as the clamp argument.
1829 (define_insn "nvptx_shuffle<mode>"
1830 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1832 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1833 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1834 (match_operand:SI 3 "const_int_operand" "n")]
1839 return "%.\\tshfl.sync%S3.b32\\t%0, %1, %2, 31, 0xffffffff;";
1841 return "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;";
;; Warp ballot: collects BImode predicate operand 1 into SImode register
;; operand 0.  Two output forms exist: `vote.sync.ballot.b32' with an
;; explicit 0xffffffff member mask and plain `vote.ballot.b32'.
1844 (define_insn "nvptx_vote_ballot"
1845 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1846 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1847 UNSPEC_VOTE_BALLOT))]
1851 return "%.\\tvote.sync.ballot.b32\\t%0, %1, 0xffffffff;";
1853 return "%.\\tvote.ballot.b32\\t%0, %1;";
1856 ;; Patterns for OpenMP SIMD-via-SIMT lowering
;; Parameterized (@) insn for entering a SIMT region.  P-mode register
;; operand 0 is the result; operands 1 and 2 may be registers or
;; immediates.  The output is produced by nvptx_output_simt_enter.
1858 (define_insn "@omp_simt_enter_<mode>"
1859 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1860 (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
1861 (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
1862 UNSPECV_SIMT_ENTER))]
1865 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
;; Expander for entering a SIMT region.  It records per-function state in
;; cfun->machine before emitting the Pmode omp_simt_enter insn:
;;  - simt_stack_size is set to HOST_WIDE_INT_M1U when operand 1 is not a
;;    compile-time constant, and is otherwise raised to at least the value
;;    of operand 1;
;;  - simt_stack_align is raised to at least the value of operand 2;
;;  - has_simtreg is set.
1868 (define_expand "omp_simt_enter"
1869 [(match_operand 0 "nvptx_register_operand" "=R")
1870 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1871 (match_operand 2 "const_int_operand" "n")]
1874 if (!CONST_INT_P (operands[1]))
1875 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1877 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1878 cfun->machine->simt_stack_size);
1879 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1880 cfun->machine->simt_stack_align);
1881 cfun->machine->has_simtreg = true;
1882 emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
;; Expander for leaving a SIMT region.  It emits the Pmode omp_simt_exit
;; insn on operand 0; the body also contains calls that emit an
;; nvptx_warpsync insn and an nvptx_uniform_warp_check insn.
;; NOTE(review): confirm which target conditions guard the latter two.
1886 (define_expand "omp_simt_exit"
1887 [(match_operand 0 "nvptx_register_operand" "R")]
1890 emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
1892 emit_insn (gen_nvptx_warpsync ());
1894 emit_insn (gen_nvptx_uniform_warp_check ());
;; Parameterized (@) insn for leaving a SIMT region; takes P-mode register
;; operand 0 and is printed by nvptx_output_simt_exit.
1898 (define_insn "@omp_simt_exit_<mode>"
1899 [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
1903 return nvptx_output_simt_exit (operands[0]);
1906 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; Emitted as a mov.u32 from the %laneid special register into SImode
;; register operand 0.
1907 (define_insn "omp_simt_lane"
1908 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1909 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1911 "%.\\tmov.u32\\t%0, %%laneid;")
1913 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1914 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
;; The copy is a plain move; the barrier is the nvptx_nounroll insn
;; emitted right after it.
1915 (define_expand "omp_simt_ordered"
1916 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1917 (match_operand:SI 1 "nvptx_register_operand" "R")]
1920 emit_move_insn (operands[0], operands[1]);
1921 emit_insn (gen_nvptx_nounroll ());
1925 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;; Operands 0 and 1 may be plain registers or the register pairs accepted
;; by nvptx_register_or_complex_di_df_register_operand; operand 2 is an
;; SImode register or immediate.  The exchange is generated by
;; nvptx_gen_shuffle from these three operands.
1927 (define_expand "omp_simt_xchg_bfly"
1928 [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
1929 (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
1930 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1933 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1938 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1939 ;; from lane given by index in operand 2 to operand 0 in all lanes
;; Operands 0 and 1 may be plain registers or the register pairs accepted
;; by nvptx_register_or_complex_di_df_register_operand; operand 2, the
;; lane index, is an SImode register or immediate.  The broadcast is
;; generated by nvptx_gen_shuffle from these three operands.
1940 (define_expand "omp_simt_xchg_idx"
1941 [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
1942 (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
1943 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1946 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1951 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1952 ;; set operand 0 to zero iff all lanes supply zero in operand 1
;; Implementation: operand 1 is compared against zero (NE) into a fresh
;; BImode predicate, and the warp ballot of that predicate is stored in
;; operand 0.
1953 (define_expand "omp_simt_vote_any"
1954 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1955 (match_operand:SI 1 "nvptx_register_operand" "R")]
1958 rtx pred = gen_reg_rtx (BImode);
1959 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1960 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
1964 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
1965 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; Implementation: operand 1 is compared against zero (NE) into a fresh
;; BImode predicate, the warp ballot of that predicate goes into a
;; temporary SImode register, and operand 0 is the count of trailing zeros
;; (ctzsi2) of that ballot.
1966 (define_expand "omp_simt_last_lane"
1967 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1968 (match_operand:SI 1 "nvptx_register_operand" "R")]
1971 rtx pred = gen_reg_rtx (BImode);
1972 rtx tmp = gen_reg_rtx (SImode);
1973 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1974 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
1975 emit_insn (gen_ctzsi2 (operands[0], tmp));
1979 ;; Extract the two 32-bit halves of a 64-bit object into two 32-bit ints.
;; A single `mov.b64' with a brace-enclosed destination pair splits
;; BITD-mode register operand 2 into SImode registers operand 0 and
;; operand 1.  The two sets are distinguished in RTL by the constants 0
;; and 1 inside the UNSPEC_BIT_CONV unspecs.
1980 (define_insn "unpack<mode>si2"
1981 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1982 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1983 (const_int 0)] UNSPEC_BIT_CONV))
1984 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1985 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1987 "%.\\tmov.b64\\t{%0,%1}, %2;")
1989 ;; Pack two 32-bit ints into a 64-bit object.
;; A single `mov.b64' with a brace-enclosed source pair combines SImode
;; registers operand 1 and operand 2 into BITD-mode register operand 0.
1990 (define_insn "packsi<mode>2"
1991 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1992 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1993 (match_operand:SI 2 "nvptx_register_operand" "R")]
1996 "%.\\tmov.b64\\t%0, {%1,%2};")
;; Compare-and-swap expander.  When nvptx_mem_local_p holds for the memory
;; operand, the ..._1_local insn is used, otherwise the ..._1 insn; either
;; one leaves the previous memory contents in operand 1.  The boolean
;; success result (operand 0) is then computed by comparing operand 1 with
;; the expected value (operand 3) into a BImode predicate and selecting 1
;; or 0 with sel_truesi.
2000 (define_expand "atomic_compare_and_swap<mode>"
2001 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
2002 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
2003 (match_operand:SDIM 2 "memory_operand") ;; memory
2004 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
2005 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
2006 (match_operand:SI 5 "const_int_operand") ;; is_weak
2007 (match_operand:SI 6 "const_int_operand") ;; success model
2008 (match_operand:SI 7 "const_int_operand")] ;; failure model
2011 if (nvptx_mem_local_p (operands[2]))
2012 emit_insn (gen_atomic_compare_and_swap<mode>_1_local
2013 (operands[1], operands[2], operands[3], operands[4],
2016 emit_insn (gen_atomic_compare_and_swap<mode>_1
2017 (operands[1], operands[2], operands[3], operands[4],
2020 rtx cond = gen_reg_rtx (BImode);
2021 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
2022 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
;; Compare-and-swap variant built from ordinary instructions instead of an
;; `atom' instruction.  Inside a braced scope it declares a predicate
;; (eq_p) and a value register (val), loads memory operand 1 into val,
;; sets eq_p when val equals operand 2, stores operand 3 back to memory
;; under that predicate, and finally copies val to operand 0.  Marked as
;; not predicable.
2026 (define_insn "atomic_compare_and_swap<mode>_1_local"
2027 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2028 (unspec_volatile:SDIM
2029 [(match_operand:SDIM 1 "memory_operand" "+m")
2030 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
2031 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
2032 (match_operand:SI 4 "const_int_operand")]
2035 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
2038 output_asm_insn ("{", NULL);
2039 output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL);
2040 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2041 output_asm_insn ("\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2042 output_asm_insn ("\\t" "setp.eq%t0" "\\t" "%%eq_p, %%val, %2;",
2044 output_asm_insn ("@%%eq_p\\t" "st%A1%t0" "\\t" "%1,%3;", operands);
2045 output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2046 output_asm_insn ("}", NULL);
2049 [(set_attr "predicable" "no")])
;; Compare-and-swap using the PTX `atom...cas' instruction.  The template
;; is handed to nvptx_output_atomic_insn together with the operand array
;; and the indices 1 (memory operand) and 4 (memory model operand).  The
;; insn carries the "atomic" attribute.
2051 (define_insn "atomic_compare_and_swap<mode>_1"
2052 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2053 (unspec_volatile:SDIM
2054 [(match_operand:SDIM 1 "memory_operand" "+m")
2055 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
2056 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
2057 (match_operand:SI 4 "const_int_operand")]
2060 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
2064 = "%.\\tatom%A1.cas.b%T0\\t%x0, %1, %2, %3;";
2065 return nvptx_output_atomic_insn (t, operands, 1, 4);
2067 [(set_attr "atomic" "true")])
;; Atomic exchange.  When nvptx_mem_local_p holds for the memory operand,
;; the exchange is open-coded inside a braced scope: load the old value
;; into a scratch register, store operand 2 to memory, then copy the old
;; value to operand 0.  Otherwise an `atom...exch' instruction is printed
;; through nvptx_output_atomic_insn, with operand 1 as the memory and
;; operand 3 as the memory model.
2069 (define_insn "atomic_exchange<mode>"
2070 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
2071 (unspec_volatile:SDIM
2072 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
2073 (match_operand:SI 3 "const_int_operand")] ;; model
2076 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
2079 if (nvptx_mem_local_p (operands[1]))
2081 output_asm_insn ("{", NULL);
2082 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2083 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2084 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%2;", operands);
2085 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2086 output_asm_insn ("}", NULL);
2090 = "%.\tatom%A1.exch.b%T0\t%x0, %1, %2;";
2091 return nvptx_output_atomic_insn (t, operands, 1, 3);
2093 [(set_attr "atomic" "true")])
;; Atomic store expander.  The memory address is decomposed first; when
;; its base is a register for which REGNO_PTR_FRAME_P holds, an ordinary
;; mov<mode> is emitted.  The remaining visible paths emit
;; nvptx_atomic_store_sm70<mode>, or, depending on
;; nvptx_mem_maybe_shared_p, either fall back to the generic
;; expand_atomic_store handling or emit nvptx_atomic_store<mode>.
2095 (define_expand "atomic_store<mode>"
2096 [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory
2097 (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2098 (match_operand:SI 2 "const_int_operand")] ;; model
2101 struct address_info info;
2102 decompose_mem_address (&info, operands[0]);
2103 if (info.base != NULL && REG_P (*info.base)
2104 && REGNO_PTR_FRAME_P (REGNO (*info.base)))
2106 emit_insn (gen_mov<mode> (operands[0], operands[1]));
2112 emit_insn (gen_nvptx_atomic_store_sm70<mode> (operands[0], operands[1],
2117 bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]);
2118 if (!maybe_shared_p)
2119 /* Fall back to expand_atomic_store. */
2122 emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1],
;; Atomic store implemented with a plain `st' instruction, printed through
;; nvptx_output_atomic_insn with operand 0 as the memory and operand 2 as
;; the memory model.  The "atomic" attribute is deliberately false.
2127 (define_insn "nvptx_atomic_store_sm70<mode>"
2128 [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory
2129 (unspec_volatile:SDIM
2130 [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2131 (match_operand:SI 2 "const_int_operand")] ;; model
2136 = "%.\tst%A0.b%T0\t%0, %1;";
2137 return nvptx_output_atomic_insn (t, operands, 0, 2);
2139 [(set_attr "atomic" "false")]) ;; Note: st is not an atomic insn.
;; Atomic store implemented with an `atom...exch' instruction whose
;; destination is written as `_', so the previous memory contents are not
;; kept.  Printed through nvptx_output_atomic_insn with operand 0 as the
;; memory and operand 2 as the memory model.
2141 (define_insn "nvptx_atomic_store<mode>"
2142 [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory
2143 (unspec_volatile:SDIM
2144 [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2145 (match_operand:SI 2 "const_int_operand")] ;; model
2150 = "%.\tatom%A0.exch.b%T0\t_, %0, %1;";
2151 return nvptx_output_atomic_insn (t, operands, 0, 2);
2153 [(set_attr "atomic" "true")])
;; Integer atomic fetch-and-add: memory operand 1 is updated with
;; operand 1 + operand 2 and operand 0 receives the value read before the
;; update.  When nvptx_mem_local_p holds for the memory operand the
;; sequence is open-coded (load, add into a scratch register, store, copy
;; the loaded value to operand 0); otherwise an `atom...add' instruction
;; is printed through nvptx_output_atomic_insn.
2155 (define_insn "atomic_fetch_add<mode>"
2156 [(set (match_operand:SDIM 1 "memory_operand" "+m")
2157 (unspec_volatile:SDIM
2158 [(plus:SDIM (match_dup 1)
2159 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
2160 (match_operand:SI 3 "const_int_operand")] ;; model
2162 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2166 if (nvptx_mem_local_p (operands[1]))
2168 output_asm_insn ("{", NULL);
2169 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2170 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
2171 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2172 output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
2174 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2175 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2176 output_asm_insn ("}", NULL);
2180 = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
2181 return nvptx_output_atomic_insn (t, operands, 1, 3);
2183 [(set_attr "atomic" "true")])
;; SFmode atomic fetch-and-add.  Same structure as the integer pattern:
;; an open-coded load/add/store/mov sequence when nvptx_mem_local_p holds
;; for the memory operand, otherwise an `atom...add' instruction printed
;; through nvptx_output_atomic_insn.  The addend may be a register or a
;; floating constant ("RF").
2185 (define_insn "atomic_fetch_addsf"
2186 [(set (match_operand:SF 1 "memory_operand" "+m")
2188 [(plus:SF (match_dup 1)
2189 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
2190 (match_operand:SI 3 "const_int_operand")] ;; model
2192 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
2196 if (nvptx_mem_local_p (operands[1]))
2198 output_asm_insn ("{", NULL);
2199 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2200 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
2201 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2202 output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
2204 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2205 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2206 output_asm_insn ("}", NULL);
2210 = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
2211 return nvptx_output_atomic_insn (t, operands, 1, 3);
2213 [(set_attr "atomic" "true")])
;; Fetch-and-<logic> for the SDIM modes, one instance per any_logic code.
;; Operand 1 is the memory location ("+m"), operand 2 the second source,
;; operand 3 the memory model and operand 0 the output register.
;; The insn is only enabled when the mode is SImode or TARGET_SM35 is set.
;; For memory satisfying nvptx_mem_local_p a braced sequence is printed:
;; the scratch registers %val and %update are declared with the untyped
;; ".b" type, operand 1 is loaded into %val, "<logic>.b" combines %val with
;; operand 2 into %update, %update is stored back and %val (the value read
;; before the operation) is moved into operand 0.
;; The "atom.<logic>.b" template goes through nvptx_output_atomic_insn with
;; operand indices 1 and 3.
;; NOTE(review): presumably the local-memory sequence returns before the
;; atom template is reached -- confirm against the full definition.
2215 (define_insn "atomic_fetch_<logic><mode>"
2216 [(set (match_operand:SDIM 1 "memory_operand" "+m")
2217 (unspec_volatile:SDIM
2218 [(any_logic:SDIM (match_dup 1)
2219 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
2220 (match_operand:SI 3 "const_int_operand")] ;; model
2222 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2224 "<MODE>mode == SImode || TARGET_SM35"
2226 if (nvptx_mem_local_p (operands[1]))
2228 output_asm_insn ("{", NULL);
2229 output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands);
2230 output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%update;", operands);
2231 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2232 output_asm_insn ("%.\\t" "<logic>.b%T0" "\\t" "%%update,%%val,%2;",
2234 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2235 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2236 output_asm_insn ("}", NULL);
2240 = "%.\\tatom%A1.<logic>.b%T0\\t%x0, %1, %2;";
2241 return nvptx_output_atomic_insn (t, operands, 1, 3);
2244 [(set_attr "atomic" "true")])
;; Test-and-set on a QImode memory location.  This is not expanded inline:
;; the expander emits a call to the library function
;; "__atomic_test_and_set_1".  The address of operand 1 is converted to
;; ptr_mode, the memory model (operand 2) is passed as an SImode argument,
;; and the SImode value of the call is placed in operand 0.
2246 (define_expand "atomic_test_and_set"
2247 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
2248 (match_operand:QI 1 "memory_operand") ;; memory
2249 (match_operand:SI 2 "const_int_operand")] ;; model
2254 libfunc = init_one_libfunc ("__atomic_test_and_set_1");
2255 addr = convert_memory_address (ptr_mode, XEXP (operands[1], 0));
2256 emit_library_call_value (libfunc, operands[0], LCT_NORMAL, SImode,
2258 operands[2], SImode);
;; Barrier synchronization.  Operand 0 is printed as the first barrier
;; argument; operand 1 is a constant that selects the form:
;;  - operand 1 == 0: only operand 0 is printed, using
;;    "barrier.sync.aligned" when TARGET_PTX_6_0 and "bar.sync" otherwise;
;;  - operand 1 != 0: both operands are printed, using "barrier.sync" when
;;    TARGET_PTX_6_0 and "bar.sync" otherwise.
;; The insn is marked as not predicable.
2262 (define_insn "nvptx_barsync"
2263 [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
2264 (match_operand:SI 1 "const_int_operand")]
2268 if (INTVAL (operands[1]) == 0)
2269 return (TARGET_PTX_6_0
2270 ? "\\tbarrier.sync.aligned\\t%0;"
2271 : "\\tbar.sync\\t%0;");
2273 return (TARGET_PTX_6_0
2274 ? "\\tbarrier.sync\\t%0, %1;"
2275 : "\\tbar.sync\\t%0, %1;");
2277 [(set_attr "predicable" "no")])
;; Warp-level synchronization, represented as an operand-less
;; UNSPECV_WARPSYNC.  Emits "bar.warp.sync" with the literal argument
;; 0xffffffff (presumably the all-lanes member mask -- confirm against the
;; PTX ISA).
2279 (define_insn "nvptx_warpsync"
2280 [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)]
2282 "%.\\tbar.warp.sync\\t0xffffffff;")
;; Int iterator over the UNSPECV_BARRED_* codes, with the per-code
;; attributes used by the "nvptx_barred_<barred_op>" insn:
;;   barred_op       operation name placed in the pattern name and in the
;;                   "bar.red.<barred_op>" mnemonic (and / or / popc);
;;   barred_mode     mode of the result operand (BI for and/or, SI for popc);
;;   barred_ptxtype  type suffix printed after the operation
;;                   (pred for and/or, u32 for popc).
2284 (define_int_iterator BARRED
2287 UNSPECV_BARRED_POPC])
2288 (define_int_attr barred_op
2289 [(UNSPECV_BARRED_AND "and")
2290 (UNSPECV_BARRED_OR "or")
2291 (UNSPECV_BARRED_POPC "popc")])
2292 (define_int_attr barred_mode
2293 [(UNSPECV_BARRED_AND "BI")
2294 (UNSPECV_BARRED_OR "BI")
2295 (UNSPECV_BARRED_POPC "SI")])
2296 (define_int_attr barred_ptxtype
2297 [(UNSPECV_BARRED_AND "pred")
2298 (UNSPECV_BARRED_OR "pred")
2299 (UNSPECV_BARRED_POPC "u32")])
;; Barrier with reduction, instantiated once per BARRED code.  Operand 0 is
;; the result register, in mode <barred_mode>.  Operands 1 and 2 are SImode
;; values printed as the second and third arguments of "bar.red".
;; Operand 3 is a constant printed through the "%p" modifier directly in
;; front of operand 4, the BImode register that forms the last argument.
;; The insn is marked as not predicable.
;; (A stray `;"' that used to follow the output template has been removed;
;; it was only ever read as a comment.)
2301 (define_insn "nvptx_barred_<barred_op>"
2302 [(set (match_operand:<barred_mode> 0 "nvptx_register_operand" "=R")
2304 [(match_operand:SI 1 "nvptx_nonmemory_operand" "Ri")
2305 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
2306 (match_operand:SI 3 "const_int_operand" "i")
2307 (match_operand:BI 4 "nvptx_register_operand" "R")]
2310 "\\tbar.red.<barred_op>.<barred_ptxtype> \\t%0, %1, %2, %p3%4;"
2311 [(set_attr "predicable" "no")])
;; Run-time check represented as an operand-less
;; UNSPECV_UNIFORM_WARP_CHECK.  The output code walks a NULL-terminated
;; table of template strings and prints each one with output_asm_insn.
;; The printed sequence declares the predicate register %r_sync, sets it to
;; 1, overwrites it with the result of "vote.all.pred", and then issues
;; "trap" and "exit", both guarded by "@!%r_sync" (i.e. taken only when the
;; vote result is false).
2313 (define_insn "nvptx_uniform_warp_check"
2314 [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)]
2317 const char *insns[] = {
2319 "\\t" ".reg.pred" "\\t" "%%r_sync;",
2320 "\\t" "mov.pred" "\\t" "%%r_sync, 1;",
2321 "%.\\t" "vote.all.pred" "\\t" "%%r_sync, 1;",
2322 "@!%%r_sync\\t" "trap;",
2323 "@!%%r_sync\\t" "exit;",
2327 for (const char **p = &insns[0]; *p != NULL; p++)
2328 output_asm_insn (*p, NULL);
;; Expander for a full memory barrier.  It creates operand 0 itself: a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH, which is then
;; used (via match_dup) inside the UNSPECV_MEMBAR unspec_volatile.
2332 (define_expand "memory_barrier"
2334 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
2337 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2338 MEM_VOLATILE_P (operands[0]) = 1;
2341 ;; PTX defines the memory barriers membar.cta, membar.gl and membar.sys
2342 ;; (corresponding to the CUDA functions __threadfence_block, __threadfence and
2343 ;; __threadfence_system). For the insn memory_barrier we use membar.sys. This
2344 ;; may be overly conservative, but before using membar.gl instead we'll need to
2345 ;; explain in detail why it's safe to use. For now, use membar.sys.
;; Insn matching a BLKmode operand 0 being set to the UNSPECV_MEMBAR
;; unspec_volatile of itself.  Operand 0 has an empty predicate and
;; constraint.  Not predicable.
2346 (define_insn "*memory_barrier"
2347 [(set (match_operand:BLK 0 "" "")
2348 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
2351 [(set_attr "predicable" "no")])
;; Expander for the UNSPECV_MEMBAR_CTA barrier.  It creates operand 0
;; itself: a volatile BLKmode MEM whose address is a Pmode SCRATCH, used
;; (via match_dup) inside the unspec_volatile.
2353 (define_expand "nvptx_membar_cta"
2355 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
2358 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2359 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand 0 being set to the UNSPECV_MEMBAR_CTA
;; unspec_volatile of itself.  Operand 0 has an empty predicate and
;; constraint.  Not predicable.
2362 (define_insn "*nvptx_membar_cta"
2363 [(set (match_operand:BLK 0 "" "")
2364 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
2367 [(set_attr "predicable" "no")])
;; Expander for the UNSPECV_MEMBAR_GL barrier.  It creates operand 0
;; itself: a volatile BLKmode MEM whose address is a Pmode SCRATCH, used
;; (via match_dup) inside the unspec_volatile.
2369 (define_expand "nvptx_membar_gl"
2371 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
2374 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2375 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand 0 being set to the UNSPECV_MEMBAR_GL
;; unspec_volatile of itself.  Operand 0 has an empty predicate and
;; constraint.  Not predicable.
2378 (define_insn "*nvptx_membar_gl"
2379 [(set (match_operand:BLK 0 "" "")
2380 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
2383 [(set_attr "predicable" "no")])
;; Operand-less UNSPECV_NOUNROLL marker.  It prints the directive
;; .pragma "nounroll"; into the output.  Not predicable.
2385 (define_insn "nvptx_nounroll"
2386 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
2388 "\\t.pragma \\\"nounroll\\\";"
2389 [(set_attr "predicable" "no")])
;; Sets the DImode operand 0 from an unspec_volatile of the constant
;; operand 1.  The assembly text is produced entirely by
;; nvptx_output_red_partition, which receives both operands.
;; Not predicable.
2391 (define_insn "nvptx_red_partition"
2392 [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
2393 (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
2397 return nvptx_output_red_partition (operands[0], operands[1]);
2399 [(set_attr "predicable" "no")])
2401 ;; Expand QI mode operations using SI mode instructions.
;; Code iterators for the QImode expanders below, split by how the
;; operands are widened to SImode (signed vs. unsigned conversion) and by
;; arity.  Each code attribute maps an rtx code to the name stem that the
;; expanders splice into "<stem>qi3" / "<stem>qi2" pattern names and
;; "gen_<stem>si3" / "gen_<stem>si2" generator calls.
2402 (define_code_iterator any_sbinary [plus minus smin smax])
2403 (define_code_attr sbinary [(plus "add") (minus "sub") (smin "smin") (smax "smax")])
2405 (define_code_iterator any_ubinary [and ior xor umin umax])
2406 (define_code_attr ubinary [(and "and") (ior "ior") (xor "xor") (umin "umin")
2409 (define_code_iterator any_sunary [neg abs])
2410 (define_code_attr sunary [(neg "neg") (abs "abs")])
2412 (define_code_iterator any_uunary [not])
2413 (define_code_attr uunary [(not "one_cmpl")])
;; QImode plus / minus / smin / smax, carried out in SImode.  Both inputs
;; are converted from QImode to SImode with convert_modes using
;; unsignedp == 0.  For MINUS the first input is additionally forced into
;; a register.  The matching SImode pattern computes into a fresh SImode
;; pseudo, and truncsiqi2 narrows that into operand 0.
2415 (define_expand "<sbinary>qi3"
2416 [(set (match_operand:QI 0 "nvptx_register_operand")
2417 (any_sbinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
2418 (match_operand:QI 2 "nvptx_nonmemory_operand")))]
2421 rtx reg = gen_reg_rtx (SImode);
2422 rtx op0 = convert_modes (SImode, QImode, operands[1], 0);
2423 rtx op1 = convert_modes (SImode, QImode, operands[2], 0);
2424 if (<CODE> == MINUS)
2425 op0 = force_reg (SImode, op0);
2426 emit_insn (gen_<sbinary>si3 (reg, op0, op1));
2427 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode and / ior / xor / umin / umax, carried out in SImode.  Both
;; inputs are converted from QImode to SImode with convert_modes using
;; unsignedp == 1.  The matching SImode pattern computes into a fresh
;; SImode pseudo, and truncsiqi2 narrows that into operand 0.
2431 (define_expand "<ubinary>qi3"
2432 [(set (match_operand:QI 0 "nvptx_register_operand")
2433 (any_ubinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
2434 (match_operand:QI 2 "nvptx_nonmemory_operand")))]
2437 rtx reg = gen_reg_rtx (SImode);
2438 rtx op0 = convert_modes (SImode, QImode, operands[1], 1);
2439 rtx op1 = convert_modes (SImode, QImode, operands[2], 1);
2440 emit_insn (gen_<ubinary>si3 (reg, op0, op1));
2441 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode neg / abs, carried out in SImode.  The input is converted from
;; QImode to SImode with convert_modes using unsignedp == 0.  The matching
;; SImode pattern computes into a fresh SImode pseudo, and truncsiqi2
;; narrows that into operand 0.
2445 (define_expand "<sunary>qi2"
2446 [(set (match_operand:QI 0 "nvptx_register_operand")
2447 (any_sunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
2450 rtx reg = gen_reg_rtx (SImode);
2451 rtx op0 = convert_modes (SImode, QImode, operands[1], 0);
2452 emit_insn (gen_<sunary>si2 (reg, op0));
2453 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode bitwise not (one_cmplqi2), carried out in SImode.  The input is
;; converted from QImode to SImode with convert_modes using
;; unsignedp == 1.  The SImode pattern computes into a fresh SImode pseudo,
;; and truncsiqi2 narrows that into operand 0.
2457 (define_expand "<uunary>qi2"
2458 [(set (match_operand:QI 0 "nvptx_register_operand")
2459 (any_uunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
2462 rtx reg = gen_reg_rtx (SImode);
2463 rtx op0 = convert_modes (SImode, QImode, operands[1], 1);
2464 emit_insn (gen_<uunary>si2 (reg, op0));
2465 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; Store the result of a QImode comparison into the SImode operand 0.
;; The comparison code is taken from operand 1; unsigned_condition_p on
;; that code decides whether operands 2 and 3 are converted from QImode to
;; SImode as unsigned or signed values.  An SImode comparison rtx with the
;; same code is built on the widened operands, gen_cmpsi evaluates it into
;; a fresh BImode pseudo, and gen_setccsi_from_bi turns that BImode value
;; into operand 0.
2469 (define_expand "cstoreqi4"
2470 [(set (match_operand:SI 0 "nvptx_register_operand")
2471 (match_operator:SI 1 "nvptx_comparison_operator"
2472 [(match_operand:QI 2 "nvptx_nonmemory_operand")
2473 (match_operand:QI 3 "nvptx_nonmemory_operand")]))]
2476 rtx reg = gen_reg_rtx (BImode);
2477 enum rtx_code code = GET_CODE (operands[1]);
2478 int unsignedp = unsigned_condition_p (code);
2479 rtx op2 = convert_modes (SImode, QImode, operands[2], unsignedp);
2480 rtx op3 = convert_modes (SImode, QImode, operands[3], unsignedp);
2481 rtx cmp = gen_rtx_fmt_ee (code, SImode, op2, op3);
2482 emit_insn (gen_cmpsi (reg, cmp, op2, op3));
2483 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; SImode operand 0 computed from (truncate:QI operand 1), where operand 1
;; is an SImode register.  Emitted as a single "cvt.s32.s8", i.e. a
;; conversion from a signed 8-bit to a signed 32-bit value.
2487 (define_insn "*ext_truncsi2_qi"
2488 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
2490 (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
2492 "%.\\tcvt.s32.s8\\t%0, %1;")
;; SImode operand 0 computed from (truncate:QI operand 1), where operand 1
;; is an SImode register.  Emitted as a single "cvt.u32.u8", i.e. a
;; conversion from an unsigned 8-bit to an unsigned 32-bit value.
2494 (define_insn "*zext_truncsi2_qi"
2495 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
2497 (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
2499 "%.\\tcvt.u32.u8\\t%0, %1;")