1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2024 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
35 UNSPEC_FPINT_NEARBYINT
53 (define_c_enum "unspecv" [
64 UNSPECV_UNIFORM_WARP_CHECK
;; Two boolean insn attributes, each defaulting to "false". Patterns opt in
;; with (set_attr ...); in this file the move, extension and truncation
;; patterns set "subregs_ok" to "true".
83 (define_attr "subregs_ok" "false,true"
84 (const_string "false"))
86 (define_attr "atomic" "false,true"
87 (const_string "false"))
89 ;; The nvptx operand predicates, in general, do not permit subregs and
90 ;; accept only literal constants; this differs from the generic predicates,
91 ;; which permit subregs and symbolic constants (as appropriate).
;; Register predicate; the C test defers to the generic register_operand
;; for the requested mode.
92 (define_predicate "nvptx_register_operand"
95 return register_operand (op, mode);
;; Accepts a REG, or a CONCAT whose own mode is DCmode or CDImode and whose
;; two halves both satisfy nvptx_register_operand.
;; NOTE(review): the halves are tested against the caller's MODE, not the
;; component mode -- confirm that users of this predicate pass VOIDmode.
98 (define_predicate "nvptx_register_or_complex_di_df_register_operand"
99 (ior (match_code "reg")
100 (match_code "concat"))
102 if (GET_CODE (op) == CONCAT)
103 return ((GET_MODE (op) == DCmode || GET_MODE (op) == CDImode)
104 && nvptx_register_operand (XEXP (op, 0), mode)
105 && nvptx_register_operand (XEXP (op, 1), mode));
107 return nvptx_register_operand (op, mode);
;; Accepts only MEM or REG codes: a REG must pass register_operand, and
;; anything else (i.e. a MEM) must pass memory_operand.
110 (define_predicate "nvptx_nonimmediate_operand"
111 (match_code "mem,reg")
113 return (REG_P (op) ? register_operand (op, mode)
114 : memory_operand (op, mode));
;; Accepts only REG, CONST_INT or CONST_DOUBLE codes: a REG must pass
;; register_operand, and a constant must pass immediate_operand.
117 (define_predicate "nvptx_nonmemory_operand"
118 (match_code "reg,const_int,const_double")
120 return (REG_P (op) ? register_operand (op, mode)
121 : immediate_operand (op, mode));
;; Matches a CONST_INT that is the const0_rtx object itself (pointer
;; comparison), i.e. integer zero.
124 (define_predicate "const0_operand"
125 (and (match_code "const_int")
126 (match_test "op == const0_rtx")))
128 ;; True if this operator is valid for predication.
129 (define_predicate "predicate_operator"
130 (match_code "eq,ne"))
132 (define_predicate "ne_operator"
;; Integer comparison codes: equality, inequality, and the signed and
;; unsigned orderings.
135 (define_predicate "nvptx_comparison_operator"
136 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
;; Floating-point comparison codes: the plain comparisons, their
;; unordered-or-... variants (un*), and the ordered/unordered tests.
;; Note that ltgt is not in the list.
138 (define_predicate "nvptx_float_comparison_operator"
139 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
;; A CONST_INT whose value, viewed as unsigned, is below 4. Because the test
;; uses UINTVAL, negative constants are rejected, leaving 0..3.
;; Presumably a vector element index -- confirm against the users.
141 (define_predicate "nvptx_vector_index_operand"
142 (and (match_code "const_int")
143 (match_test "UINTVAL (op) < 4")))
;; A call target is either any REG, or a SYMBOL_REF for which
;; SYMBOL_REF_FUNCTION_P holds.
145 ;; Test for a valid operand for a call instruction.
146 (define_predicate "call_insn_operand"
147 (match_code "symbol_ref,reg")
149 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
;; Element 0 of the PARALLEL is not inspected (the loop starts at index 1);
;; every later element is tested for being a USE whose operand is a REG.
152 ;; Return true if OP is a call with parallel USEs of the argument
154 (define_predicate "call_operation"
155 (match_code "parallel")
157 int arg_end = XVECLEN (op, 0);
159 for (int i = 1; i < arg_end; i++)
161 rtx elt = XVECEXP (op, 0, i);
163 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
169 ;; Test for a function symbol ref operand
170 (define_predicate "symbol_ref_function_operand"
171 (match_code "symbol_ref")
173 return SYMBOL_REF_FUNCTION_P (op);
176 (define_attr "predicable" "no,yes"
177 (const_string "yes"))
180 [(match_operator 0 "predicate_operator"
181 [(match_operand:BI 1 "nvptx_register_operand" "")
182 (match_operand:BI 2 "const0_operand" "")])]
187 (define_constraint "P0"
188 "An integer with the value 0."
189 (and (match_code "const_int")
190 (match_test "ival == 0")))
192 (define_constraint "P1"
193 "An integer with the value 1."
194 (and (match_code "const_int")
195 (match_test "ival == 1")))
197 (define_constraint "Pn"
198 "An integer with the value -1."
199 (and (match_code "const_int")
200 (match_test "ival == -1")))
202 (define_constraint "R"
206 (define_constraint "Ia"
207 "Any integer constant."
208 (and (match_code "const_int") (match_test "true")))
;; Mode iterators. Each name spells out the modes it covers: Q, H, S, D stand
;; for QI, HI, SI, DI; a leading B adds BI; the float iterators list SF/DF
;; (SDFM) or HF/SF (HSFM); SDCM covers the complex modes SC and DC.
;; BITS and BITD each pair an integer mode with a float mode (SI/SF and
;; DI/DF); VECIM covers the integer vector modes V2SI and V2DI.
210 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
211 (define_mode_iterator QHSDIM [QI HI SI DI])
212 (define_mode_iterator HSDIM [HI SI DI])
213 (define_mode_iterator BHSDIM [BI HI SI DI])
214 (define_mode_iterator SDIM [SI DI])
215 (define_mode_iterator SDISDFM [SI DI SF DF])
216 (define_mode_iterator QHIM [QI HI])
217 (define_mode_iterator QHSIM [QI HI SI])
218 (define_mode_iterator SDFM [SF DF])
219 (define_mode_iterator HSFM [HF SF])
220 (define_mode_iterator SDCM [SC DC])
221 (define_mode_iterator BITS [SI SF])
222 (define_mode_iterator BITD [DI DF])
223 (define_mode_iterator VECIM [V2SI V2DI])
225 ;; This mode iterator allows :P to be used for patterns that operate on
226 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
227 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
;; VECELEM yields the element mode name in upper case and Vecelem the same
;; name in lower case.
229 ;; Define element mode for each vector mode.
230 (define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
231 (define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])
233 ;; We should get away with not defining memory alternatives, since we don't
234 ;; get variables in this mode and pseudos are never spilled.
236 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
237 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))]
240 %.\\tmov%t0\\t%0, %1;
241 %.\\tsetp.eq.u32\\t%0, 1, 0;
242 %.\\tsetp.eq.u32\\t%0, 1, 1;")
;; Move for the V2SI/V2DI vector modes. Alternatives, in order:
;;   0: register or immediate -> register (printed by nvptx_output_mov_insn)
;;   1: memory -> register (ld)
;;   2: register -> memory (st)
;; The insn condition rejects a memory destination unless the source is a
;; register. The %-codes are expanded by the back end's operand printer,
;; which is not part of this file.
244 (define_insn "*mov<mode>_insn"
245 [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
246 (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
247 "!MEM_P (operands[0]) || REG_P (operands[1])"
249 if (which_alternative == 1)
250 return "%.\\tld%A1%u1\\t%0, %1;";
251 if (which_alternative == 2)
252 return "%.\\tst%A0%u0\\t%0, %1;";
254 return nvptx_output_mov_insn (operands[0], operands[1]);
256 [(set_attr "subregs_ok" "true")])
;; Move for the scalar integer modes QI/HI/SI/DI. Alternatives, in order:
;;   0: register or immediate -> register (printed by nvptx_output_mov_insn)
;;   1: memory -> register (ld)
;;   2: register -> memory (st)
;; The insn condition rejects a memory destination unless the source is a
;; register.
258 (define_insn "*mov<mode>_insn"
259 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
260 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
261 "!MEM_P (operands[0]) || REG_P (operands[1])"
263 if (which_alternative == 1)
264 return "%.\\tld%A1%u1\\t%0, %1;";
265 if (which_alternative == 2)
266 return "%.\\tst%A0%u0\\t%0, %1;";
268 return nvptx_output_mov_insn (operands[0], operands[1]);
270 [(set_attr "subregs_ok" "true")])
272 ;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up.
274 [(set (match_operand:DI 0 "nvptx_register_operand")
275 (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand")
276 (match_operand 2 "const_int_operand"))))]
278 [(set (match_dup 0) (match_dup 1))
279 (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
;; Move for SF/DF. Alternatives, in order:
;;   0: register or floating constant -> register (nvptx_output_mov_insn)
;;   1: memory -> register (ld)
;;   2: register -> memory (st)
;; The insn condition rejects a memory destination unless the source is a
;; register.
;; NOTE(review): unlike the integer move patterns, the load takes its type
;; suffix from operand 0 (%u0) and the store from operand 1 (%u1). Both
;; operands have the same mode, so this is presumably equivalent -- confirm
;; against the operand printer.
283 (define_insn "*mov<mode>_insn"
284 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
285 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
286 "!MEM_P (operands[0]) || REG_P (operands[1])"
288 if (which_alternative == 1)
289 return "%.\\tld%A1%u0\\t%0, %1;";
290 if (which_alternative == 2)
291 return "%.\\tst%A0%u1\\t%0, %1;";
293 return nvptx_output_mov_insn (operands[0], operands[1]);
295 [(set_attr "subregs_ok" "true")])
;; HFmode move: register copy, load, and store, all emitted with the .b16
;; suffix. Both operands use nonimmediate_operand, so this insn has no
;; alternative for constants.
297 (define_insn "*movhf_insn"
298 [(set (match_operand:HF 0 "nonimmediate_operand" "=R,R,m")
299 (match_operand:HF 1 "nonimmediate_operand" "R,m,R"))]
300 "!MEM_P (operands[0]) || REG_P (operands[1])"
302 %.\\tmov.b16\\t%0, %1;
303 %.\\tld.b16\\t%0, %1;
304 %.\\tst.b16\\t%0, %1;"
305 [(set_attr "subregs_ok" "true")])
;; Expander for HFmode moves.
;; A CONST_DOUBLE source is converted to SFmode, moved into a fresh SFmode
;; register, and truncated to HFmode with truncsfhf2; when the destination is
;; not a REG the truncation goes through a second, HFmode temporary.
;; A memory destination with a non-register source is staged through a
;; temporary HFmode register.
307 (define_expand "movhf"
308 [(set (match_operand:HF 0 "nonimmediate_operand" "")
309 (match_operand:HF 1 "nonimmediate_operand" ""))]
312 /* Load HFmode constants as SFmode with an explicit FLOAT_TRUNCATE. */
313 if (CONST_DOUBLE_P (operands[1]))
315 rtx tmp1 = gen_reg_rtx (SFmode);
316 REAL_VALUE_TYPE d = *CONST_DOUBLE_REAL_VALUE (operands[1]);
317 real_convert (&d, SFmode, &d);
318 emit_move_insn (tmp1, const_double_from_real_value (d, SFmode));
320 if (!REG_P (operands[0]))
322 rtx tmp2 = gen_reg_rtx (HFmode);
323 emit_insn (gen_truncsfhf2 (tmp2, tmp1));
324 emit_move_insn (operands[0], tmp2);
327 emit_insn (gen_truncsfhf2 (operands[0], tmp1));
331 if (MEM_P (operands[0]) && !REG_P (operands[1]))
333 rtx tmp = gen_reg_rtx (HFmode);
334 emit_move_insn (tmp, operands[1]);
335 emit_move_insn (operands[0], tmp);
;; Copy the register printed as %ar<N>, where N is the constant operand 1,
;; into a QI/HI register. The narrow modes use cvt with a .u32 source type.
340 (define_insn "load_arg_reg<mode>"
341 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
342 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
345 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
;; Copy the register printed as %ar<N>, where N is the constant operand 1,
;; into an SI/DI/SF/DF register using a plain mov.
347 (define_insn "load_arg_reg<mode>"
348 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
349 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
352 "%.\\tmov%t0\\t%0, %%ar%1;")
354 (define_expand "mov<mode>"
355 [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
356 (match_operand:VECIM 1 "general_operand" ""))]
359 if (MEM_P (operands[0]) && !REG_P (operands[1]))
361 rtx tmp = gen_reg_rtx (<MODE>mode);
362 emit_move_insn (tmp, operands[1]);
363 emit_move_insn (operands[0], tmp);
;; Move expander for the scalar integer modes and SF/DF.
;; A store to memory of anything other than a register is staged through a
;; fresh pseudo of the same mode. A LABEL_REF source is reported with
;; sorry (), since the target cannot take the address of a label.
368 (define_expand "mov<mode>"
369 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
370 (match_operand:QHSDISDFM 1 "general_operand" ""))]
373 if (MEM_P (operands[0]) && !REG_P (operands[1]))
375 rtx tmp = gen_reg_rtx (<MODE>mode);
376 emit_move_insn (tmp, operands[1]);
377 emit_move_insn (operands[0], tmp);
381 if (GET_CODE (operands[1]) == LABEL_REF)
382 sorry ("target cannot support label values");
;; Zero-extend QI to HI: cvt.u16 from a register source, or an unsigned
;; 8-bit load (ld ... .u8) from a memory source.
385 (define_insn "zero_extendqihi2"
386 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
387 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
390 %.\\tcvt.u16.u%T1\\t%0, %1;
391 %.\\tld%A1.u8\\t%0, %1;"
392 [(set_attr "subregs_ok" "true")])
;; Zero-extend QI/HI to SI: cvt.u32 from a register source, or an unsigned
;; load typed by the source operand (%T1) from a memory source.
394 (define_insn "zero_extend<mode>si2"
395 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
396 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
399 %.\\tcvt.u32.u%T1\\t%0, %1;
400 %.\\tld%A1.u%T1\\t%0, %1;"
401 [(set_attr "subregs_ok" "true")])
;; Zero-extend QI/HI/SI to DI: cvt.u64 from a register source, or a load
;; from memory whose type suffix comes from %u1 (the other extension
;; patterns spell the load suffix as ".u%T1" / ".s%T1" instead).
403 (define_insn "zero_extend<mode>di2"
404 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
405 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
408 %.\\tcvt.u64.u%T1\\t%0, %1;
409 %.\\tld%A1%u1\\t%0, %1;"
410 [(set_attr "subregs_ok" "true")])
;; Sign-extend QI to HI with cvt.s16.s8. Only a register source is accepted;
;; there is no load alternative here.
412 (define_insn "extendqihi2"
413 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
414 (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
416 "%.\\tcvt.s16.s8\\t%0, %1;"
417 [(set_attr "subregs_ok" "true")])
;; Sign-extend QI/HI to SI: cvt.s32 from a register source, or a signed
;; load typed by the source operand (%T1) from a memory source.
419 (define_insn "extend<mode>si2"
420 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
421 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
424 %.\\tcvt.s32.s%T1\\t%0, %1;
425 %.\\tld%A1.s%T1\\t%0, %1;"
426 [(set_attr "subregs_ok" "true")])
;; Sign-extend QI/HI/SI to DI: cvt.s64 from a register source, or a signed
;; load typed by the source operand (%T1) from a memory source.
428 (define_insn "extend<mode>di2"
429 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
430 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
433 %.\\tcvt.s64.s%T1\\t%0, %1;
434 %.\\tld%A1.s%T1\\t%0, %1;"
435 [(set_attr "subregs_ok" "true")])
;; Truncate HI to QI: cvt from a .u16 source into a register, or an 8-bit
;; store (st ... .u8) when the destination is memory.
437 (define_insn "trunchiqi2"
438 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
439 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
442 %.\\tcvt%t0.u16\\t%0, %1;
443 %.\\tst%A0.u8\\t%0, %1;"
444 [(set_attr "subregs_ok" "true")])
;; Truncate SI to QI/HI.
;; Alternative 1 (memory destination) emits a store of the destination width.
;; For a register destination, QImode is emitted as a plain mov while HImode
;; uses cvt from a .u32 source.
446 (define_insn "truncsi<mode>2"
447 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
448 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
451 if (which_alternative == 1)
452 return "%.\\tst%A0.u%T0\\t%0, %1;";
453 if (GET_MODE (operands[0]) == QImode)
454 return "%.\\tmov%t0\\t%0, %1;";
455 return "%.\\tcvt%t0.u32\\t%0, %1;";
457 [(set_attr "subregs_ok" "true")])
;; Truncate DI to QI/HI/SI: cvt from a .u64 source into a register, or a
;; store of the destination width when the destination is memory.
459 (define_insn "truncdi<mode>2"
460 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
461 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
464 %.\\tcvt%t0.u64\\t%0, %1;
465 %.\\tst%A0.u%T0\\t%0, %1;"
466 [(set_attr "subregs_ok" "true")])
468 ;; Sign-extensions of truncations
470 (define_insn "*extend_trunc_<mode>2_qi"
471 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
473 (truncate:QI (match_operand:HSDIM 1 "nvptx_register_operand" "R"))))]
475 "%.\\tcvt.s%T0.s8\\t%0, %1;"
476 [(set_attr "subregs_ok" "true")])
478 (define_insn "*extend_trunc_<mode>2_hi"
479 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
481 (truncate:HI (match_operand:SDIM 1 "nvptx_register_operand" "R"))))]
483 "%.\\tcvt.s%T0.s16\\t%0, %1;"
484 [(set_attr "subregs_ok" "true")])
486 (define_insn "*extend_trunc_di2_si"
487 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
489 (truncate:SI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
491 "%.\\tcvt.s64.s32\\t%0, %1;"
492 [(set_attr "subregs_ok" "true")])
494 ;; Integer arithmetic
496 (define_insn "add<mode>3"
497 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
498 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
499 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
501 "%.\\tadd%t0\\t%0, %1, %2;")
503 (define_insn "*vadd_addsi4"
504 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
505 (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
506 (match_operand:SI 2 "nvptx_register_operand" "R"))
507 (match_operand:SI 3 "nvptx_register_operand" "R")))]
509 "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")
511 (define_insn "*vsub_addsi4"
512 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
513 (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
514 (match_operand:SI 2 "nvptx_register_operand" "R"))
515 (match_operand:SI 3 "nvptx_register_operand" "R")))]
517 "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")
;; Integer subtraction for HI/SI/DI; both inputs must be registers.
;; HImode is special-cased to emit "sub.s16" as a workaround for the NVIDIA
;; bug referenced in the body; the other modes take their suffix from %t0.
519 (define_insn "sub<mode>3"
520 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
521 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
522 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
525 if (GET_MODE (operands[0]) == HImode)
526 /* Workaround https://developer.nvidia.com/nvidia_bug/3527713.
528 return "%.\\tsub.s16\\t%0, %1, %2;";
530 return "%.\\tsub%t0\\t%0, %1, %2;";
533 (define_insn "mul<mode>3"
534 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
535 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
536 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
538 "%.\\tmul.lo%t0\\t%0, %1, %2;")
540 (define_insn "*mad<mode>3"
541 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
542 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
543 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
544 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
546 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
548 (define_insn "div<mode>3"
549 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
550 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
551 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
553 "%.\\tdiv.s%T0\\t%0, %1, %2;")
555 (define_insn "udiv<mode>3"
556 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
557 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
558 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
560 "%.\\tdiv.u%T0\\t%0, %1, %2;")
;; Signed remainder for HI/SI/DI, emitted as PTX "rem.s" with the width
;; suffix supplied by %T0.
;; Operand 1 uses the register-only predicate nvptx_register_operand, so its
;; constraint is plain "R": an "i" letter could never be satisfied there.
;; This matches the operand 1 constraint of div<mode>3.
562 (define_insn "mod<mode>3"
563 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
564 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
565 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
567 "%.\\trem.s%T0\\t%0, %1, %2;")
;; Unsigned remainder for HI/SI/DI, emitted as PTX "rem.u" with the width
;; suffix supplied by %T0.
;; Operand 1 uses the register-only predicate nvptx_register_operand, so its
;; constraint is plain "R": an "i" letter could never be satisfied there.
;; This matches the operand 1 constraint of udiv<mode>3.
569 (define_insn "umod<mode>3"
570 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
571 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
572 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
574 "%.\\trem.u%T0\\t%0, %1, %2;")
576 (define_insn "smin<mode>3"
577 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
578 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
579 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
581 "%.\\tmin.s%T0\\t%0, %1, %2;")
583 (define_insn "umin<mode>3"
584 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
585 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
586 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
588 "%.\\tmin.u%T0\\t%0, %1, %2;")
590 (define_insn "smax<mode>3"
591 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
592 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
593 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
595 "%.\\tmax.s%T0\\t%0, %1, %2;")
597 (define_insn "umax<mode>3"
598 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
599 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
600 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
602 "%.\\tmax.u%T0\\t%0, %1, %2;")
604 (define_insn "abs<mode>2"
605 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
606 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
608 "%.\\tabs.s%T0\\t%0, %1;")
610 (define_insn "neg<mode>2"
611 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
612 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
614 "%.\\tneg.s%T0\\t%0, %1;")
616 (define_insn "one_cmpl<mode>2"
617 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
618 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
620 "%.\\tnot.b%T0\\t%0, %1;")
622 (define_insn "one_cmplbi2"
623 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
624 (not:BI (match_operand:BI 1 "nvptx_register_operand" "R")))]
626 "%.\\tnot.pred\\t%0, %1;")
628 (define_insn "*cnot<mode>2"
629 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
630 (eq:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
633 "%.\\tcnot.b%T0\\t%0, %1;")
635 (define_insn "bitrev<mode>2"
636 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
637 (bitreverse:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
639 "%.\\tbrev.b%T0\\t%0, %1;")
641 (define_insn "clz<mode>2"
642 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
643 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
645 "%.\\tclz.b%T1\\t%0, %1;")
;; Count trailing zeros of an SI/DI value, producing an SI result.
;; Implemented as count-leading-zeros of the bit-reversed input:
;; bitrev<mode>2 writes a temporary, then clz<mode>2 writes operand 0.
647 (define_expand "ctz<mode>2"
648 [(set (match_operand:SI 0 "nvptx_register_operand" "")
649 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
652 rtx tmpreg = gen_reg_rtx (<MODE>mode);
653 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
654 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
658 (define_insn "popcount<mode>2"
659 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
660 (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
662 "%.\\tpopc.b%T1\\t%0, %1;")
664 ;; Multiplication variants
666 (define_insn "mulhisi3"
667 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
668 (mult:SI (sign_extend:SI
669 (match_operand:HI 1 "nvptx_register_operand" "R"))
671 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
673 "%.\\tmul.wide.s16\\t%0, %1, %2;")
675 (define_insn "mulsidi3"
676 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
677 (mult:DI (sign_extend:DI
678 (match_operand:SI 1 "nvptx_register_operand" "R"))
680 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
682 "%.\\tmul.wide.s32\\t%0, %1, %2;")
684 (define_insn "umulhisi3"
685 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
686 (mult:SI (zero_extend:SI
687 (match_operand:HI 1 "nvptx_register_operand" "R"))
689 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
691 "%.\\tmul.wide.u16\\t%0, %1, %2;")
693 (define_insn "umulsidi3"
694 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
695 (mult:DI (zero_extend:DI
696 (match_operand:SI 1 "nvptx_register_operand" "R"))
698 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
700 "%.\\tmul.wide.u32\\t%0, %1, %2;")
;; Signed DI x DI -> TI widening multiply, built from two DImode multiplies:
;; smuldi3_highpart produces the high half of the TImode result and muldi3
;; the low half; each is then moved into the matching part of operand 0.
702 (define_expand "mulditi3"
703 [(set (match_operand:TI 0 "nvptx_register_operand")
704 (mult:TI (sign_extend:TI
705 (match_operand:DI 1 "nvptx_register_operand"))
707 (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
710 rtx hi = gen_reg_rtx (DImode);
711 rtx lo = gen_reg_rtx (DImode);
712 emit_insn (gen_smuldi3_highpart (hi, operands[1], operands[2]));
713 emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
714 emit_move_insn (gen_highpart (DImode, operands[0]), hi);
715 emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
;; Unsigned DI x DI -> TI widening multiply, built from two DImode
;; multiplies: umuldi3_highpart produces the high half of the TImode result
;; and muldi3 the low half; each is then moved into the matching part of
;; operand 0.
719 (define_expand "umulditi3"
720 [(set (match_operand:TI 0 "nvptx_register_operand")
721 (mult:TI (zero_extend:TI
722 (match_operand:DI 1 "nvptx_register_operand"))
724 (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
727 rtx hi = gen_reg_rtx (DImode);
728 rtx lo = gen_reg_rtx (DImode);
729 emit_insn (gen_umuldi3_highpart (hi, operands[1], operands[2]));
730 emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
731 emit_move_insn (gen_highpart (DImode, operands[0]), hi);
732 emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
736 (define_insn "smul<mode>3_highpart"
737 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
739 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
740 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
742 "%.\\tmul.hi.s%T0\\t%0, %1, %2;")
744 (define_insn "umul<mode>3_highpart"
745 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
747 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
748 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
750 "%.\\tmul.hi.u%T0\\t%0, %1, %2;")
752 (define_insn "*smulhi3_highpart_2"
753 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
756 (mult:SI (sign_extend:SI
757 (match_operand:HI 1 "nvptx_register_operand" "R"))
759 (match_operand:HI 2 "nvptx_register_operand" "R")))
762 "%.\\tmul.hi.s16\\t%0, %1, %2;")
764 (define_insn "*smulsi3_highpart_2"
765 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
768 (mult:DI (sign_extend:DI
769 (match_operand:SI 1 "nvptx_register_operand" "R"))
771 (match_operand:SI 2 "nvptx_register_operand" "R")))
774 "%.\\tmul.hi.s32\\t%0, %1, %2;")
776 (define_insn "*umulhi3_highpart_2"
777 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
780 (mult:SI (zero_extend:SI
781 (match_operand:HI 1 "nvptx_register_operand" "R"))
783 (match_operand:HI 2 "nvptx_register_operand" "R")))
786 "%.\\tmul.hi.u16\\t%0, %1, %2;")
788 (define_insn "*umulsi3_highpart_2"
789 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
792 (mult:DI (zero_extend:DI
793 (match_operand:SI 1 "nvptx_register_operand" "R"))
795 (match_operand:SI 2 "nvptx_register_operand" "R")))
798 "%.\\tmul.hi.u32\\t%0, %1, %2;")
802 (define_insn "ashl<mode>3"
803 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
804 (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
805 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
807 "%.\\tshl.b%T0\\t%0, %1, %2;")
809 (define_insn "ashr<mode>3"
810 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
811 (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
812 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
814 "%.\\tshr.s%T0\\t%0, %1, %2;")
816 (define_insn "lshr<mode>3"
817 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
818 (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
819 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
821 "%.\\tshr.u%T0\\t%0, %1, %2;")
;; 32-bit rotate left, emitted as the funnel shift "shf.l.wrap.b32" with
;; operand 1 supplied as both data inputs (%1, %1). The rotate count is
;; matched inside an (and:SI ...) expression.
823 (define_insn "rotlsi3"
824 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
825 (rotate:SI (match_operand:SI 1 "nvptx_register_operand" "R")
826 (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
829 "%.\\tshf.l.wrap.b32\\t%0, %1, %1, %2;")
;; 32-bit rotate right, emitted as the funnel shift "shf.r.wrap.b32" with
;; operand 1 supplied as both data inputs (%1, %1). The rotate count is
;; matched inside an (and:SI ...) expression.
831 (define_insn "rotrsi3"
832 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
833 (rotatert:SI (match_operand:SI 1 "nvptx_register_operand" "R")
834 (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
837 "%.\\tshf.r.wrap.b32\\t%0, %1, %1, %2;")
839 ;; Logical operations
;; any_logic iterates over the three bitwise operations. <logic> is the
;; mnemonic used in the output templates, while <ilogic> is the spelling used
;; in pattern names; they differ only for ior ("or" versus "ior").
841 (define_code_iterator any_logic [and ior xor])
842 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
843 (define_code_attr ilogic [(and "and") (ior "ior") (xor "xor")])
845 (define_insn "<ilogic><mode>3"
846 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
848 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
849 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
851 "%.\\t<logic>.b%T0\\t%0, %1, %2;")
853 (define_insn "<ilogic>bi3"
854 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
855 (any_logic:BI (match_operand:BI 1 "nvptx_register_operand" "R")
856 (match_operand:BI 2 "nvptx_register_operand" "R")))]
858 "%.\\t<logic>.pred\\t%0, %1, %2;")
861 [(set (match_operand:HSDIM 0 "nvptx_register_operand")
863 (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand")
865 (ne:HSDIM (match_operand:BI 2 "nvptx_register_operand")
867 "can_create_pseudo_p ()"
868 [(set (match_dup 3) (any_logic:BI (match_dup 1) (match_dup 2)))
869 (set (match_dup 0) (ne:HSDIM (match_dup 3) (const_int 0)))]
871 operands[3] = gen_reg_rtx (BImode);
874 ;; Comparisons and branches
876 (define_insn "cmp<mode>"
877 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
878 (match_operator:BI 1 "nvptx_comparison_operator"
879 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
880 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
882 "%.\\tsetp%c1\\t%0, %2, %3;")
884 (define_insn "*cmp<mode>"
885 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
886 (match_operator:BI 1 "nvptx_float_comparison_operator"
887 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
888 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
890 "%.\\tsetp%c1\\t%0, %2, %3;")
892 (define_insn "*cmphf"
893 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
894 (match_operator:BI 1 "nvptx_float_comparison_operator"
895 [(match_operand:HF 2 "nvptx_register_operand" "R")
896 (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))]
898 "%.\\tsetp%c1\\t%0, %2, %3;")
902 (label_ref (match_operand 0 "" "")))]
906 (define_insn "br_true"
908 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
910 (label_ref (match_operand 1 "" ""))
914 [(set_attr "predicable" "no")])
916 (define_insn "br_false"
918 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
920 (label_ref (match_operand 1 "" ""))
924 [(set_attr "predicable" "no")])
926 ;; unified conditional branch
;; Jump to label operand 1 when the BImode operand 0, wrapped in
;; UNSPEC_BR_UNIFIED, compares not-equal to zero. Emits "bra.uni" prefixed
;; by the %j0 code; the insn itself is marked as not predicable.
927 (define_insn "br_true_uni"
928 [(set (pc) (if_then_else
929 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
930 UNSPEC_BR_UNIFIED) (const_int 0))
931 (label_ref (match_operand 1 "" "")) (pc)))]
933 "%j0\\tbra.uni\\t%l1;"
934 [(set_attr "predicable" "no")])
;; Jump to label operand 1 when the BImode operand 0, wrapped in
;; UNSPEC_BR_UNIFIED, compares equal to zero. Emits "bra.uni" prefixed by
;; the %J0 code (upper case, where br_true_uni uses %j0); not predicable.
936 (define_insn "br_false_uni"
937 [(set (pc) (if_then_else
938 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
939 UNSPEC_BR_UNIFIED) (const_int 0))
940 (label_ref (match_operand 1 "" "")) (pc)))]
942 "%J0\\tbra.uni\\t%l1;"
943 [(set_attr "predicable" "no")])
945 (define_expand "cbranch<mode>4"
947 (if_then_else (match_operator 0 "nvptx_comparison_operator"
948 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
949 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
950 (label_ref (match_operand 3 "" ""))
954 rtx t = nvptx_expand_compare (operands[0]);
956 operands[1] = XEXP (t, 0);
957 operands[2] = XEXP (t, 1);
960 (define_expand "cbranch<mode>4"
962 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
963 [(match_operand:SDFM 1 "nvptx_register_operand" "")
964 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
965 (label_ref (match_operand 3 "" ""))
969 rtx t = nvptx_expand_compare (operands[0]);
971 operands[1] = XEXP (t, 0);
972 operands[2] = XEXP (t, 1);
975 (define_expand "cbranchbi4"
977 (if_then_else (match_operator 0 "predicate_operator"
978 [(match_operand:BI 1 "nvptx_register_operand" "")
979 (match_operand:BI 2 "const0_operand" "")])
980 (label_ref (match_operand 3 "" ""))
985 ;; Conditional stores
;; Materialise a BImode predicate as an integer: the selp template selects
;; the constant 1 when operand 1 is true and 0 otherwise.
987 (define_insn "setcc<mode>_from_bi"
988 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
989 (ne:QHSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
992 "%.\\tselp%t0\\t%0, 1, 0, %1;")
;; Inverted form of setcc<mode>_from_bi: the selp template selects the
;; constant 0 when operand 1 is true and 1 otherwise.
994 (define_insn "*setcc<mode>_from_not_bi"
995 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
996 (eq:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
999 "%.\\tselp%t0\\t%0, 0, 1, %1;")
;; Extend a BImode predicate so that true becomes -1 (all bits set) and
;; false becomes 0, using selp with constant data operands.
1001 (define_insn "extendbi<mode>2"
1002 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
1004 (match_operand:BI 1 "nvptx_register_operand" "R")))]
1006 "%.\\tselp%t0\\t%0, -1, 0, %1;")
;; Extend a BImode predicate so that true becomes 1 and false becomes 0,
;; using selp with constant data operands.
1008 (define_insn "zero_extendbi<mode>2"
1009 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
1011 (match_operand:BI 1 "nvptx_register_operand" "R")))]
1013 "%.\\tselp%t0\\t%0, 1, 0, %1;")
;; Integer conditional select on a BImode predicate tested for non-zero:
;; the template passes operand 2 and operand 3 to selp in that order, with
;; operand 1 as the predicate.
1015 (define_insn "sel_true<mode>"
1016 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1018 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1019 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
1020 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
1022 "%.\\tselp%t0\\t%0, %2, %3, %1;")
1024 (define_insn "sel_true<mode>"
1025 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1027 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1028 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1029 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1031 "%.\\tselp%t0\\t%0, %2, %3, %1;")
;; Integer conditional select on a BImode predicate tested for zero.
;; Because the RTL condition is inverted relative to sel_true<mode>, the
;; template swaps the data operands (%3, %2) and reuses the same selp form.
1033 (define_insn "sel_false<mode>"
1034 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1036 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1037 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
1038 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
1040 "%.\\tselp%t0\\t%0, %3, %2, %1;")
1042 (define_insn "sel_false<mode>"
1043 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1045 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1046 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1047 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1049 "%.\\tselp%t0\\t%0, %3, %2, %1;")
1051 (define_code_iterator eqne [eq ne])
1053 ;; Split negation of a predicate into a conditional move.
1054 (define_insn_and_split "*selp<mode>_neg_<code>"
1055 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1056 (neg:HSDIM (eqne:HSDIM
1057 (match_operand:BI 1 "nvptx_register_operand" "R")
1064 (eqne (match_dup 1) (const_int 0))
1068 ;; Split bitwise not of a predicate into a conditional move.
1069 (define_insn_and_split "*selp<mode>_not_<code>"
1070 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1071 (not:HSDIM (eqne:HSDIM
1072 (match_operand:BI 1 "nvptx_register_operand" "R")
1079 (eqne (match_dup 1) (const_int 0))
1083 (define_insn "*setcc_int<mode>"
1084 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1086 (match_operator:SI 1 "nvptx_comparison_operator"
1087 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
1088 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))]
1090 "%.\\tset%t0%c1\\t%0, %2, %3;")
1092 (define_insn "*setcc_int<mode>"
1093 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1095 (match_operator:SI 1 "nvptx_float_comparison_operator"
1096 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
1097 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))]
1099 "%.\\tset%t0%c1\\t%0, %2, %3;")
1101 (define_insn "setcc_float<mode>"
1102 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1103 (match_operator:SF 1 "nvptx_comparison_operator"
1104 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
1105 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
1107 "%.\\tset%t0%c1\\t%0, %2, %3;")
1109 (define_insn "setcc_float<mode>"
1110 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1111 (match_operator:SF 1 "nvptx_float_comparison_operator"
1112 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
1113 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
1115 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Store the result of an integer comparison as an SImode value.
;; The comparison is rebuilt with BImode as its result mode and moved into a
;; fresh BImode register, which setccsi_from_bi then converts into operand 0.
1117 (define_expand "cstore<mode>4"
1118 [(set (match_operand:SI 0 "nvptx_register_operand")
1119 (match_operator:SI 1 "nvptx_comparison_operator"
1120 [(match_operand:HSDIM 2 "nvptx_register_operand")
1121 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
1124 rtx reg = gen_reg_rtx (BImode);
1125 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1126 operands[2], operands[3]);
1127 emit_move_insn (reg, cmp);
1128 emit_insn (gen_setccsi_from_bi (operands[0], reg));
1132 (define_expand "cstore<mode>4"
1133 [(set (match_operand:SI 0 "nvptx_register_operand")
1134 (match_operator:SI 1 "nvptx_float_comparison_operator"
1135 [(match_operand:SDFM 2 "nvptx_register_operand")
1136 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
1139 rtx reg = gen_reg_rtx (BImode);
1140 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1141 operands[2], operands[3]);
1142 emit_move_insn (reg, cmp);
1143 emit_insn (gen_setccsi_from_bi (operands[0], reg));
1147 (define_expand "cstorehf4"
1148 [(set (match_operand:SI 0 "nvptx_register_operand")
1149 (match_operator:SI 1 "nvptx_float_comparison_operator"
1150 [(match_operand:HF 2 "nvptx_register_operand")
1151 (match_operand:HF 3 "nvptx_nonmemory_operand")]))]
1154 rtx reg = gen_reg_rtx (BImode);
1155 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1156 operands[2], operands[3]);
1157 emit_move_insn (reg, cmp);
1158 emit_insn (gen_setccsi_from_bi (operands[0], reg));
1164 (define_insn "call_insn_<mode>"
1165 [(match_parallel 2 "call_operation"
1166 [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
1167 (match_operand 1))])]
1170 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
1173 (define_insn "call_value_insn_<mode>"
1174 [(match_parallel 3 "call_operation"
1175 [(set (match_operand 0 "nvptx_register_operand" "=R")
1176 (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
1177 (match_operand 2)))])]
1180 return nvptx_output_call_insn (insn, operands[0], operands[1]);
1183 (define_expand "call"
1184 [(match_operand 0 "" "")]
1187 nvptx_expand_call (NULL_RTX, operands[0]);
1191 (define_expand "call_value"
1192 [(match_operand 0 "" "")
1193 (match_operand 1 "" "")]
1196 nvptx_expand_call (operands[0], operands[1]);
1200 ;; Floating point arithmetic.
1202 (define_insn "add<mode>3"
1203 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1204 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1205 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1207 "%.\\tadd%t0\\t%0, %1, %2;")
1209 (define_insn "sub<mode>3"
1210 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1211 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1212 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
1214 "%.\\tsub%t0\\t%0, %1, %2;")
1216 (define_insn "mul<mode>3"
1217 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1218 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1219 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1221 "%.\\tmul%t0\\t%0, %1, %2;")
1223 (define_insn "fma<mode>4"
1224 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1225 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1226 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1227 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1229 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
;; Reciprocal.  The insn condition only accepts operand 2 when it is a
;; CONST_DOUBLE that real_identical reports equal to dconst1; the output
;; template then emits a single rcp of operand 1 and never prints operand 2.
1231 (define_insn "*recip<mode>2"
1232 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1234 (match_operand:SDFM 2 "const_double_operand" "F")
1235 (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1236 "CONST_DOUBLE_P (operands[2])
1237 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
1238 "%.\\trcp%#%t0\\t%0, %1;")
1240 (define_insn "div<mode>3"
1241 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1242 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1243 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1245 "%.\\tdiv%#%t0\\t%0, %1, %2;")
;; Copysign for SDFM modes.  Note that the output template prints operand 2
;; before operand 1 ("%0, %2, %1"), i.e. the source operands are swapped
;; relative to their RTL order.
;; NOTE(review): presumably because the PTX copysign instruction takes the
;; sign source as its first input -- confirm against the PTX ISA.
1247 (define_insn "copysign<mode>3"
1248 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1249 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF")
1250 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")]
1253 "%.\\tcopysign%t0\\t%0, %2, %1;")
1255 (define_insn "smin<mode>3"
1256 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1257 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1258 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1260 "%.\\tmin%t0\\t%0, %1, %2;")
1262 (define_insn "smax<mode>3"
1263 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1264 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1265 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1267 "%.\\tmax%t0\\t%0, %1, %2;")
1269 (define_insn "abs<mode>2"
1270 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1271 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1273 "%.\\tabs%t0\\t%0, %1;")
1275 (define_insn "neg<mode>2"
1276 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1277 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1279 "%.\\tneg%t0\\t%0, %1;")
1281 (define_insn "sqrt<mode>2"
1282 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1283 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1285 "%.\\tsqrt%#%t0\\t%0, %1;")
1287 (define_expand "sincossf3"
1288 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1289 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
1291 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
1292 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
1293 "flag_unsafe_math_optimizations"
1295 operands[2] = make_safe_from (operands[2], operands[0]);
1298 (define_insn "sinsf2"
1299 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1300 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1302 "flag_unsafe_math_optimizations"
1303 "%.\\tsin.approx%t0\\t%0, %1;")
1305 (define_insn "cossf2"
1306 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1307 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1309 "flag_unsafe_math_optimizations"
1310 "%.\\tcos.approx%t0\\t%0, %1;")
1312 (define_insn "log2sf2"
1313 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1314 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1316 "flag_unsafe_math_optimizations"
1317 "%.\\tlg2.approx%t0\\t%0, %1;")
1319 (define_insn "exp2sf2"
1320 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1321 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1323 "flag_unsafe_math_optimizations"
1324 "%.\\tex2.approx%t0\\t%0, %1;")
1326 (define_insn "setcc_isinf<mode>"
1327 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
1328 (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1331 "%.\\ttestp.infinite%t1\\t%0, %1;")
;; isinf expander.  The visible body allocates a BImode predicate, fills it
;; with setcc_isinf<mode> applied to operand 1, and then converts that
;; predicate to the SImode result in operand 0 via gen_setccsi_from_bi.
1333 (define_expand "isinf<mode>2"
1334 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1335 (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1339 rtx pred = gen_reg_rtx (BImode);
1340 emit_insn (gen_setcc_isinf<mode> (pred, operands[1]));
1341 emit_insn (gen_setccsi_from_bi (operands[0], pred));
1345 ;; HFmode floating point arithmetic.
1347 (define_insn "addhf3"
1348 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1349 (plus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1350 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1352 "%.\\tadd.f16\\t%0, %1, %2;")
1354 (define_insn "subhf3"
1355 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1356 (minus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1357 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1359 "%.\\tsub.f16\\t%0, %1, %2;")
1361 (define_insn "mulhf3"
1362 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1363 (mult:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1364 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1366 "%.\\tmul.f16\\t%0, %1, %2;")
1368 (define_insn "fmahf4"
1369 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1370 (fma:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1371 (match_operand:HF 2 "nvptx_nonmemory_operand" "RF")
1372 (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")))]
1374 "%.\\tfma%#.f16\\t%0, %1, %2, %3;")
;; HFmode negation.  Emitted as a 16-bit bitwise xor with -32768 (bit pattern
;; 0x8000, only bit 15 set) rather than as a floating-point neg instruction.
1376 (define_insn "neghf2"
1377 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1378 (neg:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
1380 "%.\\txor.b16\\t%0, %1, -32768;")
;; HFmode absolute value.  Emitted as a 16-bit bitwise and with 32767 (bit
;; pattern 0x7fff, every bit except bit 15) rather than as a floating-point
;; abs instruction.
1382 (define_insn "abshf2"
1383 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1384 (abs:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
1386 "%.\\tand.b16\\t%0, %1, 32767;")
1388 (define_insn "exp2hf2"
1389 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1390 (unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")]
1392 "TARGET_SM75 && flag_unsafe_math_optimizations"
1393 "%.\\tex2.approx.f16\\t%0, %1;")
1395 (define_insn "tanh<mode>2"
1396 [(set (match_operand:HSFM 0 "nvptx_register_operand" "=R")
1397 (unspec:HSFM [(match_operand:HSFM 1 "nvptx_register_operand" "R")]
1399 "TARGET_SM75 && flag_unsafe_math_optimizations"
1400 "%.\\ttanh.approx%t0\\t%0, %1;")
1402 ;; HFmode floating point minimum and maximum.
1404 (define_insn "sminhf3"
1405 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1406 (smin:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1407 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1409 "%.\\tmin.f16\\t%0, %1, %2;")
1411 (define_insn "smaxhf3"
1412 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1413 (smax:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1414 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1416 "%.\\tmax.f16\\t%0, %1, %2;")
1418 ;; Conversions involving floating point
1420 (define_insn "extendsfdf2"
1421 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
1422 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
1424 "%.\\tcvt%t0%t1\\t%0, %1;")
1426 (define_insn "truncdfsf2"
1427 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1428 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
1430 "%.\\tcvt%#%t0%t1\\t%0, %1;")
1432 (define_insn "floatunssi<mode>2"
1433 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1434 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1436 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1438 (define_insn "floatsi<mode>2"
1439 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1440 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1442 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1444 (define_insn "floatunsdi<mode>2"
1445 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1446 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1448 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1450 (define_insn "floatdi<mode>2"
1451 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1452 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1454 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1456 (define_insn "fixuns_trunc<mode>si2"
1457 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1458 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1460 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1462 (define_insn "fix_trunc<mode>si2"
1463 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1464 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1466 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1468 (define_insn "fixuns_trunc<mode>di2"
1469 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1470 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1472 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1474 (define_insn "fix_trunc<mode>di2"
1475 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1476 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1478 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; Int iterator over the four round-to-integral unspecs (floor, btrunc, ceil,
;; nearbyint); used by the <FPINT:fpint_name><SDFM:mode>2 pattern.
1480 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
1481 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
;; Maps each FPINT unspec to the name prefix of the pattern generated for it
;; (the pattern name is <FPINT:fpint_name><SDFM:mode>2).
1482 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
1483 (UNSPEC_FPINT_BTRUNC "btrunc")
1484 (UNSPEC_FPINT_CEIL "ceil")
1485 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
;; Maps each FPINT unspec to the text spliced directly after "cvt" in the
;; output template.  FLOOR, BTRUNC and CEIL use the fixed suffixes .rmi, .rzi
;; and .rpi.  NEARBYINT instead uses the "%#" output modifier followed by "i".
;; NOTE(review): "%#" presumably expands to a rounding-mode suffix chosen by
;; the operand printer -- confirm in the nvptx print_operand implementation.
1486 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1487 (UNSPEC_FPINT_BTRUNC ".rzi")
1488 (UNSPEC_FPINT_CEIL ".rpi")
1489 (UNSPEC_FPINT_NEARBYINT "%#i")])
1491 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
1492 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1493 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1496 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
;; Int iterator over the floor and ceil unspecs only; used by the
;; <FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2 pattern, whose result is an
;; integer (SDIM) mode.
1498 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
;; Maps each FPINT2 unspec to the name prefix ("lfloor" / "lceil") of the
;; pattern generated for it.
1499 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
1500 (UNSPEC_FPINT_CEIL "lceil")])
;; Maps each FPINT2 unspec to the rounding suffix spliced after "cvt" in the
;; output template: .rmi for floor, .rpi for ceil.
1501 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1502 (UNSPEC_FPINT_CEIL ".rpi")])
1504 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
1505 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1506 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1509 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
1511 (define_insn "extendhf<mode>2"
1512 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1513 (float_extend:SDFM (match_operand:HF 1 "nvptx_register_operand" "R")))]
1515 "%.\\tcvt%t0%t1\\t%0, %1;")
1517 (define_insn "trunc<mode>hf2"
1518 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1519 (float_truncate:HF (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1521 "%.\\tcvt%#%t0%t1\\t%0, %1;")
1523 ;; Vector operations
1525 (define_insn "*vec_set<mode>_0"
1526 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1528 (vec_duplicate:VECIM
1529 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1533 "%.\\tmov%t1\\t%0.x, %1;")
1535 (define_insn "*vec_set<mode>_1"
1536 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1538 (vec_duplicate:VECIM
1539 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1543 "%.\\tmov%t1\\t%0.y, %1;")
1545 (define_insn "*vec_set<mode>_2"
1546 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1548 (vec_duplicate:VECIM
1549 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1553 "%.\\tmov%t1\\t%0.z, %1;")
1555 (define_insn "*vec_set<mode>_3"
1556 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1558 (vec_duplicate:VECIM
1559 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1563 "%.\\tmov%t1\\t%0.w, %1;")
;; Vector element store expander.  Operand 0 is the vector, operand 1 the new
;; element value and operand 2 the element index.  The index is read with
;; INTVAL and turned into a one-bit mask (1 << index); the emitted SET assigns
;; operand 0 a VEC_MERGE of (VEC_DUPLICATE operand 1) with the previous value
;; of operand 0 under that mask.
;; NOTE(review): presumably this SET is then recognized by one of the
;; *vec_set<mode>_N insns -- confirm against their full patterns.
1565 (define_expand "vec_set<mode>"
1566 [(match_operand:VECIM 0 "nvptx_register_operand")
1567 (match_operand:<VECELEM> 1 "nvptx_register_operand")
1568 (match_operand:SI 2 "nvptx_vector_index_operand")]
1571 enum machine_mode mode = GET_MODE (operands[0]);
1572 int mask = 1 << INTVAL (operands[2]);
1573 rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
1574 tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
1575 emit_insn (gen_rtx_SET (operands[0], tmp));
;; Vector element extract.  The integer value of operand 2 indexes a table of
;; four mov templates that read the .x, .y, .z or .w component of operand 1
;; into operand 0.
1579 (define_insn "vec_extract<mode><Vecelem>"
1580 [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
1581 (vec_select:<VECELEM>
1582 (match_operand:VECIM 1 "nvptx_register_operand" "R")
1583 (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
1586 static const char *const asms[4] = {
1587 "%.\\tmov%t0\\t%0, %1.x;",
1588 "%.\\tmov%t0\\t%0, %1.y;",
1589 "%.\\tmov%t0\\t%0, %1.z;",
1590 "%.\\tmov%t0\\t%0, %1.w;"
1592 return asms[INTVAL (operands[2])];
1607 (define_insn "fake_nop"
1611 .reg .u32 %%nop_src;
1612 .reg .u32 %%nop_dst;
1613 mov.u32 %%nop_dst, %%nop_src;
1616 (define_insn "return"
1620 return nvptx_output_return ();
1622 [(set_attr "predicable" "no")])
1624 (define_expand "epilogue"
1625 [(clobber (const_int 0))]
1628 if (TARGET_SOFT_STACK)
1629 emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode,
1630 SOFTSTACK_PREV_REGNUM)));
1631 emit_jump_insn (gen_return ());
1635 (define_expand "nonlocal_goto"
1636 [(match_operand 0 "" "")
1637 (match_operand 1 "" "")
1638 (match_operand 2 "" "")
1639 (match_operand 3 "" "")]
1642 sorry ("target cannot support nonlocal goto");
1643 emit_insn (gen_nop ());
1647 (define_expand "nonlocal_goto_receiver"
1651 sorry ("target cannot support nonlocal goto");
;; Dynamic stack allocation.  Under TARGET_SOFT_STACK the visible code
;; subtracts operand 1 from the stack pointer, emits set_softstack for the
;; updated stack pointer, and copies virtual_stack_dynamic_rtx into operand 0.
;; The remaining visible statements issue sorry ("target cannot support
;; alloca") and emit a nop.
;; NOTE(review): the lines separating the two paths are not visible here;
;; presumably the sorry path is taken when soft stacks are disabled -- confirm.
1654 (define_expand "allocate_stack"
1655 [(match_operand 0 "nvptx_register_operand")
1656 (match_operand 1 "nvptx_register_operand")]
1659 if (TARGET_SOFT_STACK)
1661 emit_move_insn (stack_pointer_rtx,
1662 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1663 emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
1664 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1667 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1668 only) but notes it is not implemented. The assembler emits a
1669 confused error message. Issue a blunt one now instead. */
1670 sorry ("target cannot support alloca");
1671 emit_insn (gen_nop ());
1675 (define_insn "@set_softstack_<mode>"
1676 [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
1677 UNSPEC_SET_SOFTSTACK)]
1680 return nvptx_output_set_softstack (REGNO (operands[0]));
1683 (define_expand "restore_stack_block"
1684 [(match_operand 0 "register_operand" "")
1685 (match_operand 1 "register_operand" "")]
1688 if (TARGET_SOFT_STACK)
1690 emit_move_insn (operands[0], operands[1]);
1691 emit_insn (gen_set_softstack (Pmode, operands[0]));
1696 (define_expand "restore_stack_function"
1697 [(match_operand 0 "register_operand" "")
1698 (match_operand 1 "register_operand" "")]
1705 [(trap_if (const_int 1) (const_int 0))]
1709 (define_insn "trap_if_true"
1710 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1714 "%j0 trap; %j0 exit;"
1715 [(set_attr "predicable" "no")])
1717 (define_insn "trap_if_false"
1718 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1722 "%J0 trap; %J0 exit;"
1723 [(set_attr "predicable" "no")])
1725 (define_expand "ctrap<mode>4"
1726 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1727 [(match_operand:SDIM 1 "nvptx_register_operand")
1728 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1729 (match_operand 3 "const0_operand"))]
1732 rtx t = nvptx_expand_compare (operands[0]);
1733 emit_insn (gen_trap_if_true (t));
;; Read the size of one oacc dimension into operand 0.  The integer value of
;; operand 1 indexes a table of mov.u32 templates that read %nctaid.x (gang),
;; %ntid.y (worker) or %ntid.x (vector); per the in-table comment the order
;; must match oacc_loop_levels.
1737 (define_insn "oacc_dim_size"
1738 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1739 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1743 static const char *const asms[] =
1744 { /* Must match oacc_loop_levels ordering. */
1745 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1746 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1747 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1749 return asms[INTVAL (operands[1])];
;; Read the position within one oacc dimension into operand 0.  The integer
;; value of operand 1 indexes a table of mov.u32 templates that read %ctaid.x
;; (gang), %tid.y (worker) or %tid.x (vector); per the in-table comment the
;; order must match oacc_loop_levels.  Unlike oacc_dim_size, the source is an
;; unspec_volatile.
1752 (define_insn "oacc_dim_pos"
1753 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1754 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1758 static const char *const asms[] =
1759 { /* Must match oacc_loop_levels ordering. */
1760 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1761 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1762 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1764 return asms[INTVAL (operands[1])];
1767 (define_insn "nvptx_fork"
1768 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1772 [(set_attr "predicable" "no")])
1774 (define_insn "nvptx_forked"
1775 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1779 [(set_attr "predicable" "no")])
1781 (define_insn "nvptx_joining"
1782 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1786 [(set_attr "predicable" "no")])
1788 (define_insn "nvptx_join"
1789 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1793 [(set_attr "predicable" "no")])
1795 (define_expand "oacc_fork"
1796 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1797 (match_operand:SI 1 "general_operand" ""))
1798 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1802 if (operands[0] != const0_rtx)
1803 emit_move_insn (operands[0], operands[1]);
1804 nvptx_expand_oacc_fork (INTVAL (operands[2]));
1808 (define_expand "oacc_join"
1809 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1810 (match_operand:SI 1 "general_operand" ""))
1811 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1815 if (operands[0] != const0_rtx)
1816 emit_move_insn (operands[0], operands[1]);
1817 nvptx_expand_oacc_join (INTVAL (operands[2]));
1821 ;; only 32-bit shuffles exist.
1822 (define_insn "nvptx_shuffle<mode>"
1823 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1825 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1826 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1827 (match_operand:SI 3 "const_int_operand" "n")]
1832 return "%.\\tshfl.sync%S3.b32\\t%0, %1, %2, 31, 0xffffffff;";
1834 return "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;";
1837 (define_insn "nvptx_vote_ballot"
1838 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1839 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1840 UNSPEC_VOTE_BALLOT))]
1844 return "%.\\tvote.sync.ballot.b32\\t%0, %1, 0xffffffff;";
1846 return "%.\\tvote.ballot.b32\\t%0, %1;";
1849 ;; Patterns for OpenMP SIMD-via-SIMT lowering
1851 (define_insn "@omp_simt_enter_<mode>"
1852 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1853 (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
1854 (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
1855 UNSPECV_SIMT_ENTER))]
1858 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
;; Expander for omp_simt_enter.  Before emitting the Pmode-specific
;; omp_simt_enter insn it records per-function state in cfun->machine:
;;  - simt_stack_size: set to HOST_WIDE_INT_M1U when operand 1 is not a
;;    CONST_INT; the other visible assignment keeps the maximum of the
;;    current value and UINTVAL (operands[1]).
;;    NOTE(review): the line joining the two assignments is not visible;
;;    presumably it is an "else" -- confirm.
;;  - simt_stack_align: maximum of the current value and UINTVAL (operands[2]).
;;  - has_simtreg: set to true.
1861 (define_expand "omp_simt_enter"
1862 [(match_operand 0 "nvptx_register_operand" "=R")
1863 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1864 (match_operand 2 "const_int_operand" "n")]
1867 if (!CONST_INT_P (operands[1]))
1868 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1870 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1871 cfun->machine->simt_stack_size);
1872 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1873 cfun->machine->simt_stack_align);
1874 cfun->machine->has_simtreg = true;
1875 emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
1879 (define_expand "omp_simt_exit"
1880 [(match_operand 0 "nvptx_register_operand" "R")]
1883 emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
1885 emit_insn (gen_nvptx_warpsync ());
1887 emit_insn (gen_nvptx_uniform_warp_check ());
1891 (define_insn "@omp_simt_exit_<mode>"
1892 [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
1896 return nvptx_output_simt_exit (operands[0]);
1899 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
1900 (define_insn "omp_simt_lane"
1901 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1902 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1904 "%.\\tmov.u32\\t%0, %%laneid;")
1906 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1907 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
1908 (define_expand "omp_simt_ordered"
1909 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1910 (match_operand:SI 1 "nvptx_register_operand" "R")]
1913 emit_move_insn (operands[0], operands[1]);
1914 emit_insn (gen_nvptx_nounroll ());
1918 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
1920 (define_expand "omp_simt_xchg_bfly"
1921 [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
1922 (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
1923 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1926 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1931 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1932 ;; from lane given by index in operand 2 to operand 0 in all lanes
1933 (define_expand "omp_simt_xchg_idx"
1934 [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
1935 (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
1936 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1939 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1944 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1945 ;; set operand 0 to zero iff all lanes supply zero in operand 1
;; Expansion: operand 1 is compared against zero (NE) into a fresh BImode
;; predicate, and nvptx_vote_ballot on that predicate writes operand 0.
1946 (define_expand "omp_simt_vote_any"
1947 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1948 (match_operand:SI 1 "nvptx_register_operand" "R")]
1951 rtx pred = gen_reg_rtx (BImode);
1952 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1953 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
1957 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
1958 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; Expansion: operand 1 is compared against zero (NE) into a fresh BImode
;; predicate, nvptx_vote_ballot on that predicate fills an SImode temporary,
;; and gen_ctzsi2 of that temporary produces operand 0.
;; NOTE(review): the pattern is named "last_lane" while the result is computed
;; with ctzsi2 (presumably count-trailing-zeros, i.e. the lowest set bit of
;; the ballot) -- confirm the intended lane ordering against the IFN users.
1959 (define_expand "omp_simt_last_lane"
1960 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1961 (match_operand:SI 1 "nvptx_register_operand" "R")]
1964 rtx pred = gen_reg_rtx (BImode);
1965 rtx tmp = gen_reg_rtx (SImode);
1966 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1967 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
1968 emit_insn (gen_ctzsi2 (operands[0], tmp));
1972 ;; extract parts of a 64 bit object into 2 32-bit ints
1973 (define_insn "unpack<mode>si2"
1974 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1975 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1976 (const_int 0)] UNSPEC_BIT_CONV))
1977 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1978 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1980 "%.\\tmov.b64\\t{%0,%1}, %2;")
1982 ;; pack 2 32-bit ints into a 64 bit object
1983 (define_insn "packsi<mode>2"
1984 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1985 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1986 (match_operand:SI 2 "nvptx_register_operand" "R")]
1989 "%.\\tmov.b64\\t%0, {%1,%2};")
;; Compare-and-swap expander.  When nvptx_mem_local_p holds for the memory
;; operand (operand 2) the _1_local insn is emitted, and the other visible
;; emit uses the _1 insn; both receive oldval, memory, expected and newval.
;; The boolean success output (operand 0) is then computed separately: a
;; BImode EQ of oldval (operand 1) against expected (operand 3) selects
;; 1 or 0 through gen_sel_truesi.
1993 (define_expand "atomic_compare_and_swap<mode>"
1994 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
1995 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
1996 (match_operand:SDIM 2 "memory_operand") ;; memory
1997 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
1998 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
1999 (match_operand:SI 5 "const_int_operand") ;; is_weak
2000 (match_operand:SI 6 "const_int_operand") ;; success model
2001 (match_operand:SI 7 "const_int_operand")] ;; failure model
2004 if (nvptx_mem_local_p (operands[2]))
2005 emit_insn (gen_atomic_compare_and_swap<mode>_1_local
2006 (operands[1], operands[2], operands[3], operands[4],
2009 emit_insn (gen_atomic_compare_and_swap<mode>_1
2010 (operands[1], operands[2], operands[3], operands[4],
2013 rtx cond = gen_reg_rtx (BImode);
2014 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
2015 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
;; Compare-and-swap variant emitted by the expander when nvptx_mem_local_p
;; holds for the memory operand.  No atom instruction is used: the output is a
;; brace-delimited sequence that declares a predicate %eq_p and a temporary
;; %val, loads memory (operand 1) into %val, sets %eq_p to %val == operand 2,
;; stores operand 3 to memory under @%eq_p, and finally moves %val into
;; operand 0.  The insn is marked not predicable.
2019 (define_insn "atomic_compare_and_swap<mode>_1_local"
2020 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2021 (unspec_volatile:SDIM
2022 [(match_operand:SDIM 1 "memory_operand" "+m")
2023 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
2024 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
2025 (match_operand:SI 4 "const_int_operand")]
2028 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
2031 output_asm_insn ("{", NULL);
2032 output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL);
2033 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2034 output_asm_insn ("\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2035 output_asm_insn ("\\t" "setp.eq%t0" "\\t" "%%eq_p, %%val, %2;",
2037 output_asm_insn ("@%%eq_p\\t" "st%A1%t0" "\\t" "%1,%3;", operands);
2038 output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2039 output_asm_insn ("}", NULL);
2042 [(set_attr "predicable" "no")])
2044 (define_insn "atomic_compare_and_swap<mode>_1"
2045 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2046 (unspec_volatile:SDIM
2047 [(match_operand:SDIM 1 "memory_operand" "+m")
2048 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
2049 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
2050 (match_operand:SI 4 "const_int_operand")]
2053 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
2057 = "%.\\tatom%A1.cas.b%T0\\t%x0, %1, %2, %3;";
2058 return nvptx_output_atomic_insn (t, operands, 1, 4);
2060 [(set_attr "atomic" "true")])
;; Atomic exchange.  When nvptx_mem_local_p holds for the memory operand
;; (operand 1) the output is a brace-delimited non-atomic sequence: declare a
;; temporary %val, load memory into it, store operand 2 to memory, then move
;; %val into operand 0.  The other visible path passes an atom.exch template
;; to nvptx_output_atomic_insn.
2062 (define_insn "atomic_exchange<mode>"
2063 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
2064 (unspec_volatile:SDIM
2065 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
2066 (match_operand:SI 3 "const_int_operand")] ;; model
2069 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
2072 if (nvptx_mem_local_p (operands[1]))
2074 output_asm_insn ("{", NULL);
2075 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2076 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2077 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%2;", operands);
2078 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2079 output_asm_insn ("}", NULL);
2083 = "%.\tatom%A1.exch.b%T0\t%x0, %1, %2;";
2084 return nvptx_output_atomic_insn (t, operands, 1, 3);
2086 [(set_attr "atomic" "true")])
;; Atomic store expander.  Three emission paths are visible:
;;  - a plain mov<mode> when the decomposed address has a base register for
;;    which REGNO_PTR_FRAME_P holds;
;;  - nvptx_atomic_store_sm70<mode>;
;;  - nvptx_atomic_store<mode>, after nvptx_mem_maybe_shared_p has been
;;    queried; the in-body comment says the not-maybe-shared case falls back
;;    to expand_atomic_store.
;; NOTE(review): the conditions selecting between the last two paths are on
;; lines not visible here -- confirm against the full definition.
2088 (define_expand "atomic_store<mode>"
2089 [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory
2090 (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2091 (match_operand:SI 2 "const_int_operand")] ;; model
2094 struct address_info info;
2095 decompose_mem_address (&info, operands[0]);
2096 if (info.base != NULL && REG_P (*info.base)
2097 && REGNO_PTR_FRAME_P (REGNO (*info.base)))
2099 emit_insn (gen_mov<mode> (operands[0], operands[1]));
2105 emit_insn (gen_nvptx_atomic_store_sm70<mode> (operands[0], operands[1],
2110 bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]);
2111 if (!maybe_shared_p)
2112 /* Fall back to expand_atomic_store. */
2115 emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1],
;; Store variant that emits a plain st instruction through
;; nvptx_output_atomic_insn.  Its "atomic" attribute is deliberately "false",
;; as the trailing note on the attribute line explains.
2120 (define_insn "nvptx_atomic_store_sm70<mode>"
2121 [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory
2122 (unspec_volatile:SDIM
2123 [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2124 (match_operand:SI 2 "const_int_operand")] ;; model
2129 = "%.\tst%A0.b%T0\t%0, %1;";
2130 return nvptx_output_atomic_insn (t, operands, 0, 2);
2132 [(set_attr "atomic" "false")]) ;; Note: st is not an atomic insn.
;; Store variant implemented with atom.exch: the template writes "_" in the
;; destination position and passes memory (operand 0) and the input
;; (operand 1) as sources.
;; NOTE(review): "_" is presumably the PTX sink operand that discards the
;; exchanged-out value -- confirm against the PTX ISA.
2134 (define_insn "nvptx_atomic_store<mode>"
2135 [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory
2136 (unspec_volatile:SDIM
2137 [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2138 (match_operand:SI 2 "const_int_operand")] ;; model
2143 = "%.\tatom%A0.exch.b%T0\t_, %0, %1;";
2144 return nvptx_output_atomic_insn (t, operands, 0, 2);
2146 [(set_attr "atomic" "true")])
2148 (define_insn "atomic_fetch_add<mode>"
2149 [(set (match_operand:SDIM 1 "memory_operand" "+m")
2150 (unspec_volatile:SDIM
2151 [(plus:SDIM (match_dup 1)
2152 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
2153 (match_operand:SI 3 "const_int_operand")] ;; model
2155 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2159 if (nvptx_mem_local_p (operands[1]))
2161 output_asm_insn ("{", NULL);
2162 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2163 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
2164 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2165 output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
2167 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2168 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2169 output_asm_insn ("}", NULL);
2173 = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
2174 return nvptx_output_atomic_insn (t, operands, 1, 3);
2176 [(set_attr "atomic" "true")])
;; SFmode counterpart of the integer fetch-and-add pattern: operand 1 is
;; the memory location, operand 2 the SF addend (constraint "RF"),
;; operand 3 the memory model and operand 0 the result register.
;;
;; The output code mirrors the integer variant: when
;; nvptx_mem_local_p (operands[1]) holds, a ld / add / st / mov sequence
;; using the scratch registers %val and %update is printed inside
;; braces and operand 0 receives the loaded value; otherwise an atom.add
;; template is passed to nvptx_output_atomic_insn (indices 1 and 3).
2178 (define_insn "atomic_fetch_addsf"
2179 [(set (match_operand:SF 1 "memory_operand" "+m")
2181 [(plus:SF (match_dup 1)
2182 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
2183 (match_operand:SI 3 "const_int_operand")] ;; model
2185 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
2189 if (nvptx_mem_local_p (operands[1]))
2191 output_asm_insn ("{", NULL);
2192 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2193 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
2194 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2195 output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
2197 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2198 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2199 output_asm_insn ("}", NULL);
2203 = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
2204 return nvptx_output_atomic_insn (t, operands, 1, 3);
2206 [(set_attr "atomic" "true")])
;; Atomic fetch-and-<logic> for the SDIM modes, instantiated for every
;; code in the any_logic iterator.  Operand 1 is the memory location,
;; operand 2 the second input, operand 3 the memory model and operand 0
;; the result register.  The insn is only enabled for SImode, or for any
;; SDIM mode when TARGET_SM35 is set.
;;
;; For memory that satisfies nvptx_mem_local_p a non-atomic
;; ld / <logic> / st / mov sequence is printed, using the untyped
;; (.b<size>) scratch registers %val and %update; operand 0 receives the
;; loaded value.  Otherwise an atom.<logic>.b<size> template is passed
;; to nvptx_output_atomic_insn (indices 1 and 3).
2208 (define_insn "atomic_fetch_<logic><mode>"
2209 [(set (match_operand:SDIM 1 "memory_operand" "+m")
2210 (unspec_volatile:SDIM
2211 [(any_logic:SDIM (match_dup 1)
2212 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
2213 (match_operand:SI 3 "const_int_operand")] ;; model
2215 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2217 "<MODE>mode == SImode || TARGET_SM35"
2219 if (nvptx_mem_local_p (operands[1]))
2221 output_asm_insn ("{", NULL);
2222 output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands);
2223 output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%update;", operands);
2224 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2225 output_asm_insn ("%.\\t" "<logic>.b%T0" "\\t" "%%update,%%val,%2;",
2227 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2228 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2229 output_asm_insn ("}", NULL);
2233 = "%.\\tatom%A1.<logic>.b%T0\\t%x0, %1, %2;";
2234 return nvptx_output_atomic_insn (t, operands, 1, 3);
2237 [(set_attr "atomic" "true")])
;; Expander for atomic test-and-set on a QImode memory location.  It
;; does not emit an instruction pattern of its own; instead it
;; initialises the library function "__atomic_test_and_set_1", converts
;; the address of operand 1 to ptr_mode, and emits a library call whose
;; SImode result is placed in operand 0.  The memory model (operand 2)
;; is passed to the call as an SImode argument.
2239 (define_expand "atomic_test_and_set"
2240 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
2241 (match_operand:QI 1 "memory_operand") ;; memory
2242 (match_operand:SI 2 "const_int_operand")] ;; model
2247 libfunc = init_one_libfunc ("__atomic_test_and_set_1");
2248 addr = convert_memory_address (ptr_mode, XEXP (operands[1], 0));
2249 emit_library_call_value (libfunc, operands[0], LCT_NORMAL, SImode,
2251 operands[2], SImode);
;; Barrier synchronisation.  Operand 0 is printed as the first barrier
;; argument; operand 1 is a constant integer that selects the form:
;;  - operand 1 == 0: one-argument form, "barrier.sync.aligned %0" when
;;    TARGET_PTX_6_0 is set, "bar.sync %0" otherwise;
;;  - operand 1 != 0: two-argument form, "barrier.sync %0, %1" when
;;    TARGET_PTX_6_0 is set, "bar.sync %0, %1" otherwise.
;; The insn is marked as not predicable.
2255 (define_insn "nvptx_barsync"
2256 [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
2257 (match_operand:SI 1 "const_int_operand")]
2261 if (INTVAL (operands[1]) == 0)
2262 return (TARGET_PTX_6_0
2263 ? "\\tbarrier.sync.aligned\\t%0;"
2264 : "\\tbar.sync\\t%0;");
2266 return (TARGET_PTX_6_0
2267 ? "\\tbarrier.sync\\t%0, %1;"
2268 : "\\tbar.sync\\t%0, %1;");
2270 [(set_attr "predicable" "no")])
;; Warp-level synchronisation, represented as an operand-less
;; unspec_volatile (UNSPECV_WARPSYNC).  The output is a fixed
;; "bar.warp.sync" with the constant member mask 0xffffffff.
2272 (define_insn "nvptx_warpsync"
2273 [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)]
2275 "%.\\tbar.warp.sync\\t0xffffffff;")
;; Int iterator over the barrier-reduction unspecs, followed by three
;; per-unspec attributes used by the nvptx_barred_<barred_op> pattern:
;;   barred_op      - operation name used in the insn name and in the
;;                    emitted bar.red.<op> mnemonic (and / or / popc);
;;   barred_mode    - machine mode of the result operand (BI for and/or,
;;                    SI for popc);
;;   barred_ptxtype - type suffix of the emitted instruction (pred for
;;                    and/or, u32 for popc).
2277 (define_int_iterator BARRED
2280 UNSPECV_BARRED_POPC])
2281 (define_int_attr barred_op
2282 [(UNSPECV_BARRED_AND "and")
2283 (UNSPECV_BARRED_OR "or")
2284 (UNSPECV_BARRED_POPC "popc")])
2285 (define_int_attr barred_mode
2286 [(UNSPECV_BARRED_AND "BI")
2287 (UNSPECV_BARRED_OR "BI")
2288 (UNSPECV_BARRED_POPC "SI")])
2289 (define_int_attr barred_ptxtype
2290 [(UNSPECV_BARRED_AND "pred")
2291 (UNSPECV_BARRED_OR "pred")
2292 (UNSPECV_BARRED_POPC "u32")])
;; Barrier with reduction (bar.red.<op>.<type>), instantiated once per
;; BARRED unspec.  Operand 0 is the result register, whose mode is given
;; by <barred_mode>.  Operands 1 and 2 are SImode register-or-immediate
;; inputs printed as the second and third instruction arguments.
;; Operand 3 is a constant integer that is printed with the 'p' operand
;; modifier directly in front of operand 4, the BImode input register.
;; The insn is marked as not predicable.
2294 (define_insn "nvptx_barred_<barred_op>"
2295 [(set (match_operand:<barred_mode> 0 "nvptx_register_operand" "=R")
2297 [(match_operand:SI 1 "nvptx_nonmemory_operand" "Ri")
2298 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
2299 (match_operand:SI 3 "const_int_operand" "i")
2300 (match_operand:BI 4 "nvptx_register_operand" "R")]
2303 "\\tbar.red.<barred_op>.<barred_ptxtype> \\t%0, %1, %2, %p3%4;"
2304 [(set_attr "predicable" "no")])
;; Run-time check emitted for UNSPECV_UNIFORM_WARP_CHECK.  The output
;; code walks a NULL-terminated array of template strings and prints
;; each one with output_asm_insn.  The visible templates:
;;   - declare a 32-bit register %r_act and fill it with the result of
;;     vote.ballot.b32 over the constant predicate 1;
;;   - declare a predicate register %r_do_abort, initialise it to 0 and
;;     then set it with setp.ne.b32 by comparing %r_act against a second
;;     operand;
;;   - execute "trap" and then "exit", both guarded by %r_do_abort.
2306 (define_insn "nvptx_uniform_warp_check"
2307 [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)]
2310 const char *insns[] = {
2312 "\\t" ".reg.b32" "\\t" "%%r_act;",
2313 "%.\\t" "vote.ballot.b32" "\\t" "%%r_act,1;",
2314 "\\t" ".reg.pred" "\\t" "%%r_do_abort;",
2315 "\\t" "mov.pred" "\\t" "%%r_do_abort,0;",
2316 "%.\\t" "setp.ne.b32" "\\t" "%%r_do_abort,%%r_act,"
2318 "@ %%r_do_abort\\t" "trap;",
2319 "@ %%r_do_abort\\t" "exit;",
2323 for (const char **p = &insns[0]; *p != NULL; p++)
2324 output_asm_insn (*p, NULL);
;; Expander for the memory_barrier pattern.  The preparation code
;; creates operand 0 itself: a BLKmode MEM whose address is a Pmode
;; SCRATCH, flagged volatile.  The emitted RTL uses that operand inside
;; an unspec_volatile with UNSPECV_MEMBAR.
2328 (define_expand "memory_barrier"
2330 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
2333 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2334 MEM_VOLATILE_P (operands[0]) = 1;
2337 ;; PTX defines the memory barriers membar.cta, membar.gl and membar.sys
2338 ;; (corresponding to the CUDA functions __threadfence_block, __threadfence and
2339 ;; __threadfence_system). For the insn memory_barrier we use membar.sys. This
2340 ;; may be overconservative, but before using membar.gl instead we'll need to
2341 ;; explain in detail why it's safe to use. For now, use membar.sys.
;; Anonymous insn that matches the RTL produced by the memory_barrier
;; expander: a BLKmode set of operand 0 from an unspec_volatile
;; (UNSPECV_MEMBAR) of that same operand.  Operand 0 has an empty
;; predicate, so any rtx is accepted.  Marked as not predicable.
2342 (define_insn "*memory_barrier"
2343 [(set (match_operand:BLK 0 "" "")
2344 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
2347 [(set_attr "predicable" "no")])
;; Expander for a barrier represented by UNSPECV_MEMBAR_CTA.  As with
;; memory_barrier, the preparation code builds operand 0 as a volatile
;; BLKmode MEM with a Pmode SCRATCH address.
2349 (define_expand "nvptx_membar_cta"
2351 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
2354 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2355 MEM_VOLATILE_P (operands[0]) = 1;
;; Anonymous insn that matches the RTL produced by the nvptx_membar_cta
;; expander: a BLKmode set of operand 0 from an unspec_volatile
;; (UNSPECV_MEMBAR_CTA) of that same operand.  Marked as not predicable.
2358 (define_insn "*nvptx_membar_cta"
2359 [(set (match_operand:BLK 0 "" "")
2360 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
2363 [(set_attr "predicable" "no")])
;; Expander for a barrier represented by UNSPECV_MEMBAR_GL.  The
;; preparation code builds operand 0 as a volatile BLKmode MEM with a
;; Pmode SCRATCH address, exactly like the other membar expanders.
2365 (define_expand "nvptx_membar_gl"
2367 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
2370 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2371 MEM_VOLATILE_P (operands[0]) = 1;
;; Anonymous insn that matches the RTL produced by the nvptx_membar_gl
;; expander: a BLKmode set of operand 0 from an unspec_volatile
;; (UNSPECV_MEMBAR_GL) of that same operand.  Marked as not predicable.
2374 (define_insn "*nvptx_membar_gl"
2375 [(set (match_operand:BLK 0 "" "")
2376 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
2379 [(set_attr "predicable" "no")])
;; Operand-less marker insn (UNSPECV_NOUNROLL) whose only effect on the
;; output is to print the directive  .pragma "nounroll";  The insn is
;; marked as not predicable.
2381 (define_insn "nvptx_nounroll"
2382 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
2384 "\\t.pragma \\\"nounroll\\\";"
2385 [(set_attr "predicable" "no")])
;; Sets the DImode operand 0 from an unspec_volatile of the constant
;; integer operand 1.  The assembly text is not a fixed template; it is
;; produced by nvptx_output_red_partition (operands[0], operands[1]).
;; The insn is marked as not predicable.
2387 (define_insn "nvptx_red_partition"
2388 [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
2389 (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
2393 return nvptx_output_red_partition (operands[0], operands[1]);
2395 [(set_attr "predicable" "no")])
2397 ;; Expand QI mode operations using SI mode instructions.
;; Code iterators and name attributes used by the QImode expanders that
;; follow.  Each iterator lists the RTL codes handled by one expander
;; and the matching attribute supplies the pattern-name fragment that is
;; spliced into both the expander name (<attr>qi3 / <attr>qi2) and the
;; SImode generator it calls (gen_<attr>si3 / gen_<attr>si2).
;; The "s" groups are widened with unsignedp = 0 by their expanders and
;; the "u" groups with unsignedp = 1.
2398 (define_code_iterator any_sbinary [plus minus smin smax])
2399 (define_code_attr sbinary [(plus "add") (minus "sub") (smin "smin") (smax "smax")])
2401 (define_code_iterator any_ubinary [and ior xor umin umax])
2402 (define_code_attr ubinary [(and "and") (ior "ior") (xor "xor") (umin "umin")
2405 (define_code_iterator any_sunary [neg abs])
2406 (define_code_attr sunary [(neg "neg") (abs "abs")])
2408 (define_code_iterator any_uunary [not])
2409 (define_code_attr uunary [(not "one_cmpl")])
;; QImode expander for the binary codes in any_sbinary.  Both inputs are
;; widened from QImode to SImode with convert_modes (unsignedp = 0), the
;; operation is carried out by the corresponding SImode pattern into a
;; fresh SImode pseudo, and the result is narrowed into operand 0 with
;; gen_truncsiqi2.  For MINUS only, the first input is additionally
;; forced into a register before the SImode insn is generated.
2411 (define_expand "<sbinary>qi3"
2412 [(set (match_operand:QI 0 "nvptx_register_operand")
2413 (any_sbinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
2414 (match_operand:QI 2 "nvptx_nonmemory_operand")))]
2417 rtx reg = gen_reg_rtx (SImode);
2418 rtx op0 = convert_modes (SImode, QImode, operands[1], 0);
2419 rtx op1 = convert_modes (SImode, QImode, operands[2], 0);
2420 if (<CODE> == MINUS)
2421 op0 = force_reg (SImode, op0);
2422 emit_insn (gen_<sbinary>si3 (reg, op0, op1));
2423 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode expander for the binary codes in any_ubinary.  Both inputs are
;; widened from QImode to SImode with convert_modes (unsignedp = 1), the
;; operation is carried out by the corresponding SImode pattern into a
;; fresh SImode pseudo, and the result is narrowed into operand 0 with
;; gen_truncsiqi2.
2427 (define_expand "<ubinary>qi3"
2428 [(set (match_operand:QI 0 "nvptx_register_operand")
2429 (any_ubinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
2430 (match_operand:QI 2 "nvptx_nonmemory_operand")))]
2433 rtx reg = gen_reg_rtx (SImode);
2434 rtx op0 = convert_modes (SImode, QImode, operands[1], 1);
2435 rtx op1 = convert_modes (SImode, QImode, operands[2], 1);
2436 emit_insn (gen_<ubinary>si3 (reg, op0, op1));
2437 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode expander for the unary codes in any_sunary.  The input is
;; widened from QImode to SImode with convert_modes (unsignedp = 0), the
;; corresponding SImode pattern computes into a fresh SImode pseudo, and
;; the result is narrowed into operand 0 with gen_truncsiqi2.
2441 (define_expand "<sunary>qi2"
2442 [(set (match_operand:QI 0 "nvptx_register_operand")
2443 (any_sunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
2446 rtx reg = gen_reg_rtx (SImode);
2447 rtx op0 = convert_modes (SImode, QImode, operands[1], 0);
2448 emit_insn (gen_<sunary>si2 (reg, op0));
2449 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode expander for the unary codes in any_uunary.  The input is
;; widened from QImode to SImode with convert_modes (unsignedp = 1), the
;; corresponding SImode pattern computes into a fresh SImode pseudo, and
;; the result is narrowed into operand 0 with gen_truncsiqi2.
2453 (define_expand "<uunary>qi2"
2454 [(set (match_operand:QI 0 "nvptx_register_operand")
2455 (any_uunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
2458 rtx reg = gen_reg_rtx (SImode);
2459 rtx op0 = convert_modes (SImode, QImode, operands[1], 1);
2460 emit_insn (gen_<uunary>si2 (reg, op0));
2461 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; Store-flag expander for QImode comparisons.  Operand 1 is the
;; comparison operator applied to the QImode operands 2 and 3; operand 0
;; is the SImode result.
;;
;; The comparison is performed in SImode: both inputs are widened with
;; convert_modes, using the signedness reported by unsigned_condition_p
;; for the comparison code.  A new comparison rtx with the same code is
;; built over the widened inputs, gen_cmpsi computes it into a fresh
;; BImode pseudo, and gen_setccsi_from_bi turns that predicate into the
;; SImode value in operand 0.
2465 (define_expand "cstoreqi4"
2466 [(set (match_operand:SI 0 "nvptx_register_operand")
2467 (match_operator:SI 1 "nvptx_comparison_operator"
2468 [(match_operand:QI 2 "nvptx_nonmemory_operand")
2469 (match_operand:QI 3 "nvptx_nonmemory_operand")]))]
2472 rtx reg = gen_reg_rtx (BImode);
2473 enum rtx_code code = GET_CODE (operands[1]);
2474 int unsignedp = unsigned_condition_p (code);
2475 rtx op2 = convert_modes (SImode, QImode, operands[2], unsignedp);
2476 rtx op3 = convert_modes (SImode, QImode, operands[3], unsignedp);
2477 rtx cmp = gen_rtx_fmt_ee (code, SImode, op2, op3);
2478 emit_insn (gen_cmpsi (reg, cmp, op2, op3));
2479 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; Anonymous insn whose SImode result (operand 0) is derived from the
;; QImode truncation of the SImode register operand 1.  It is emitted as
;; a single cvt.s32.s8 from operand 1 to operand 0.
2483 (define_insn "*ext_truncsi2_qi"
2484 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
2486 (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
2488 "%.\\tcvt.s32.s8\\t%0, %1;")
;; Anonymous insn whose SImode result (operand 0) is derived from the
;; QImode truncation of the SImode register operand 1.  It is emitted as
;; a single cvt.u32.u8 from operand 1 to operand 0.
2490 (define_insn "*zext_truncsi2_qi"
2491 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
2493 (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
2495 "%.\\tcvt.u32.u8\\t%0, %1;")