gcc/
[official-gcc.git] / gcc / config / nvptx / nvptx.c
bloba434bde73a32e69f80b1dfaad31edd770c7eb44e
1 /* Target code for NVPTX.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include <sstream>
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "insn-flags.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "insn-codes.h"
41 #include "hashtab.h"
42 #include "hard-reg-set.h"
43 #include "function.h"
44 #include "flags.h"
45 #include "statistics.h"
46 #include "real.h"
47 #include "fixed-value.h"
48 #include "insn-config.h"
49 #include "expmed.h"
50 #include "dojump.h"
51 #include "explow.h"
52 #include "calls.h"
53 #include "emit-rtl.h"
54 #include "varasm.h"
55 #include "stmt.h"
56 #include "expr.h"
57 #include "regs.h"
58 #include "optabs.h"
59 #include "recog.h"
60 #include "ggc.h"
61 #include "timevar.h"
62 #include "tm_p.h"
63 #include "tm-preds.h"
64 #include "tm-constrs.h"
65 #include "langhooks.h"
66 #include "dbxout.h"
67 #include "target.h"
68 #include "target-def.h"
69 #include "diagnostic.h"
70 #include "predict.h"
71 #include "basic-block.h"
72 #include "cfgrtl.h"
73 #include "stor-layout.h"
74 #include "df.h"
75 #include "builtins.h"
77 /* Record the function decls we've written, and the libfuncs and function
78 decls corresponding to them. */
79 static std::stringstream func_decls;
81 struct declared_libfunc_hasher : ggc_cache_hasher<rtx>
83 static hashval_t hash (rtx x) { return htab_hash_pointer (x); }
84 static bool equal (rtx a, rtx b) { return a == b; }
87 static GTY((cache))
88 hash_table<declared_libfunc_hasher> *declared_libfuncs_htab;
90 struct tree_hasher : ggc_cache_hasher<tree>
92 static hashval_t hash (tree t) { return htab_hash_pointer (t); }
93 static bool equal (tree a, tree b) { return a == b; }
96 static GTY((cache)) hash_table<tree_hasher> *declared_fndecls_htab;
97 static GTY((cache)) hash_table<tree_hasher> *needed_fndecls_htab;
99 /* Allocate a new, cleared machine_function structure. */
101 static struct machine_function *
102 nvptx_init_machine_status (void)
104 struct machine_function *p = ggc_cleared_alloc<machine_function> ();
105 p->ret_reg_mode = VOIDmode;
106 return p;
109 /* Implement TARGET_OPTION_OVERRIDE. */
111 static void
112 nvptx_option_override (void)
114 init_machine_status = nvptx_init_machine_status;
115 /* Gives us a predictable order, which we need especially for variables. */
116 flag_toplevel_reorder = 1;
117 /* Assumes that it will see only hard registers. */
118 flag_var_tracking = 0;
119 write_symbols = NO_DEBUG;
120 debug_info_level = DINFO_LEVEL_NONE;
122 declared_fndecls_htab = hash_table<tree_hasher>::create_ggc (17);
123 needed_fndecls_htab = hash_table<tree_hasher>::create_ggc (17);
124 declared_libfuncs_htab
125 = hash_table<declared_libfunc_hasher>::create_ggc (17);
128 /* Return the mode to be used when declaring a ptx object for OBJ.
129 For objects with subparts such as complex modes this is the mode
130 of the subpart. */
132 machine_mode
133 nvptx_underlying_object_mode (rtx obj)
135 if (GET_CODE (obj) == SUBREG)
136 obj = SUBREG_REG (obj);
137 machine_mode mode = GET_MODE (obj);
138 if (mode == TImode)
139 return DImode;
140 if (COMPLEX_MODE_P (mode))
141 return GET_MODE_INNER (mode);
142 return mode;
145 /* Return a ptx type for MODE. If PROMOTE, then use .u32 for QImode to
146 deal with ptx ideosyncracies. */
148 const char *
149 nvptx_ptx_type_from_mode (machine_mode mode, bool promote)
151 switch (mode)
153 case BLKmode:
154 return ".b8";
155 case BImode:
156 return ".pred";
157 case QImode:
158 if (promote)
159 return ".u32";
160 else
161 return ".u8";
162 case HImode:
163 return ".u16";
164 case SImode:
165 return ".u32";
166 case DImode:
167 return ".u64";
169 case SFmode:
170 return ".f32";
171 case DFmode:
172 return ".f64";
174 default:
175 gcc_unreachable ();
179 /* Return the number of pieces to use when dealing with a pseudo of *PMODE.
180 Alter *PMODE if we return a number greater than one. */
182 static int
183 maybe_split_mode (machine_mode *pmode)
185 machine_mode mode = *pmode;
187 if (COMPLEX_MODE_P (mode))
189 *pmode = GET_MODE_INNER (mode);
190 return 2;
192 else if (mode == TImode)
194 *pmode = DImode;
195 return 2;
197 return 1;
200 /* Like maybe_split_mode, but only return whether or not the mode
201 needs to be split. */
202 static bool
203 nvptx_split_reg_p (machine_mode mode)
205 if (COMPLEX_MODE_P (mode))
206 return true;
207 if (mode == TImode)
208 return true;
209 return false;
/* Nonzero if MODE is not TImode and is either of class MODE_INT or
   MODE_FLOAT, or of a complex class while TYPE is not an aggregate.
   TYPE is only examined for the complex classes.  */
#define PASS_IN_REG_P(MODE, TYPE)				\
  ((MODE) != TImode						\
   && (GET_MODE_CLASS (MODE) == MODE_INT			\
       || GET_MODE_CLASS (MODE) == MODE_FLOAT			\
       || ((GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT		\
	    || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)	\
	   && !AGGREGATE_TYPE_P (TYPE))))

/* Nonzero if MODE is at most eight bytes wide and of class MODE_INT
   or MODE_FLOAT.  */
#define RETURN_IN_REG_P(MODE)			\
  (GET_MODE_SIZE (MODE) <= 8			\
   && (GET_MODE_CLASS (MODE) == MODE_INT	\
       || GET_MODE_CLASS (MODE) == MODE_FLOAT))
225 /* Perform a mode promotion for a function argument with MODE. Return
226 the promoted mode. */
228 static machine_mode
229 arg_promotion (machine_mode mode)
231 if (mode == QImode || mode == HImode)
232 return SImode;
233 return mode;
236 /* Write the declaration of a function arg of TYPE to S. I is the index
237 of the argument, MODE its mode. NO_ARG_TYPES is true if this is for
238 a decl with zero TYPE_ARG_TYPES, i.e. an old-style C decl. */
240 static int
241 write_one_arg (std::stringstream &s, tree type, int i, machine_mode mode,
242 bool no_arg_types)
244 if (!PASS_IN_REG_P (mode, type))
245 mode = Pmode;
247 int count = maybe_split_mode (&mode);
249 if (count == 2)
251 write_one_arg (s, NULL_TREE, i, mode, false);
252 write_one_arg (s, NULL_TREE, i + 1, mode, false);
253 return i + 1;
256 if (no_arg_types && !AGGREGATE_TYPE_P (type))
258 if (mode == SFmode)
259 mode = DFmode;
260 mode = arg_promotion (mode);
263 if (i > 0)
264 s << ", ";
265 s << ".param" << nvptx_ptx_type_from_mode (mode, false) << " %in_ar"
266 << (i + 1) << (mode == QImode || mode == HImode ? "[1]" : "");
267 if (mode == BLKmode)
268 s << "[" << int_size_in_bytes (type) << "]";
269 return i;
272 /* Look for attributes in ATTRS that would indicate we must write a function
273 as a .entry kernel rather than a .func. Return true if one is found. */
275 static bool
276 write_as_kernel (tree attrs)
278 return (lookup_attribute ("kernel", attrs) != NULL_TREE
279 || lookup_attribute ("omp target entrypoint", attrs) != NULL_TREE);
282 /* Write a function decl for DECL to S, where NAME is the name to be used. */
284 static void
285 nvptx_write_function_decl (std::stringstream &s, const char *name, const_tree decl)
287 tree fntype = TREE_TYPE (decl);
288 tree result_type = TREE_TYPE (fntype);
289 tree args = TYPE_ARG_TYPES (fntype);
290 tree attrs = DECL_ATTRIBUTES (decl);
291 bool kernel = write_as_kernel (attrs);
292 bool is_main = strcmp (name, "main") == 0;
293 bool args_from_decl = false;
295 /* We get:
296 NULL in TYPE_ARG_TYPES, for old-style functions
297 NULL in DECL_ARGUMENTS, for builtin functions without another
298 declaration.
299 So we have to pick the best one we have. */
300 if (args == 0)
302 args = DECL_ARGUMENTS (decl);
303 args_from_decl = true;
306 if (DECL_EXTERNAL (decl))
307 s << ".extern ";
308 else if (TREE_PUBLIC (decl))
309 s << ".visible ";
311 if (kernel)
312 s << ".entry ";
313 else
314 s << ".func ";
316 /* Declare the result. */
317 bool return_in_mem = false;
318 if (TYPE_MODE (result_type) != VOIDmode)
320 machine_mode mode = TYPE_MODE (result_type);
321 if (!RETURN_IN_REG_P (mode))
322 return_in_mem = true;
323 else
325 mode = arg_promotion (mode);
326 s << "(.param" << nvptx_ptx_type_from_mode (mode, false)
327 << " %out_retval)";
331 if (name[0] == '*')
332 s << (name + 1);
333 else
334 s << name;
336 /* Declare argument types. */
337 if ((args != NULL_TREE
338 && !(TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) == void_type_node))
339 || is_main
340 || return_in_mem
341 || DECL_STATIC_CHAIN (decl))
343 s << "(";
344 int i = 0;
345 bool any_args = false;
346 if (return_in_mem)
348 s << ".param.u" << GET_MODE_BITSIZE (Pmode) << " %in_ar1";
349 i++;
351 while (args != NULL_TREE)
353 tree type = args_from_decl ? TREE_TYPE (args) : TREE_VALUE (args);
354 machine_mode mode = TYPE_MODE (type);
356 if (mode != VOIDmode)
358 i = write_one_arg (s, type, i, mode,
359 TYPE_ARG_TYPES (fntype) == 0);
360 any_args = true;
361 i++;
363 args = TREE_CHAIN (args);
365 if (stdarg_p (fntype))
367 gcc_assert (i > 0);
368 s << ", .param.u" << GET_MODE_BITSIZE (Pmode) << " %in_argp";
370 if (DECL_STATIC_CHAIN (decl))
372 if (i > 0)
373 s << ", ";
374 s << ".reg.u" << GET_MODE_BITSIZE (Pmode)
375 << reg_names [STATIC_CHAIN_REGNUM];
377 if (!any_args && is_main)
378 s << ".param.u32 %argc, .param.u" << GET_MODE_BITSIZE (Pmode)
379 << " %argv";
380 s << ")";
384 /* Walk either ARGTYPES or ARGS if the former is null, and write out part of
385 the function header to FILE. If WRITE_COPY is false, write reg
386 declarations, otherwise write the copy from the incoming argument to that
387 reg. RETURN_IN_MEM indicates whether to start counting arg numbers at 1
388 instead of 0. */
390 static void
391 walk_args_for_param (FILE *file, tree argtypes, tree args, bool write_copy,
392 bool return_in_mem)
394 int i;
396 bool args_from_decl = false;
397 if (argtypes == 0)
398 args_from_decl = true;
399 else
400 args = argtypes;
402 for (i = return_in_mem ? 1 : 0; args != NULL_TREE; args = TREE_CHAIN (args))
404 tree type = args_from_decl ? TREE_TYPE (args) : TREE_VALUE (args);
405 machine_mode mode = TYPE_MODE (type);
407 if (mode == VOIDmode)
408 break;
410 if (!PASS_IN_REG_P (mode, type))
411 mode = Pmode;
413 int count = maybe_split_mode (&mode);
414 if (count == 1)
416 if (argtypes == NULL && !AGGREGATE_TYPE_P (type))
418 if (mode == SFmode)
419 mode = DFmode;
422 mode = arg_promotion (mode);
424 while (count-- > 0)
426 i++;
427 if (write_copy)
428 fprintf (file, "\tld.param%s %%ar%d, [%%in_ar%d];\n",
429 nvptx_ptx_type_from_mode (mode, false), i, i);
430 else
431 fprintf (file, "\t.reg%s %%ar%d;\n",
432 nvptx_ptx_type_from_mode (mode, false), i);
437 /* Write a .func or .kernel declaration (not a definition) along with
438 a helper comment for use by ld. S is the stream to write to, DECL
439 the decl for the function with name NAME. */
441 static void
442 write_function_decl_and_comment (std::stringstream &s, const char *name, const_tree decl)
444 s << "// BEGIN";
445 if (TREE_PUBLIC (decl))
446 s << " GLOBAL";
447 s << " FUNCTION DECL: ";
448 if (name[0] == '*')
449 s << (name + 1);
450 else
451 s << name;
452 s << "\n";
453 nvptx_write_function_decl (s, name, decl);
454 s << ";\n";
/* Return the name to use in place of NAME.  malloc, free and realloc
   are redirected to libgcc wrappers, and call is renamed because it
   triggers a bug in ptxas.  Any other NAME is returned unchanged.  */

static const char *
nvptx_name_replacement (const char *name)
{
  static const struct
  {
    const char *from;
    const char *to;
  } renames[] = {
    { "call", "__nvptx_call" },
    { "malloc", "__nvptx_malloc" },
    { "free", "__nvptx_free" },
    { "realloc", "__nvptx_realloc" },
  };

  for (size_t k = 0; k < sizeof renames / sizeof renames[0]; k++)
    if (strcmp (name, renames[k].from) == 0)
      return renames[k].to;

  return name;
}
475 /* If DECL is a FUNCTION_DECL, check the hash table to see if we
476 already encountered it, and if not, insert it and write a ptx
477 declarations that will be output at the end of compilation. */
479 static bool
480 nvptx_record_fndecl (tree decl, bool force = false)
482 if (decl == NULL_TREE || TREE_CODE (decl) != FUNCTION_DECL
483 || !DECL_EXTERNAL (decl))
484 return true;
486 if (!force && TYPE_ARG_TYPES (TREE_TYPE (decl)) == NULL_TREE)
487 return false;
489 tree *slot = declared_fndecls_htab->find_slot (decl, INSERT);
490 if (*slot == NULL)
492 *slot = decl;
493 const char *name = get_fnname_from_decl (decl);
494 name = nvptx_name_replacement (name);
495 write_function_decl_and_comment (func_decls, name, decl);
497 return true;
500 /* Record that we need to emit a ptx decl for DECL. Either do it now, or
501 record it for later in case we have no argument information at this
502 point. */
504 void
505 nvptx_record_needed_fndecl (tree decl)
507 if (nvptx_record_fndecl (decl))
508 return;
510 tree *slot = needed_fndecls_htab->find_slot (decl, INSERT);
511 if (*slot == NULL)
512 *slot = decl;
515 /* Implement ASM_DECLARE_FUNCTION_NAME. Writes the start of a ptx
516 function, including local var decls and copies from the arguments to
517 local regs. */
519 void
520 nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
522 tree fntype = TREE_TYPE (decl);
523 tree result_type = TREE_TYPE (fntype);
525 name = nvptx_name_replacement (name);
527 std::stringstream s;
528 write_function_decl_and_comment (s, name, decl);
529 s << "// BEGIN";
530 if (TREE_PUBLIC (decl))
531 s << " GLOBAL";
532 s << " FUNCTION DEF: ";
534 if (name[0] == '*')
535 s << (name + 1);
536 else
537 s << name;
538 s << "\n";
540 nvptx_write_function_decl (s, name, decl);
541 fprintf (file, "%s", s.str().c_str());
543 bool return_in_mem = false;
544 if (TYPE_MODE (result_type) != VOIDmode)
546 machine_mode mode = TYPE_MODE (result_type);
547 if (!RETURN_IN_REG_P (mode))
548 return_in_mem = true;
551 fprintf (file, "\n{\n");
553 /* Ensure all arguments that should live in a register have one
554 declared. We'll emit the copies below. */
555 walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl),
556 false, return_in_mem);
557 if (return_in_mem)
558 fprintf (file, "\t.reg.u%d %%ar1;\n", GET_MODE_BITSIZE (Pmode));
559 else if (TYPE_MODE (result_type) != VOIDmode)
561 machine_mode mode = arg_promotion (TYPE_MODE (result_type));
562 fprintf (file, ".reg%s %%retval;\n",
563 nvptx_ptx_type_from_mode (mode, false));
566 if (stdarg_p (fntype))
567 fprintf (file, "\t.reg.u%d %%argp;\n", GET_MODE_BITSIZE (Pmode));
569 fprintf (file, "\t.reg.u%d %s;\n", GET_MODE_BITSIZE (Pmode),
570 reg_names[OUTGOING_STATIC_CHAIN_REGNUM]);
572 /* Declare the pseudos we have as ptx registers. */
573 int maxregs = max_reg_num ();
574 for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
576 if (regno_reg_rtx[i] != const0_rtx)
578 machine_mode mode = PSEUDO_REGNO_MODE (i);
579 int count = maybe_split_mode (&mode);
580 if (count > 1)
582 while (count-- > 0)
583 fprintf (file, "\t.reg%s %%r%d$%d;\n",
584 nvptx_ptx_type_from_mode (mode, true),
585 i, count);
587 else
588 fprintf (file, "\t.reg%s %%r%d;\n",
589 nvptx_ptx_type_from_mode (mode, true),
594 /* The only reason we might be using outgoing args is if we call a stdargs
595 function. Allocate the space for this. If we called varargs functions
596 without passing any variadic arguments, we'll see a reference to outargs
597 even with a zero outgoing_args_size. */
598 HOST_WIDE_INT sz = crtl->outgoing_args_size;
599 if (sz == 0)
600 sz = 1;
601 if (cfun->machine->has_call_with_varargs)
602 fprintf (file, "\t.reg.u%d %%outargs;\n"
603 "\t.local.align 8 .b8 %%outargs_ar["HOST_WIDE_INT_PRINT_DEC"];\n",
604 BITS_PER_WORD, sz);
605 if (cfun->machine->punning_buffer_size > 0)
606 fprintf (file, "\t.reg.u%d %%punbuffer;\n"
607 "\t.local.align 8 .b8 %%punbuffer_ar[%d];\n",
608 BITS_PER_WORD, cfun->machine->punning_buffer_size);
610 /* Declare a local variable for the frame. */
611 sz = get_frame_size ();
612 if (sz > 0 || cfun->machine->has_call_with_sc)
614 fprintf (file, "\t.reg.u%d %%frame;\n"
615 "\t.local.align 8 .b8 %%farray["HOST_WIDE_INT_PRINT_DEC"];\n",
616 BITS_PER_WORD, sz == 0 ? 1 : sz);
617 fprintf (file, "\tcvta.local.u%d %%frame, %%farray;\n",
618 BITS_PER_WORD);
621 if (cfun->machine->has_call_with_varargs)
622 fprintf (file, "\tcvta.local.u%d %%outargs, %%outargs_ar;\n",
623 BITS_PER_WORD);
624 if (cfun->machine->punning_buffer_size > 0)
625 fprintf (file, "\tcvta.local.u%d %%punbuffer, %%punbuffer_ar;\n",
626 BITS_PER_WORD);
628 /* Now emit any copies necessary for arguments. */
629 walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl),
630 true, return_in_mem);
631 if (return_in_mem)
632 fprintf (file, "ld.param.u%d %%ar1, [%%in_ar1];\n",
633 GET_MODE_BITSIZE (Pmode));
634 if (stdarg_p (fntype))
635 fprintf (file, "ld.param.u%d %%argp, [%%in_argp];\n",
636 GET_MODE_BITSIZE (Pmode));
639 /* Output a return instruction. Also copy the return value to its outgoing
640 location. */
642 const char *
643 nvptx_output_return (void)
645 tree fntype = TREE_TYPE (current_function_decl);
646 tree result_type = TREE_TYPE (fntype);
647 if (TYPE_MODE (result_type) != VOIDmode)
649 machine_mode mode = TYPE_MODE (result_type);
650 if (RETURN_IN_REG_P (mode))
652 mode = arg_promotion (mode);
653 fprintf (asm_out_file, "\tst.param%s\t[%%out_retval], %%retval;\n",
654 nvptx_ptx_type_from_mode (mode, false));
658 return "ret;";
661 /* Construct a function declaration from a call insn. This can be
662 necessary for two reasons - either we have an indirect call which
663 requires a .callprototype declaration, or we have a libcall
664 generated by emit_library_call for which no decl exists. */
666 static void
667 write_func_decl_from_insn (std::stringstream &s, rtx result, rtx pat,
668 rtx callee)
670 bool callprototype = register_operand (callee, Pmode);
671 const char *name = "_";
672 if (!callprototype)
674 name = XSTR (callee, 0);
675 name = nvptx_name_replacement (name);
676 s << "// BEGIN GLOBAL FUNCTION DECL: " << name << "\n";
678 s << (callprototype ? "\t.callprototype\t" : "\t.extern .func ");
680 if (result != NULL_RTX)
682 s << "(.param";
683 s << nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)),
684 false);
685 s << " ";
686 if (callprototype)
687 s << "_";
688 else
689 s << "%out_retval";
690 s << ")";
693 s << name;
695 int nargs = XVECLEN (pat, 0) - 1;
696 if (nargs > 0)
698 s << " (";
699 for (int i = 0; i < nargs; i++)
701 rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
702 machine_mode mode = GET_MODE (t);
703 int count = maybe_split_mode (&mode);
705 while (count-- > 0)
707 s << ".param";
708 s << nvptx_ptx_type_from_mode (mode, false);
709 s << " ";
710 if (callprototype)
711 s << "_";
712 else
713 s << "%arg" << i;
714 if (mode == QImode || mode == HImode)
715 s << "[1]";
716 if (i + 1 < nargs || count > 0)
717 s << ", ";
720 s << ")";
722 s << ";\n";
725 /* Terminate a function by writing a closing brace to FILE. */
727 void
728 nvptx_function_end (FILE *file)
730 fprintf (file, "\t}\n");
733 /* Decide whether we can make a sibling call to a function. For ptx, we
734 can't. */
736 static bool
737 nvptx_function_ok_for_sibcall (tree, tree)
739 return false;
742 /* Implement the TARGET_CALL_ARGS hook. Record information about one
743 argument to the next call. */
745 static void
746 nvptx_call_args (rtx arg, tree funtype)
748 if (cfun->machine->start_call == NULL_RTX)
750 cfun->machine->call_args = NULL;
751 cfun->machine->funtype = funtype;
752 cfun->machine->start_call = const0_rtx;
754 if (arg == pc_rtx)
755 return;
757 rtx_expr_list *args_so_far = cfun->machine->call_args;
758 if (REG_P (arg))
759 cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg, args_so_far);
762 /* Implement the corresponding END_CALL_ARGS hook. Clear and free the
763 information we recorded. */
765 static void
766 nvptx_end_call_args (void)
768 cfun->machine->start_call = NULL_RTX;
769 free_EXPR_LIST_list (&cfun->machine->call_args);
772 /* Emit the sequence for a call. */
774 void
775 nvptx_expand_call (rtx retval, rtx address)
777 int nargs;
778 rtx callee = XEXP (address, 0);
779 rtx pat, t;
780 rtvec vec;
781 bool external_decl = false;
783 nargs = 0;
784 for (t = cfun->machine->call_args; t; t = XEXP (t, 1))
785 nargs++;
787 bool has_varargs = false;
788 tree decl_type = NULL_TREE;
790 if (!call_insn_operand (callee, Pmode))
792 callee = force_reg (Pmode, callee);
793 address = change_address (address, QImode, callee);
796 if (GET_CODE (callee) == SYMBOL_REF)
798 tree decl = SYMBOL_REF_DECL (callee);
799 if (decl != NULL_TREE)
801 decl_type = TREE_TYPE (decl);
802 if (DECL_STATIC_CHAIN (decl))
803 cfun->machine->has_call_with_sc = true;
804 if (DECL_EXTERNAL (decl))
805 external_decl = true;
808 if (cfun->machine->funtype
809 /* It's possible to construct testcases where we call a variable.
810 See compile/20020129-1.c. stdarg_p will crash so avoid calling it
811 in such a case. */
812 && (TREE_CODE (cfun->machine->funtype) == FUNCTION_TYPE
813 || TREE_CODE (cfun->machine->funtype) == METHOD_TYPE)
814 && stdarg_p (cfun->machine->funtype))
816 has_varargs = true;
817 cfun->machine->has_call_with_varargs = true;
819 vec = rtvec_alloc (nargs + 1 + (has_varargs ? 1 : 0));
820 pat = gen_rtx_PARALLEL (VOIDmode, vec);
821 if (has_varargs)
823 rtx this_arg = gen_reg_rtx (Pmode);
824 if (Pmode == DImode)
825 emit_move_insn (this_arg, stack_pointer_rtx);
826 else
827 emit_move_insn (this_arg, stack_pointer_rtx);
828 XVECEXP (pat, 0, nargs + 1) = gen_rtx_USE (VOIDmode, this_arg);
831 int i;
832 rtx arg;
833 for (i = 1, arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1), i++)
835 rtx this_arg = XEXP (arg, 0);
836 XVECEXP (pat, 0, i) = gen_rtx_USE (VOIDmode, this_arg);
839 rtx tmp_retval = retval;
840 t = gen_rtx_CALL (VOIDmode, address, const0_rtx);
841 if (retval != NULL_RTX)
843 if (!nvptx_register_operand (retval, GET_MODE (retval)))
844 tmp_retval = gen_reg_rtx (GET_MODE (retval));
845 t = gen_rtx_SET (tmp_retval, t);
847 XVECEXP (pat, 0, 0) = t;
848 if (!REG_P (callee)
849 && (decl_type == NULL_TREE
850 || (external_decl && TYPE_ARG_TYPES (decl_type) == NULL_TREE)))
852 rtx *slot = declared_libfuncs_htab->find_slot (callee, INSERT);
853 if (*slot == NULL)
855 *slot = callee;
856 write_func_decl_from_insn (func_decls, retval, pat, callee);
859 emit_call_insn (pat);
860 if (tmp_retval != retval)
861 emit_move_insn (retval, tmp_retval);
864 /* Implement TARGET_FUNCTION_ARG. */
866 static rtx
867 nvptx_function_arg (cumulative_args_t, machine_mode mode,
868 const_tree, bool named)
870 if (mode == VOIDmode)
871 return NULL_RTX;
873 if (named)
874 return gen_reg_rtx (mode);
875 return NULL_RTX;
878 /* Implement TARGET_FUNCTION_INCOMING_ARG. */
880 static rtx
881 nvptx_function_incoming_arg (cumulative_args_t cum_v, machine_mode mode,
882 const_tree, bool named)
884 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
885 if (mode == VOIDmode)
886 return NULL_RTX;
888 if (!named)
889 return NULL_RTX;
891 /* No need to deal with split modes here, the only case that can
892 happen is complex modes and those are dealt with by
893 TARGET_SPLIT_COMPLEX_ARG. */
894 return gen_rtx_UNSPEC (mode,
895 gen_rtvec (1, GEN_INT (1 + cum->count)),
896 UNSPEC_ARG_REG);
899 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
901 static void
902 nvptx_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
903 const_tree type ATTRIBUTE_UNUSED,
904 bool named ATTRIBUTE_UNUSED)
906 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
907 if (mode == TImode)
908 cum->count += 2;
909 else
910 cum->count++;
913 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.
915 For nvptx, we know how to handle functions declared as stdarg: by
916 passing an extra pointer to the unnamed arguments. However, the
917 Fortran frontend can produce a different situation, where a
918 function pointer is declared with no arguments, but the actual
919 function and calls to it take more arguments. In that case, we
920 want to ensure the call matches the definition of the function. */
922 static bool
923 nvptx_strict_argument_naming (cumulative_args_t cum_v)
925 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
926 return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
929 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. */
931 static unsigned int
932 nvptx_function_arg_boundary (machine_mode mode, const_tree type)
934 unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);
936 if (boundary > BITS_PER_WORD)
937 return 2 * BITS_PER_WORD;
939 if (mode == BLKmode)
941 HOST_WIDE_INT size = int_size_in_bytes (type);
942 if (size > 4)
943 return 2 * BITS_PER_WORD;
944 if (boundary < BITS_PER_WORD)
946 if (size >= 3)
947 return BITS_PER_WORD;
948 if (size >= 2)
949 return 2 * BITS_PER_UNIT;
952 return boundary;
955 /* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place
956 where function FUNC returns or receives a value of data type TYPE. */
958 static rtx
959 nvptx_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
960 bool outgoing)
962 int unsignedp = TYPE_UNSIGNED (type);
963 machine_mode orig_mode = TYPE_MODE (type);
964 machine_mode mode = promote_function_mode (type, orig_mode,
965 &unsignedp, NULL_TREE, 1);
966 if (outgoing)
967 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
968 if (cfun->machine->start_call == NULL_RTX)
969 /* Pretend to return in a hard reg for early uses before pseudos can be
970 generated. */
971 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
972 return gen_reg_rtx (mode);
975 /* Implement TARGET_LIBCALL_VALUE. */
977 static rtx
978 nvptx_libcall_value (machine_mode mode, const_rtx)
980 if (cfun->machine->start_call == NULL_RTX)
981 /* Pretend to return in a hard reg for early uses before pseudos can be
982 generated. */
983 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
984 return gen_reg_rtx (mode);
987 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
989 static bool
990 nvptx_function_value_regno_p (const unsigned int regno)
992 return regno == NVPTX_RETURN_REGNUM;
995 /* Types with a mode other than those supported by the machine are passed by
996 reference in memory. */
998 static bool
999 nvptx_pass_by_reference (cumulative_args_t, machine_mode mode,
1000 const_tree type, bool)
1002 return !PASS_IN_REG_P (mode, type);
1005 /* Implement TARGET_RETURN_IN_MEMORY. */
1007 static bool
1008 nvptx_return_in_memory (const_tree type, const_tree)
1010 machine_mode mode = TYPE_MODE (type);
1011 if (!RETURN_IN_REG_P (mode))
1012 return true;
1013 return false;
1016 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
1018 static machine_mode
1019 nvptx_promote_function_mode (const_tree type, machine_mode mode,
1020 int *punsignedp,
1021 const_tree funtype, int for_return)
1023 if (type == NULL_TREE)
1024 return mode;
1025 if (for_return)
1026 return promote_mode (type, mode, punsignedp);
1027 /* For K&R-style functions, try to match the language promotion rules to
1028 minimize type mismatches at assembly time. */
1029 if (TYPE_ARG_TYPES (funtype) == NULL_TREE
1030 && type != NULL_TREE
1031 && !AGGREGATE_TYPE_P (type))
1033 if (mode == SFmode)
1034 mode = DFmode;
1035 mode = arg_promotion (mode);
1038 return mode;
1041 /* Implement TARGET_STATIC_CHAIN. */
1043 static rtx
1044 nvptx_static_chain (const_tree fndecl, bool incoming_p)
1046 if (!DECL_STATIC_CHAIN (fndecl))
1047 return NULL;
1049 if (incoming_p)
1050 return gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
1051 else
1052 return gen_rtx_REG (Pmode, OUTGOING_STATIC_CHAIN_REGNUM);
1055 /* Emit a comparison COMPARE, and return the new test to be used in the
1056 jump. */
1059 nvptx_expand_compare (rtx compare)
1061 rtx pred = gen_reg_rtx (BImode);
1062 rtx cmp = gen_rtx_fmt_ee (GET_CODE (compare), BImode,
1063 XEXP (compare, 0), XEXP (compare, 1));
1064 emit_insn (gen_rtx_SET (pred, cmp));
1065 return gen_rtx_NE (BImode, pred, const0_rtx);
1068 /* When loading an operand ORIG_OP, verify whether an address space
1069 conversion to generic is required, and if so, perform it. Also
1070 check for SYMBOL_REFs for function decls and call
1071 nvptx_record_needed_fndecl as needed.
1072 Return either the original operand, or the converted one. */
1075 nvptx_maybe_convert_symbolic_operand (rtx orig_op)
1077 if (GET_MODE (orig_op) != Pmode)
1078 return orig_op;
1080 rtx op = orig_op;
1081 while (GET_CODE (op) == PLUS || GET_CODE (op) == CONST)
1082 op = XEXP (op, 0);
1083 if (GET_CODE (op) != SYMBOL_REF)
1084 return orig_op;
1086 tree decl = SYMBOL_REF_DECL (op);
1087 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
1089 nvptx_record_needed_fndecl (decl);
1090 return orig_op;
1093 addr_space_t as = nvptx_addr_space_from_address (op);
1094 if (as == ADDR_SPACE_GENERIC)
1095 return orig_op;
1097 enum unspec code;
1098 code = (as == ADDR_SPACE_GLOBAL ? UNSPEC_FROM_GLOBAL
1099 : as == ADDR_SPACE_LOCAL ? UNSPEC_FROM_LOCAL
1100 : as == ADDR_SPACE_SHARED ? UNSPEC_FROM_SHARED
1101 : as == ADDR_SPACE_CONST ? UNSPEC_FROM_CONST
1102 : UNSPEC_FROM_PARAM);
1103 rtx dest = gen_reg_rtx (Pmode);
1104 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig_op),
1105 code)));
1106 return dest;
1109 /* Returns true if X is a valid address for use in a memory reference. */
1111 static bool
1112 nvptx_legitimate_address_p (machine_mode, rtx x, bool)
1114 enum rtx_code code = GET_CODE (x);
1116 switch (code)
1118 case REG:
1119 return true;
1121 case PLUS:
1122 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1123 return true;
1124 return false;
1126 case CONST:
1127 case SYMBOL_REF:
1128 case LABEL_REF:
1129 return true;
1131 default:
1132 return false;
1136 /* Implement HARD_REGNO_MODE_OK. We barely use hard regs, but we want
1137 to ensure that the return register's mode isn't changed. */
1139 bool
1140 nvptx_hard_regno_mode_ok (int regno, machine_mode mode)
1142 if (regno != NVPTX_RETURN_REGNUM
1143 || cfun == NULL || cfun->machine->ret_reg_mode == VOIDmode)
1144 return true;
1145 return mode == cfun->machine->ret_reg_mode;
1148 /* Convert an address space AS to the corresponding ptx string. */
1150 const char *
1151 nvptx_section_from_addr_space (addr_space_t as)
1153 switch (as)
1155 case ADDR_SPACE_CONST:
1156 return ".const";
1158 case ADDR_SPACE_GLOBAL:
1159 return ".global";
1161 case ADDR_SPACE_SHARED:
1162 return ".shared";
1164 case ADDR_SPACE_GENERIC:
1165 return "";
1167 default:
1168 gcc_unreachable ();
1172 /* Determine whether DECL goes into .const or .global. */
1174 const char *
1175 nvptx_section_for_decl (const_tree decl)
1177 bool is_const = (CONSTANT_CLASS_P (decl)
1178 || TREE_CODE (decl) == CONST_DECL
1179 || TREE_READONLY (decl));
1180 if (is_const)
1181 return ".const";
1183 return ".global";
1186 /* Look for a SYMBOL_REF in ADDR and return the address space to be used
1187 for the insn referencing this address. */
1189 addr_space_t
1190 nvptx_addr_space_from_address (rtx addr)
1192 while (GET_CODE (addr) == PLUS || GET_CODE (addr) == CONST)
1193 addr = XEXP (addr, 0);
1194 if (GET_CODE (addr) != SYMBOL_REF)
1195 return ADDR_SPACE_GENERIC;
1197 tree decl = SYMBOL_REF_DECL (addr);
1198 if (decl == NULL_TREE || TREE_CODE (decl) == FUNCTION_DECL)
1199 return ADDR_SPACE_GENERIC;
1201 bool is_const = (CONSTANT_CLASS_P (decl)
1202 || TREE_CODE (decl) == CONST_DECL
1203 || TREE_READONLY (decl));
1204 if (is_const)
1205 return ADDR_SPACE_CONST;
1207 return ADDR_SPACE_GLOBAL;
1210 /* Machinery to output constant initializers. */
1212 /* Used when assembling integers to ensure data is emitted in
1213 pieces whose size matches the declaration we printed. */
1214 static unsigned int decl_chunk_size;
1215 static machine_mode decl_chunk_mode;
1216 /* Used in the same situation, to keep track of the byte offset
1217 into the initializer. */
1218 static unsigned HOST_WIDE_INT decl_offset;
1219 /* The initializer part we are currently processing. */
1220 static HOST_WIDE_INT init_part;
1221 /* The total size of the object. */
1222 static unsigned HOST_WIDE_INT object_size;
1223 /* True if we found a skip extending to the end of the object. Used to
1224 assert that no data follows. */
1225 static bool object_finished;
1227 /* Write the necessary separator string to begin a new initializer value. */
1229 static void
1230 begin_decl_field (void)
1232 /* We never see decl_offset at zero by the time we get here. */
1233 if (decl_offset == decl_chunk_size)
1234 fprintf (asm_out_file, " = { ");
1235 else
1236 fprintf (asm_out_file, ", ");
1239 /* Output the currently stored chunk as an initializer value. */
1241 static void
1242 output_decl_chunk (void)
1244 begin_decl_field ();
1245 output_address (gen_int_mode (init_part, decl_chunk_mode));
1246 init_part = 0;
1249 /* Add value VAL sized SIZE to the data we're emitting, and keep writing
1250 out chunks as they fill up. */
1252 static void
1253 nvptx_assemble_value (HOST_WIDE_INT val, unsigned int size)
1255 unsigned HOST_WIDE_INT chunk_offset = decl_offset % decl_chunk_size;
1256 gcc_assert (!object_finished);
1257 while (size > 0)
1259 int this_part = size;
1260 if (chunk_offset + this_part > decl_chunk_size)
1261 this_part = decl_chunk_size - chunk_offset;
1262 HOST_WIDE_INT val_part;
1263 HOST_WIDE_INT mask = 2;
1264 mask <<= this_part * BITS_PER_UNIT - 1;
1265 val_part = val & (mask - 1);
1266 init_part |= val_part << (BITS_PER_UNIT * chunk_offset);
1267 val >>= BITS_PER_UNIT * this_part;
1268 size -= this_part;
1269 decl_offset += this_part;
1270 if (decl_offset % decl_chunk_size == 0)
1271 output_decl_chunk ();
1273 chunk_offset = 0;
1277 /* Target hook for assembling integer object X of size SIZE. */
1279 static bool
1280 nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p))
1282 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
1284 gcc_assert (size = decl_chunk_size);
1285 if (decl_offset % decl_chunk_size != 0)
1286 sorry ("cannot emit unaligned pointers in ptx assembly");
1287 decl_offset += size;
1288 begin_decl_field ();
1290 HOST_WIDE_INT off = 0;
1291 if (GET_CODE (x) == CONST)
1292 x = XEXP (x, 0);
1293 if (GET_CODE (x) == PLUS)
1295 off = INTVAL (XEXP (x, 1));
1296 x = XEXP (x, 0);
1298 if (GET_CODE (x) == SYMBOL_REF)
1300 nvptx_record_needed_fndecl (SYMBOL_REF_DECL (x));
1301 fprintf (asm_out_file, "generic(");
1302 output_address (x);
1303 fprintf (asm_out_file, ")");
1305 if (off != 0)
1306 fprintf (asm_out_file, " + " HOST_WIDE_INT_PRINT_DEC, off);
1307 return true;
1310 HOST_WIDE_INT val;
1311 switch (GET_CODE (x))
1313 case CONST_INT:
1314 val = INTVAL (x);
1315 break;
1316 case CONST_DOUBLE:
1317 gcc_unreachable ();
1318 break;
1319 default:
1320 gcc_unreachable ();
1323 nvptx_assemble_value (val, size);
1324 return true;
1327 /* Output SIZE zero bytes. We ignore the FILE argument since the
1328 functions we're calling to perform the output just use
1329 asm_out_file. */
1331 void
1332 nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT size)
1334 if (decl_offset + size >= object_size)
1336 if (decl_offset % decl_chunk_size != 0)
1337 nvptx_assemble_value (0, decl_chunk_size);
1338 object_finished = true;
1339 return;
1342 while (size > decl_chunk_size)
1344 nvptx_assemble_value (0, decl_chunk_size);
1345 size -= decl_chunk_size;
1347 while (size-- > 0)
1348 nvptx_assemble_value (0, 1);
1351 /* Output a string STR with length SIZE. As in nvptx_output_skip we
1352 ignore the FILE arg. */
1354 void
1355 nvptx_output_ascii (FILE *, const char *str, unsigned HOST_WIDE_INT size)
1357 for (unsigned HOST_WIDE_INT i = 0; i < size; i++)
1358 nvptx_assemble_value (str[i], 1);
1361 /* Called when the initializer for a decl has been completely output through
1362 combinations of the three functions above. */
1364 static void
1365 nvptx_assemble_decl_end (void)
1367 if (decl_offset != 0)
1369 if (!object_finished && decl_offset % decl_chunk_size != 0)
1370 nvptx_assemble_value (0, decl_chunk_size);
1372 fprintf (asm_out_file, " }");
1374 fprintf (asm_out_file, ";\n");
1377 /* Start a declaration of a variable of TYPE with NAME to
1378 FILE. IS_PUBLIC says whether this will be externally visible.
1379 Here we just write the linker hint and decide on the chunk size
1380 to use. */
1382 static void
1383 init_output_initializer (FILE *file, const char *name, const_tree type,
1384 bool is_public)
1386 fprintf (file, "// BEGIN%s VAR DEF: ", is_public ? " GLOBAL" : "");
1387 assemble_name_raw (file, name);
1388 fputc ('\n', file);
1390 if (TREE_CODE (type) == ARRAY_TYPE)
1391 type = TREE_TYPE (type);
1392 int sz = int_size_in_bytes (type);
1393 if ((TREE_CODE (type) != INTEGER_TYPE
1394 && TREE_CODE (type) != ENUMERAL_TYPE
1395 && TREE_CODE (type) != REAL_TYPE)
1396 || sz < 0
1397 || sz > HOST_BITS_PER_WIDE_INT)
1398 type = ptr_type_node;
1399 decl_chunk_size = int_size_in_bytes (type);
1400 decl_chunk_mode = int_mode_for_mode (TYPE_MODE (type));
1401 decl_offset = 0;
1402 init_part = 0;
1403 object_finished = false;
1406 /* Implement TARGET_ASM_DECLARE_CONSTANT_NAME. Begin the process of
1407 writing a constant variable EXP with NAME and SIZE and its
1408 initializer to FILE. */
1410 static void
1411 nvptx_asm_declare_constant_name (FILE *file, const char *name,
1412 const_tree exp, HOST_WIDE_INT size)
1414 tree type = TREE_TYPE (exp);
1415 init_output_initializer (file, name, type, false);
1416 fprintf (file, "\t.const .align %d .u%d ",
1417 TYPE_ALIGN (TREE_TYPE (exp)) / BITS_PER_UNIT,
1418 decl_chunk_size * BITS_PER_UNIT);
1419 assemble_name (file, name);
1420 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1421 (size + decl_chunk_size - 1) / decl_chunk_size);
1422 object_size = size;
1425 /* Implement the ASM_DECLARE_OBJECT_NAME macro. Used to start writing
1426 a variable DECL with NAME to FILE. */
1428 void
1429 nvptx_declare_object_name (FILE *file, const char *name, const_tree decl)
1431 if (decl && DECL_SIZE (decl))
1433 tree type = TREE_TYPE (decl);
1434 unsigned HOST_WIDE_INT size;
1436 init_output_initializer (file, name, type, TREE_PUBLIC (decl));
1437 size = tree_to_uhwi (DECL_SIZE_UNIT (decl));
1438 const char *section = nvptx_section_for_decl (decl);
1439 fprintf (file, "\t%s%s .align %d .u%d ",
1440 TREE_PUBLIC (decl) ? " .visible" : "", section,
1441 DECL_ALIGN (decl) / BITS_PER_UNIT,
1442 decl_chunk_size * BITS_PER_UNIT);
1443 assemble_name (file, name);
1444 if (size > 0)
1445 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1446 (size + decl_chunk_size - 1) / decl_chunk_size);
1447 else
1448 object_finished = true;
1449 object_size = size;
1453 /* Implement TARGET_ASM_GLOBALIZE_LABEL by doing nothing. */
1455 static void
1456 nvptx_globalize_label (FILE *, const char *)
1460 /* Implement TARGET_ASM_ASSEMBLE_UNDEFINED_DECL. Write an extern
1461 declaration only for variable DECL with NAME to FILE. */
1462 static void
1463 nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl)
1465 if (TREE_CODE (decl) != VAR_DECL)
1466 return;
1467 const char *section = nvptx_section_for_decl (decl);
1468 fprintf (file, "// BEGIN%s VAR DECL: ", TREE_PUBLIC (decl) ? " GLOBAL" : "");
1469 assemble_name_raw (file, name);
1470 fputs ("\n", file);
1471 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
1472 fprintf (file, ".extern %s .b8 ", section);
1473 assemble_name_raw (file, name);
1474 if (size > 0)
1475 fprintf (file, "["HOST_WIDE_INT_PRINT_DEC"]", size);
1476 fprintf (file, ";\n\n");
1479 /* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this
1480 involves writing .param declarations and in/out copies into them. */
/* Everything up to and including the call instruction is printed directly
   to asm_out_file inside a "{" scope opened here.  The returned string is
   the template for the remainder: it closes the scope with "}", and when
   RESULT is non-null it first loads operand 0 from %retval_in.  */
1482 const char *
1483 nvptx_output_call_insn (rtx_insn *insn, rtx result, rtx callee)
1485 char buf[256];
/* Counter used to number the "LCT" labels generated for register callees.  */
1486 static int labelno;
/* True when CALLEE satisfies register_operand in Pmode.  */
1487 bool needs_tgt = register_operand (callee, Pmode);
1488 rtx pat = PATTERN (insn);
/* Element 0 of the pattern vector is not an argument; the arguments are
   read from elements 1 onwards below.  */
1489 int nargs = XVECLEN (pat, 0) - 1;
1490 tree decl = NULL_TREE;
1492 fprintf (asm_out_file, "\t{\n");
/* Declare the .param that receives the return value, using the promoted
   mode of RESULT.  */
1493 if (result != NULL)
1495 fprintf (asm_out_file, "\t\t.param%s %%retval_in;\n",
1496 nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)),
1497 false));
/* For a symbolic callee, remember its decl and record it through
   nvptx_record_fndecl when it is external.  */
1500 if (GET_CODE (callee) == SYMBOL_REF)
1502 decl = SYMBOL_REF_DECL (callee);
1503 if (decl && DECL_EXTERNAL (decl))
1504 nvptx_record_fndecl (decl);
/* For a register callee, emit a fresh internal label followed by the text
   produced by write_func_decl_from_insn; the label is named again at the
   end of the call instruction.  */
1507 if (needs_tgt)
1509 ASM_GENERATE_INTERNAL_LABEL (buf, "LCT", labelno);
1510 labelno++;
1511 ASM_OUTPUT_LABEL (asm_out_file, buf);
1512 std::stringstream s;
1513 write_func_decl_from_insn (s, result, pat, callee);
1514 fputs (s.str().c_str(), asm_out_file);
/* First pass over the arguments: declare one %out_arg .param per piece
   reported by maybe_split_mode.  QImode and HImode pieces are declared
   with a "[1]" suffix.  */
1517 for (int i = 0, argno = 0; i < nargs; i++)
1519 rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
1520 machine_mode mode = GET_MODE (t);
1521 int count = maybe_split_mode (&mode);
1523 while (count-- > 0)
1524 fprintf (asm_out_file, "\t\t.param%s %%out_arg%d%s;\n",
1525 nvptx_ptx_type_from_mode (mode, false), argno++,
1526 mode == QImode || mode == HImode ? "[1]" : "");
/* Second pass: store each argument register into its .param.  A register
   that is split into several pieces is stored piece by piece, the pieces
   being named with a "$n" suffix.  */
1528 for (int i = 0, argno = 0; i < nargs; i++)
1530 rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
1531 gcc_assert (REG_P (t));
1532 machine_mode mode = GET_MODE (t);
1533 int count = maybe_split_mode (&mode);
1535 if (count == 1)
1536 fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d;\n",
1537 nvptx_ptx_type_from_mode (mode, false), argno++,
1538 REGNO (t));
1539 else
1541 int n = 0;
1542 while (count-- > 0)
1543 fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d$%d;\n",
1544 nvptx_ptx_type_from_mode (mode, false), argno++,
1545 REGNO (t), n++);
/* Now the call instruction itself: the optional return .param, the callee,
   the parenthesized argument list and, for a register callee, the label
   emitted above.  */
1549 fprintf (asm_out_file, "\t\tcall ");
1550 if (result != NULL_RTX)
1551 fprintf (asm_out_file, "(%%retval_in), ");
/* A callee with a decl is printed by (possibly replaced) name; anything
   else is printed as an address.  */
1553 if (decl)
1555 const char *name = get_fnname_from_decl (decl);
1556 name = nvptx_name_replacement (name);
1557 assemble_name (asm_out_file, name);
1559 else
1560 output_address (callee);
1562 if (nargs > 0 || (decl && DECL_STATIC_CHAIN (decl)))
1564 fprintf (asm_out_file, ", (");
1565 int i, argno;
1566 for (i = 0, argno = 0; i < nargs; i++)
1568 rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
1569 machine_mode mode = GET_MODE (t);
1570 int count = maybe_split_mode (&mode);
1572 while (count-- > 0)
1574 fprintf (asm_out_file, "%%out_arg%d", argno++);
/* Separate entries with a comma, but print none after the very last
   piece of the last argument.  */
1575 if (i + 1 < nargs || count > 0)
1576 fprintf (asm_out_file, ", ");
/* The static chain register, when the callee uses one, goes last.  I
   equals NARGS here, so the comma is printed only if arguments precede
   it.  */
1579 if (decl && DECL_STATIC_CHAIN (decl))
1581 if (i > 0)
1582 fprintf (asm_out_file, ", ");
1583 fprintf (asm_out_file, "%s",
1584 reg_names [OUTGOING_STATIC_CHAIN_REGNUM]);
1587 fprintf (asm_out_file, ")");
1589 if (needs_tgt)
1591 fprintf (asm_out_file, ", ");
1592 assemble_name (asm_out_file, buf);
1594 fprintf (asm_out_file, ";\n");
1595 if (result != NULL_RTX)
1596 return "ld.param%t0\t%0, [%%retval_in];\n\t}";
1598 return "}";
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  The only punctuation
   characters accepted as operand codes are '.' and '#'.  */

static bool
nvptx_print_operand_punct_valid_p (unsigned char c)
{
  switch (c)
    {
    case '.':
    case '#':
      return true;

    default:
      return false;
    }
}
1609 static void nvptx_print_operand (FILE *, rtx, int);
1611 /* Subroutine of nvptx_print_operand; used to print a memory reference X to FILE. */
1613 static void
1614 nvptx_print_address_operand (FILE *file, rtx x, machine_mode)
1616 rtx off;
1617 if (GET_CODE (x) == CONST)
1618 x = XEXP (x, 0);
1619 switch (GET_CODE (x))
1621 case PLUS:
1622 off = XEXP (x, 1);
1623 output_address (XEXP (x, 0));
1624 fprintf (file, "+");
1625 output_address (off);
1626 break;
1628 case SYMBOL_REF:
1629 case LABEL_REF:
1630 output_addr_const (file, x);
1631 break;
1633 default:
1634 gcc_assert (GET_CODE (x) != MEM);
1635 nvptx_print_operand (file, x, 0);
1636 break;
1640 /* Write assembly language output for the address ADDR to FILE. */
1642 static void
1643 nvptx_print_operand_address (FILE *file, rtx addr)
1645 nvptx_print_address_operand (file, addr, VOIDmode);
1648 /* Print an operand, X, to FILE, with an optional modifier in CODE.
1650 Meaning of CODE:
1651 . -- print the predicate for the instruction or an empty string for an
1652 unconditional one.
1653 # -- print a rounding mode for the instruction
1655 A -- print an address space identifier for a MEM
1656 c -- print an opcode suffix for a comparison operator, including a type code
1657 d -- print a CONST_INT as a vector dimension (x, y, or z)
1658 f -- print a full reg even for something that must always be split
j -- print "@" followed by the operand
J -- print "@!" followed by the operand
1659 t -- print a type opcode suffix, promoting QImode to 32 bits
1660 T -- print a type size in bits
1661 u -- print a type opcode suffix without promotions. */
1663 static void
1664 nvptx_print_operand (FILE *file, rtx x, int code)
/* ORIG_X keeps the operand as passed in; the SUBREG case below replaces X
   by the inner register but still needs the original SUBREG.  */
1666 rtx orig_x = x;
1667 machine_mode op_mode;
/* The punctuation codes do not look at X at all, so handle them first.  */
1669 if (code == '.')
1671 x = current_insn_predicate;
1672 if (x)
1674 unsigned int regno = REGNO (XEXP (x, 0));
1675 fputs ("[", file);
/* An EQ predicate is printed negated.  */
1676 if (GET_CODE (x) == EQ)
1677 fputs ("!", file);
1678 fputs (reg_names [regno], file);
1679 fputs ("]", file);
1681 return;
1683 else if (code == '#')
/* The rounding mode printed is always ".rn".  */
1685 fputs (".rn", file);
1686 return;
1689 enum rtx_code x_code = GET_CODE (x);
1691 switch (code)
1693 case 'A':
/* Derive the address space from the address inside X.  */
1695 addr_space_t as = nvptx_addr_space_from_address (XEXP (x, 0));
1696 fputs (nvptx_section_from_addr_space (as), file);
1698 break;
1700 case 'd':
1701 gcc_assert (x_code == CONST_INT);
1702 if (INTVAL (x) == 0)
1703 fputs (".x", file);
1704 else if (INTVAL (x) == 1)
1705 fputs (".y", file);
1706 else if (INTVAL (x) == 2)
1707 fputs (".z", file);
1708 else
1709 gcc_unreachable ();
1710 break;
1712 case 't':
1713 op_mode = nvptx_underlying_object_mode (x);
1714 fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, true));
1715 break;
1717 case 'u':
1718 op_mode = nvptx_underlying_object_mode (x);
1719 fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, false));
1720 break;
1722 case 'T':
1723 fprintf (file, "%d", GET_MODE_BITSIZE (GET_MODE (x)));
1724 break;
/* 'j' and 'J' print a prefix and then fall into the plain operand
   printing code at the "common" label.  */
1726 case 'j':
1727 fprintf (file, "@");
1728 goto common;
1730 case 'J':
1731 fprintf (file, "@!");
1732 goto common;
1734 case 'c':
/* The type suffix is taken from the mode of the first comparison
   operand.  */
1735 op_mode = GET_MODE (XEXP (x, 0));
1736 switch (x_code)
1738 case EQ:
1739 fputs (".eq", file);
1740 break;
1741 case NE:
1742 if (FLOAT_MODE_P (op_mode))
1743 fputs (".neu", file);
1744 else
1745 fputs (".ne", file);
1746 break;
1747 case LE:
1748 fputs (".le", file);
1749 break;
1750 case GE:
1751 fputs (".ge", file);
1752 break;
1753 case LT:
1754 fputs (".lt", file);
1755 break;
1756 case GT:
1757 fputs (".gt", file);
1758 break;
1759 case LEU:
1760 fputs (".ls", file);
1761 break;
1762 case GEU:
1763 fputs (".hs", file);
1764 break;
1765 case LTU:
1766 fputs (".lo", file);
1767 break;
1768 case GTU:
1769 fputs (".hi", file);
1770 break;
1771 case LTGT:
1772 fputs (".ne", file);
1773 break;
1774 case UNEQ:
1775 fputs (".equ", file);
1776 break;
1777 case UNLE:
1778 fputs (".leu", file);
1779 break;
1780 case UNGE:
1781 fputs (".geu", file);
1782 break;
1783 case UNLT:
1784 fputs (".ltu", file);
1785 break;
1786 case UNGT:
1787 fputs (".gtu", file);
1788 break;
1789 case UNORDERED:
1790 fputs (".nan", file);
1791 break;
1792 case ORDERED:
1793 fputs (".num", file);
1794 break;
1795 default:
1796 gcc_unreachable ();
/* Append the operand type.  Floating-point comparisons, equality tests
   and the unsigned orderings use the type name chosen by
   nvptx_ptx_type_from_mode; the remaining integer comparisons get an
   explicit signed ".sN" suffix.  */
1798 if (FLOAT_MODE_P (op_mode)
1799 || x_code == EQ || x_code == NE
1800 || x_code == GEU || x_code == GTU
1801 || x_code == LEU || x_code == LTU)
1802 fputs (nvptx_ptx_type_from_mode (op_mode, true), file);
1803 else
1804 fprintf (file, ".s%d", GET_MODE_BITSIZE (op_mode));
1805 break;
/* Any other code, 'f' and no code at all included, prints X itself
   according to its rtx code.  */
1806 default:
1807 common:
1808 switch (x_code)
1810 case SUBREG:
1811 x = SUBREG_REG (x);
1812 /* fall through */
1814 case REG:
/* Hard registers are printed by name, pseudos as "%r<regno>".  */
1815 if (HARD_REGISTER_P (x))
1816 fprintf (file, "%s", reg_names[REGNO (x)]);
1817 else
1818 fprintf (file, "%%r%d", REGNO (x));
/* A register in a split mode is only reachable here through a SUBREG
   of a non-split mode; select the piece with a "$n" suffix computed from
   the subreg byte offset.  */
1819 if (code != 'f' && nvptx_split_reg_p (GET_MODE (x)))
1821 gcc_assert (GET_CODE (orig_x) == SUBREG
1822 && !nvptx_split_reg_p (GET_MODE (orig_x)));
1823 fprintf (file, "$%d", SUBREG_BYTE (orig_x) / UNITS_PER_WORD);
1825 break;
1827 case MEM:
1828 fputc ('[', file);
1829 nvptx_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
1830 fputc (']', file);
1831 break;
1833 case CONST_INT:
1834 output_addr_const (file, x);
1835 break;
1837 case CONST:
1838 case SYMBOL_REF:
1839 case LABEL_REF:
1840 /* We could use output_addr_const, but that can print things like
1841 "x-8", which breaks ptxas. Need to ensure it is output as
1842 "x+-8". */
1843 nvptx_print_address_operand (file, x, VOIDmode);
1844 break;
1846 case CONST_DOUBLE:
/* Floating-point constants are printed as hex bit patterns: "0f" plus
   8 digits for SFmode, "0d" plus 16 digits (high word first) for any
   other mode.  */
1847 long vals[2];
1848 REAL_VALUE_TYPE real;
1849 REAL_VALUE_FROM_CONST_DOUBLE (real, x);
1850 real_to_target (vals, &real, GET_MODE (x));
/* Keep only the low 32 bits of each word before printing.  */
1851 vals[0] &= 0xffffffff;
1852 vals[1] &= 0xffffffff;
1853 if (GET_MODE (x) == SFmode)
1854 fprintf (file, "0f%08lx", vals[0]);
1855 else
1856 fprintf (file, "0d%08lx%08lx", vals[1], vals[0]);
1857 break;
1859 default:
1860 output_addr_const (file, x);
1865 /* Record replacement regs used to deal with subreg operands. */
1866 struct reg_replace
1868 rtx replacement[MAX_RECOG_OPERANDS];
1869 machine_mode mode;
1870 int n_allocated;
1871 int n_in_use;
1874 /* Allocate or reuse a replacement in R and return the rtx. */
1876 static rtx
1877 get_replacement (struct reg_replace *r)
1879 if (r->n_allocated == r->n_in_use)
1880 r->replacement[r->n_allocated++] = gen_reg_rtx (r->mode);
1881 return r->replacement[r->n_in_use++];
1884 /* Clean up subreg operands. In ptx assembly, everything is typed, and
1885 the presence of subregs would break the rules for most instructions.
1886 Replace them with a suitable new register of the right size, plus
1887 conversion copyin/copyout instructions. */
1889 static void
1890 nvptx_reorg (void)
/* One pool of replacement registers per integer mode that can appear as
   the outer mode of a subreg.  */
1892 struct reg_replace qiregs, hiregs, siregs, diregs;
1893 rtx_insn *insn, *next;
1895 /* We are freeing block_for_insn in the toplev to keep compatibility
1896 with old MDEP_REORGS that are not CFG based. Recompute it now. */
1897 compute_bb_for_insn ();
1899 df_clear_flags (DF_LR_RUN_DCE);
1900 df_analyze ();
1902 thread_prologue_and_epilogue_insns ();
1904 qiregs.n_allocated = 0;
1905 hiregs.n_allocated = 0;
1906 siregs.n_allocated = 0;
1907 diregs.n_allocated = 0;
1908 qiregs.mode = QImode;
1909 hiregs.mode = HImode;
1910 siregs.mode = SImode;
1911 diregs.mode = DImode;
/* NEXT is fetched before INSN is processed, so the copy insns emitted
   after INSN below are not visited by this loop.  */
1913 for (insn = get_insns (); insn; insn = next)
1915 next = NEXT_INSN (insn);
/* Skip everything that is not a non-debug insn, insns for which
   asm_noperands is non-negative, and bare USE and CLOBBER patterns.  */
1916 if (!NONDEBUG_INSN_P (insn)
1917 || asm_noperands (insn) >= 0
1918 || GET_CODE (PATTERN (insn)) == USE
1919 || GET_CODE (PATTERN (insn)) == CLOBBER)
1920 continue;
/* Replacement registers are per-insn: mark the whole of each pool as
   available again so later insns reuse the same pseudos.  */
1921 qiregs.n_in_use = 0;
1922 hiregs.n_in_use = 0;
1923 siregs.n_in_use = 0;
1924 diregs.n_in_use = 0;
1925 extract_insn (insn);
1926 enum attr_subregs_ok s_ok = get_attr_subregs_ok (insn);
1927 for (int i = 0; i < recog_data.n_operands; i++)
1929 rtx op = recog_data.operand[i];
1930 if (GET_CODE (op) != SUBREG)
1931 continue;
1933 rtx inner = SUBREG_REG (op);
1935 machine_mode outer_mode = GET_MODE (op);
1936 machine_mode inner_mode = GET_MODE (inner);
/* A subreg operand is only expected on insns whose subregs_ok attribute
   is set; a subreg that does not widen its inner register is then left
   in place.  */
1937 gcc_assert (s_ok);
1938 if (s_ok
1939 && (GET_MODE_PRECISION (inner_mode)
1940 >= GET_MODE_PRECISION (outer_mode)))
1941 continue;
1942 gcc_assert (SCALAR_INT_MODE_P (outer_mode));
/* Pick the pool matching the outer mode; anything that is not QI, HI or
   SI uses the DImode pool.  */
1943 struct reg_replace *r = (outer_mode == QImode ? &qiregs
1944 : outer_mode == HImode ? &hiregs
1945 : outer_mode == SImode ? &siregs
1946 : &diregs);
1947 rtx new_reg = get_replacement (r);
/* Operand is read: before INSN, load the replacement from the inner
   register, zero-extending when the inner mode is narrower and truncating
   otherwise.  */
1949 if (recog_data.operand_type[i] != OP_OUT)
1951 enum rtx_code code;
1952 if (GET_MODE_PRECISION (inner_mode)
1953 < GET_MODE_PRECISION (outer_mode))
1954 code = ZERO_EXTEND;
1955 else
1956 code = TRUNCATE;
1958 rtx pat = gen_rtx_SET (new_reg,
1959 gen_rtx_fmt_e (code, outer_mode, inner));
1960 emit_insn_before (pat, insn);
/* Operand is written: after INSN, copy the replacement back into the
   inner register with the opposite conversion.  */
1963 if (recog_data.operand_type[i] != OP_IN)
1965 enum rtx_code code;
1966 if (GET_MODE_PRECISION (inner_mode)
1967 < GET_MODE_PRECISION (outer_mode))
1968 code = TRUNCATE;
1969 else
1970 code = ZERO_EXTEND;
1972 rtx pat = gen_rtx_SET (inner,
1973 gen_rtx_fmt_e (code, inner_mode, new_reg));
1974 emit_insn_after (pat, insn);
/* Finally substitute the replacement register for the subreg operand.  */
1976 validate_change (insn, recog_data.operand_loc[i], new_reg, false);
/* Replace the regno_reg_rtx entry of every pseudo that is neither set nor
   referenced by const0_rtx.  NOTE(review): presumably this marks the
   register as unused for later output code -- confirm against the users
   of regno_reg_rtx.  */
1980 int maxregs = max_reg_num ();
1981 regstat_init_n_sets_and_refs ();
1983 for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
1984 if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
1985 regno_reg_rtx[i] = const0_rtx;
1986 regstat_free_n_sets_and_refs ();
1989 /* Handle a "kernel" attribute; arguments as in
1990 struct attribute_spec.handler. */
1992 static tree
1993 nvptx_handle_kernel_attribute (tree *node, tree name, tree ARG_UNUSED (args),
1994 int ARG_UNUSED (flags), bool *no_add_attrs)
1996 tree decl = *node;
1998 if (TREE_CODE (decl) != FUNCTION_DECL)
2000 error ("%qE attribute only applies to functions", name);
2001 *no_add_attrs = true;
2004 else if (TREE_TYPE (TREE_TYPE (decl)) != void_type_node)
2006 error ("%qE attribute requires a void return type", name);
2007 *no_add_attrs = true;
2010 return NULL_TREE;
2013 /* Table of valid machine attributes. */
2014 static const struct attribute_spec nvptx_attribute_table[] =
2016 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
2017 affects_type_identity } */
2018 { "kernel", 0, 0, true, false, false, nvptx_handle_kernel_attribute, false },
2019 { NULL, 0, 0, false, false, false, NULL, false }
2022 /* Limit vector alignments to BIGGEST_ALIGNMENT. */
2024 static HOST_WIDE_INT
2025 nvptx_vector_alignment (const_tree type)
2027 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
2029 return MIN (align, BIGGEST_ALIGNMENT);
2032 /* Record a symbol for mkoffload to enter into the mapping table. */
2034 static void
2035 nvptx_record_offload_symbol (tree decl)
2037 fprintf (asm_out_file, "//:%s_MAP %s\n",
2038 TREE_CODE (decl) == VAR_DECL ? "VAR" : "FUNC",
2039 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
2042 /* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects
2043 at the start of a file. */
2045 static void
2046 nvptx_file_start (void)
2048 fputs ("// BEGIN PREAMBLE\n", asm_out_file);
2049 fputs ("\t.version\t3.1\n", asm_out_file);
2050 fputs ("\t.target\tsm_30\n", asm_out_file);
2051 fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode));
2052 fputs ("// END PREAMBLE\n", asm_out_file);
2055 /* Write out the function declarations we've collected. */
2057 static void
2058 nvptx_file_end (void)
2060 hash_table<tree_hasher>::iterator iter;
2061 tree decl;
2062 FOR_EACH_HASH_TABLE_ELEMENT (*needed_fndecls_htab, decl, tree, iter)
2063 nvptx_record_fndecl (decl, true);
2064 fputs (func_decls.str().c_str(), asm_out_file);
/* Target hook overrides.  Each hook macro is first undefined and then
   defined to this port's implementation or to a constant.  */

/* Option handling, attributes and addressing.  */
2067 #undef TARGET_OPTION_OVERRIDE
2068 #define TARGET_OPTION_OVERRIDE nvptx_option_override
2070 #undef TARGET_ATTRIBUTE_TABLE
2071 #define TARGET_ATTRIBUTE_TABLE nvptx_attribute_table
2073 #undef TARGET_LEGITIMATE_ADDRESS_P
2074 #define TARGET_LEGITIMATE_ADDRESS_P nvptx_legitimate_address_p
2076 #undef TARGET_PROMOTE_FUNCTION_MODE
2077 #define TARGET_PROMOTE_FUNCTION_MODE nvptx_promote_function_mode
/* Function arguments and return values.  Note that the same function
   serves both the argument boundary and the round boundary hook.  */
2079 #undef TARGET_FUNCTION_ARG
2080 #define TARGET_FUNCTION_ARG nvptx_function_arg
2081 #undef TARGET_FUNCTION_INCOMING_ARG
2082 #define TARGET_FUNCTION_INCOMING_ARG nvptx_function_incoming_arg
2083 #undef TARGET_FUNCTION_ARG_ADVANCE
2084 #define TARGET_FUNCTION_ARG_ADVANCE nvptx_function_arg_advance
2085 #undef TARGET_FUNCTION_ARG_BOUNDARY
2086 #define TARGET_FUNCTION_ARG_BOUNDARY nvptx_function_arg_boundary
2087 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
2088 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY nvptx_function_arg_boundary
2089 #undef TARGET_PASS_BY_REFERENCE
2090 #define TARGET_PASS_BY_REFERENCE nvptx_pass_by_reference
2091 #undef TARGET_FUNCTION_VALUE_REGNO_P
2092 #define TARGET_FUNCTION_VALUE_REGNO_P nvptx_function_value_regno_p
2093 #undef TARGET_FUNCTION_VALUE
2094 #define TARGET_FUNCTION_VALUE nvptx_function_value
2095 #undef TARGET_LIBCALL_VALUE
2096 #define TARGET_LIBCALL_VALUE nvptx_libcall_value
2097 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
2098 #define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall
2099 #undef TARGET_SPLIT_COMPLEX_ARG
2100 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
2101 #undef TARGET_RETURN_IN_MEMORY
2102 #define TARGET_RETURN_IN_MEMORY nvptx_return_in_memory
2103 #undef TARGET_OMIT_STRUCT_RETURN_REG
2104 #define TARGET_OMIT_STRUCT_RETURN_REG true
2105 #undef TARGET_STRICT_ARGUMENT_NAMING
2106 #define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming
2107 #undef TARGET_STATIC_CHAIN
2108 #define TARGET_STATIC_CHAIN nvptx_static_chain
/* Hooks bracketing the expansion of a call's arguments.  */
2110 #undef TARGET_CALL_ARGS
2111 #define TARGET_CALL_ARGS nvptx_call_args
2112 #undef TARGET_END_CALL_ARGS
2113 #define TARGET_END_CALL_ARGS nvptx_end_call_args
/* Assembly output.  */
2115 #undef TARGET_ASM_FILE_START
2116 #define TARGET_ASM_FILE_START nvptx_file_start
2117 #undef TARGET_ASM_FILE_END
2118 #define TARGET_ASM_FILE_END nvptx_file_end
2119 #undef TARGET_ASM_GLOBALIZE_LABEL
2120 #define TARGET_ASM_GLOBALIZE_LABEL nvptx_globalize_label
2121 #undef TARGET_ASM_ASSEMBLE_UNDEFINED_DECL
2122 #define TARGET_ASM_ASSEMBLE_UNDEFINED_DECL nvptx_assemble_undefined_decl
2123 #undef TARGET_PRINT_OPERAND
2124 #define TARGET_PRINT_OPERAND nvptx_print_operand
2125 #undef TARGET_PRINT_OPERAND_ADDRESS
2126 #define TARGET_PRINT_OPERAND_ADDRESS nvptx_print_operand_address
2127 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
2128 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P nvptx_print_operand_punct_valid_p
2129 #undef TARGET_ASM_INTEGER
2130 #define TARGET_ASM_INTEGER nvptx_assemble_integer
2131 #undef TARGET_ASM_DECL_END
2132 #define TARGET_ASM_DECL_END nvptx_assemble_decl_end
2133 #undef TARGET_ASM_DECLARE_CONSTANT_NAME
2134 #define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
2135 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
2136 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
2137 #undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
2138 #define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true
/* Machine-dependent reorg pass and register allocation.  */
2140 #undef TARGET_MACHINE_DEPENDENT_REORG
2141 #define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
2142 #undef TARGET_NO_REGISTER_ALLOCATION
2143 #define TARGET_NO_REGISTER_ALLOCATION true
/* Offloading support.  */
2145 #undef TARGET_RECORD_OFFLOAD_SYMBOL
2146 #define TARGET_RECORD_OFFLOAD_SYMBOL nvptx_record_offload_symbol
2148 #undef TARGET_VECTOR_ALIGNMENT
2149 #define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
/* The target vector itself.  NOTE(review): TARGET_INITIALIZER is defined
   outside this file; presumably it expands to an initializer that picks up
   the hook macros defined above -- confirm.  */
2151 struct gcc_target targetm = TARGET_INITIALIZER;
2153 #include "gt-nvptx.h"