Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS hook.
[official-gcc.git] / gcc / config / nvptx / nvptx.c
blob964c3cd4638a6e5e00317bd259282f4b345af5fe
1 /* Target code for NVPTX.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include <sstream>
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "input.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "tree.h"
31 #include "insn-flags.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "insn-codes.h"
35 #include "hard-reg-set.h"
36 #include "function.h"
37 #include "flags.h"
38 #include "insn-config.h"
39 #include "expmed.h"
40 #include "dojump.h"
41 #include "explow.h"
42 #include "calls.h"
43 #include "emit-rtl.h"
44 #include "varasm.h"
45 #include "stmt.h"
46 #include "expr.h"
47 #include "regs.h"
48 #include "optabs.h"
49 #include "recog.h"
50 #include "timevar.h"
51 #include "tm_p.h"
52 #include "tm-preds.h"
53 #include "tm-constrs.h"
54 #include "langhooks.h"
55 #include "dbxout.h"
56 #include "target.h"
57 #include "target-def.h"
58 #include "diagnostic.h"
59 #include "predict.h"
60 #include "basic-block.h"
61 #include "cfgrtl.h"
62 #include "stor-layout.h"
63 #include "df.h"
64 #include "builtins.h"
66 /* Record the function decls we've written, and the libfuncs and function
67 decls corresponding to them. */
68 static std::stringstream func_decls;
70 struct declared_libfunc_hasher : ggc_cache_hasher<rtx>
72 static hashval_t hash (rtx x) { return htab_hash_pointer (x); }
73 static bool equal (rtx a, rtx b) { return a == b; }
76 static GTY((cache))
77 hash_table<declared_libfunc_hasher> *declared_libfuncs_htab;
79 struct tree_hasher : ggc_cache_hasher<tree>
81 static hashval_t hash (tree t) { return htab_hash_pointer (t); }
82 static bool equal (tree a, tree b) { return a == b; }
85 static GTY((cache)) hash_table<tree_hasher> *declared_fndecls_htab;
86 static GTY((cache)) hash_table<tree_hasher> *needed_fndecls_htab;
88 /* Allocate a new, cleared machine_function structure. */
90 static struct machine_function *
91 nvptx_init_machine_status (void)
93 struct machine_function *p = ggc_cleared_alloc<machine_function> ();
94 p->ret_reg_mode = VOIDmode;
95 return p;
98 /* Implement TARGET_OPTION_OVERRIDE. */
100 static void
101 nvptx_option_override (void)
103 init_machine_status = nvptx_init_machine_status;
104 /* Gives us a predictable order, which we need especially for variables. */
105 flag_toplevel_reorder = 1;
106 /* Assumes that it will see only hard registers. */
107 flag_var_tracking = 0;
108 write_symbols = NO_DEBUG;
109 debug_info_level = DINFO_LEVEL_NONE;
111 declared_fndecls_htab = hash_table<tree_hasher>::create_ggc (17);
112 needed_fndecls_htab = hash_table<tree_hasher>::create_ggc (17);
113 declared_libfuncs_htab
114 = hash_table<declared_libfunc_hasher>::create_ggc (17);
117 /* Return the mode to be used when declaring a ptx object for OBJ.
118 For objects with subparts such as complex modes this is the mode
119 of the subpart. */
121 machine_mode
122 nvptx_underlying_object_mode (rtx obj)
124 if (GET_CODE (obj) == SUBREG)
125 obj = SUBREG_REG (obj);
126 machine_mode mode = GET_MODE (obj);
127 if (mode == TImode)
128 return DImode;
129 if (COMPLEX_MODE_P (mode))
130 return GET_MODE_INNER (mode);
131 return mode;
134 /* Return a ptx type for MODE. If PROMOTE, then use .u32 for QImode to
135 deal with ptx ideosyncracies. */
137 const char *
138 nvptx_ptx_type_from_mode (machine_mode mode, bool promote)
140 switch (mode)
142 case BLKmode:
143 return ".b8";
144 case BImode:
145 return ".pred";
146 case QImode:
147 if (promote)
148 return ".u32";
149 else
150 return ".u8";
151 case HImode:
152 return ".u16";
153 case SImode:
154 return ".u32";
155 case DImode:
156 return ".u64";
158 case SFmode:
159 return ".f32";
160 case DFmode:
161 return ".f64";
163 default:
164 gcc_unreachable ();
168 /* Return the number of pieces to use when dealing with a pseudo of *PMODE.
169 Alter *PMODE if we return a number greater than one. */
171 static int
172 maybe_split_mode (machine_mode *pmode)
174 machine_mode mode = *pmode;
176 if (COMPLEX_MODE_P (mode))
178 *pmode = GET_MODE_INNER (mode);
179 return 2;
181 else if (mode == TImode)
183 *pmode = DImode;
184 return 2;
186 return 1;
189 /* Like maybe_split_mode, but only return whether or not the mode
190 needs to be split. */
191 static bool
192 nvptx_split_reg_p (machine_mode mode)
194 if (COMPLEX_MODE_P (mode))
195 return true;
196 if (mode == TImode)
197 return true;
198 return false;
/* Nonzero if an argument of mode MODE and type TYPE is passed by value:
   integer and float modes always qualify, complex integer/float modes
   qualify only when TYPE is not an aggregate, and TImode never does.
   TYPE is examined only for complex modes.  */
#define PASS_IN_REG_P(MODE, TYPE)                               \
  ((GET_MODE_CLASS (MODE) == MODE_INT                           \
    || GET_MODE_CLASS (MODE) == MODE_FLOAT                      \
    || ((GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT              \
         || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)        \
        && !AGGREGATE_TYPE_P (TYPE)))                           \
   && (MODE) != TImode)
/* Nonzero if a value of mode MODE is returned directly rather than
   through memory: an integer or float mode of at most 8 bytes.  */
#define RETURN_IN_REG_P(MODE)                   \
  ((GET_MODE_CLASS (MODE) == MODE_INT           \
    || GET_MODE_CLASS (MODE) == MODE_FLOAT)     \
   && GET_MODE_SIZE (MODE) <= 8)
214 /* Perform a mode promotion for a function argument with MODE. Return
215 the promoted mode. */
217 static machine_mode
218 arg_promotion (machine_mode mode)
220 if (mode == QImode || mode == HImode)
221 return SImode;
222 return mode;
225 /* Write the declaration of a function arg of TYPE to S. I is the index
226 of the argument, MODE its mode. NO_ARG_TYPES is true if this is for
227 a decl with zero TYPE_ARG_TYPES, i.e. an old-style C decl. */
229 static int
230 write_one_arg (std::stringstream &s, tree type, int i, machine_mode mode,
231 bool no_arg_types)
233 if (!PASS_IN_REG_P (mode, type))
234 mode = Pmode;
236 int count = maybe_split_mode (&mode);
238 if (count == 2)
240 write_one_arg (s, NULL_TREE, i, mode, false);
241 write_one_arg (s, NULL_TREE, i + 1, mode, false);
242 return i + 1;
245 if (no_arg_types && !AGGREGATE_TYPE_P (type))
247 if (mode == SFmode)
248 mode = DFmode;
249 mode = arg_promotion (mode);
252 if (i > 0)
253 s << ", ";
254 s << ".param" << nvptx_ptx_type_from_mode (mode, false) << " %in_ar"
255 << (i + 1) << (mode == QImode || mode == HImode ? "[1]" : "");
256 if (mode == BLKmode)
257 s << "[" << int_size_in_bytes (type) << "]";
258 return i;
261 /* Look for attributes in ATTRS that would indicate we must write a function
262 as a .entry kernel rather than a .func. Return true if one is found. */
264 static bool
265 write_as_kernel (tree attrs)
267 return (lookup_attribute ("kernel", attrs) != NULL_TREE
268 || lookup_attribute ("omp target entrypoint", attrs) != NULL_TREE);
271 /* Write a function decl for DECL to S, where NAME is the name to be used. */
273 static void
274 nvptx_write_function_decl (std::stringstream &s, const char *name, const_tree decl)
276 tree fntype = TREE_TYPE (decl);
277 tree result_type = TREE_TYPE (fntype);
278 tree args = TYPE_ARG_TYPES (fntype);
279 tree attrs = DECL_ATTRIBUTES (decl);
280 bool kernel = write_as_kernel (attrs);
281 bool is_main = strcmp (name, "main") == 0;
282 bool args_from_decl = false;
284 /* We get:
285 NULL in TYPE_ARG_TYPES, for old-style functions
286 NULL in DECL_ARGUMENTS, for builtin functions without another
287 declaration.
288 So we have to pick the best one we have. */
289 if (args == 0)
291 args = DECL_ARGUMENTS (decl);
292 args_from_decl = true;
295 if (DECL_EXTERNAL (decl))
296 s << ".extern ";
297 else if (TREE_PUBLIC (decl))
298 s << ".visible ";
300 if (kernel)
301 s << ".entry ";
302 else
303 s << ".func ";
305 /* Declare the result. */
306 bool return_in_mem = false;
307 if (TYPE_MODE (result_type) != VOIDmode)
309 machine_mode mode = TYPE_MODE (result_type);
310 if (!RETURN_IN_REG_P (mode))
311 return_in_mem = true;
312 else
314 mode = arg_promotion (mode);
315 s << "(.param" << nvptx_ptx_type_from_mode (mode, false)
316 << " %out_retval)";
320 if (name[0] == '*')
321 s << (name + 1);
322 else
323 s << name;
325 /* Declare argument types. */
326 if ((args != NULL_TREE
327 && !(TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) == void_type_node))
328 || is_main
329 || return_in_mem
330 || DECL_STATIC_CHAIN (decl))
332 s << "(";
333 int i = 0;
334 bool any_args = false;
335 if (return_in_mem)
337 s << ".param.u" << GET_MODE_BITSIZE (Pmode) << " %in_ar1";
338 i++;
340 while (args != NULL_TREE)
342 tree type = args_from_decl ? TREE_TYPE (args) : TREE_VALUE (args);
343 machine_mode mode = TYPE_MODE (type);
345 if (mode != VOIDmode)
347 i = write_one_arg (s, type, i, mode,
348 TYPE_ARG_TYPES (fntype) == 0);
349 any_args = true;
350 i++;
352 args = TREE_CHAIN (args);
354 if (stdarg_p (fntype))
356 gcc_assert (i > 0);
357 s << ", .param.u" << GET_MODE_BITSIZE (Pmode) << " %in_argp";
359 if (DECL_STATIC_CHAIN (decl))
361 if (i > 0)
362 s << ", ";
363 s << ".reg.u" << GET_MODE_BITSIZE (Pmode)
364 << reg_names [STATIC_CHAIN_REGNUM];
366 if (!any_args && is_main)
367 s << ".param.u32 %argc, .param.u" << GET_MODE_BITSIZE (Pmode)
368 << " %argv";
369 s << ")";
373 /* Walk either ARGTYPES or ARGS if the former is null, and write out part of
374 the function header to FILE. If WRITE_COPY is false, write reg
375 declarations, otherwise write the copy from the incoming argument to that
376 reg. RETURN_IN_MEM indicates whether to start counting arg numbers at 1
377 instead of 0. */
379 static void
380 walk_args_for_param (FILE *file, tree argtypes, tree args, bool write_copy,
381 bool return_in_mem)
383 int i;
385 bool args_from_decl = false;
386 if (argtypes == 0)
387 args_from_decl = true;
388 else
389 args = argtypes;
391 for (i = return_in_mem ? 1 : 0; args != NULL_TREE; args = TREE_CHAIN (args))
393 tree type = args_from_decl ? TREE_TYPE (args) : TREE_VALUE (args);
394 machine_mode mode = TYPE_MODE (type);
396 if (mode == VOIDmode)
397 break;
399 if (!PASS_IN_REG_P (mode, type))
400 mode = Pmode;
402 int count = maybe_split_mode (&mode);
403 if (count == 1)
405 if (argtypes == NULL && !AGGREGATE_TYPE_P (type))
407 if (mode == SFmode)
408 mode = DFmode;
411 mode = arg_promotion (mode);
413 while (count-- > 0)
415 i++;
416 if (write_copy)
417 fprintf (file, "\tld.param%s %%ar%d, [%%in_ar%d];\n",
418 nvptx_ptx_type_from_mode (mode, false), i, i);
419 else
420 fprintf (file, "\t.reg%s %%ar%d;\n",
421 nvptx_ptx_type_from_mode (mode, false), i);
426 /* Write a .func or .kernel declaration (not a definition) along with
427 a helper comment for use by ld. S is the stream to write to, DECL
428 the decl for the function with name NAME. */
430 static void
431 write_function_decl_and_comment (std::stringstream &s, const char *name, const_tree decl)
433 s << "// BEGIN";
434 if (TREE_PUBLIC (decl))
435 s << " GLOBAL";
436 s << " FUNCTION DECL: ";
437 if (name[0] == '*')
438 s << (name + 1);
439 else
440 s << name;
441 s << "\n";
442 nvptx_write_function_decl (s, name, decl);
443 s << ";\n";
/* Map special function names to their replacements.  malloc, free and
   realloc are redirected to libgcc wrappers, and "call" is renamed
   because it triggers a bug in ptxas.  Any other NAME is returned
   unchanged (the same pointer).  */

static const char *
nvptx_name_replacement (const char *name)
{
  static const char *const redirects[][2] =
    {
      { "call", "__nvptx_call" },
      { "malloc", "__nvptx_malloc" },
      { "free", "__nvptx_free" },
      { "realloc", "__nvptx_realloc" },
    };

  for (size_t k = 0; k < sizeof redirects / sizeof redirects[0]; k++)
    if (strcmp (name, redirects[k][0]) == 0)
      return redirects[k][1];
  return name;
}
464 /* If DECL is a FUNCTION_DECL, check the hash table to see if we
465 already encountered it, and if not, insert it and write a ptx
466 declarations that will be output at the end of compilation. */
468 static bool
469 nvptx_record_fndecl (tree decl, bool force = false)
471 if (decl == NULL_TREE || TREE_CODE (decl) != FUNCTION_DECL
472 || !DECL_EXTERNAL (decl))
473 return true;
475 if (!force && TYPE_ARG_TYPES (TREE_TYPE (decl)) == NULL_TREE)
476 return false;
478 tree *slot = declared_fndecls_htab->find_slot (decl, INSERT);
479 if (*slot == NULL)
481 *slot = decl;
482 const char *name = get_fnname_from_decl (decl);
483 name = nvptx_name_replacement (name);
484 write_function_decl_and_comment (func_decls, name, decl);
486 return true;
489 /* Record that we need to emit a ptx decl for DECL. Either do it now, or
490 record it for later in case we have no argument information at this
491 point. */
493 void
494 nvptx_record_needed_fndecl (tree decl)
496 if (nvptx_record_fndecl (decl))
497 return;
499 tree *slot = needed_fndecls_htab->find_slot (decl, INSERT);
500 if (*slot == NULL)
501 *slot = decl;
504 /* Implement ASM_DECLARE_FUNCTION_NAME. Writes the start of a ptx
505 function, including local var decls and copies from the arguments to
506 local regs. */
508 void
509 nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
511 tree fntype = TREE_TYPE (decl);
512 tree result_type = TREE_TYPE (fntype);
514 name = nvptx_name_replacement (name);
516 std::stringstream s;
517 write_function_decl_and_comment (s, name, decl);
518 s << "// BEGIN";
519 if (TREE_PUBLIC (decl))
520 s << " GLOBAL";
521 s << " FUNCTION DEF: ";
523 if (name[0] == '*')
524 s << (name + 1);
525 else
526 s << name;
527 s << "\n";
529 nvptx_write_function_decl (s, name, decl);
530 fprintf (file, "%s", s.str().c_str());
532 bool return_in_mem = false;
533 if (TYPE_MODE (result_type) != VOIDmode)
535 machine_mode mode = TYPE_MODE (result_type);
536 if (!RETURN_IN_REG_P (mode))
537 return_in_mem = true;
540 fprintf (file, "\n{\n");
542 /* Ensure all arguments that should live in a register have one
543 declared. We'll emit the copies below. */
544 walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl),
545 false, return_in_mem);
546 if (return_in_mem)
547 fprintf (file, "\t.reg.u%d %%ar1;\n", GET_MODE_BITSIZE (Pmode));
548 else if (TYPE_MODE (result_type) != VOIDmode)
550 machine_mode mode = arg_promotion (TYPE_MODE (result_type));
551 fprintf (file, ".reg%s %%retval;\n",
552 nvptx_ptx_type_from_mode (mode, false));
555 if (stdarg_p (fntype))
556 fprintf (file, "\t.reg.u%d %%argp;\n", GET_MODE_BITSIZE (Pmode));
558 fprintf (file, "\t.reg.u%d %s;\n", GET_MODE_BITSIZE (Pmode),
559 reg_names[OUTGOING_STATIC_CHAIN_REGNUM]);
561 /* Declare the pseudos we have as ptx registers. */
562 int maxregs = max_reg_num ();
563 for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
565 if (regno_reg_rtx[i] != const0_rtx)
567 machine_mode mode = PSEUDO_REGNO_MODE (i);
568 int count = maybe_split_mode (&mode);
569 if (count > 1)
571 while (count-- > 0)
572 fprintf (file, "\t.reg%s %%r%d$%d;\n",
573 nvptx_ptx_type_from_mode (mode, true),
574 i, count);
576 else
577 fprintf (file, "\t.reg%s %%r%d;\n",
578 nvptx_ptx_type_from_mode (mode, true),
583 /* The only reason we might be using outgoing args is if we call a stdargs
584 function. Allocate the space for this. If we called varargs functions
585 without passing any variadic arguments, we'll see a reference to outargs
586 even with a zero outgoing_args_size. */
587 HOST_WIDE_INT sz = crtl->outgoing_args_size;
588 if (sz == 0)
589 sz = 1;
590 if (cfun->machine->has_call_with_varargs)
591 fprintf (file, "\t.reg.u%d %%outargs;\n"
592 "\t.local.align 8 .b8 %%outargs_ar[" HOST_WIDE_INT_PRINT_DEC"];\n",
593 BITS_PER_WORD, sz);
594 if (cfun->machine->punning_buffer_size > 0)
595 fprintf (file, "\t.reg.u%d %%punbuffer;\n"
596 "\t.local.align 8 .b8 %%punbuffer_ar[%d];\n",
597 BITS_PER_WORD, cfun->machine->punning_buffer_size);
599 /* Declare a local variable for the frame. */
600 sz = get_frame_size ();
601 if (sz > 0 || cfun->machine->has_call_with_sc)
603 fprintf (file, "\t.reg.u%d %%frame;\n"
604 "\t.local.align 8 .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n",
605 BITS_PER_WORD, sz == 0 ? 1 : sz);
606 fprintf (file, "\tcvta.local.u%d %%frame, %%farray;\n",
607 BITS_PER_WORD);
610 if (cfun->machine->has_call_with_varargs)
611 fprintf (file, "\tcvta.local.u%d %%outargs, %%outargs_ar;\n",
612 BITS_PER_WORD);
613 if (cfun->machine->punning_buffer_size > 0)
614 fprintf (file, "\tcvta.local.u%d %%punbuffer, %%punbuffer_ar;\n",
615 BITS_PER_WORD);
617 /* Now emit any copies necessary for arguments. */
618 walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl),
619 true, return_in_mem);
620 if (return_in_mem)
621 fprintf (file, "ld.param.u%d %%ar1, [%%in_ar1];\n",
622 GET_MODE_BITSIZE (Pmode));
623 if (stdarg_p (fntype))
624 fprintf (file, "ld.param.u%d %%argp, [%%in_argp];\n",
625 GET_MODE_BITSIZE (Pmode));
628 /* Output a return instruction. Also copy the return value to its outgoing
629 location. */
631 const char *
632 nvptx_output_return (void)
634 tree fntype = TREE_TYPE (current_function_decl);
635 tree result_type = TREE_TYPE (fntype);
636 if (TYPE_MODE (result_type) != VOIDmode)
638 machine_mode mode = TYPE_MODE (result_type);
639 if (RETURN_IN_REG_P (mode))
641 mode = arg_promotion (mode);
642 fprintf (asm_out_file, "\tst.param%s\t[%%out_retval], %%retval;\n",
643 nvptx_ptx_type_from_mode (mode, false));
647 return "ret;";
650 /* Construct a function declaration from a call insn. This can be
651 necessary for two reasons - either we have an indirect call which
652 requires a .callprototype declaration, or we have a libcall
653 generated by emit_library_call for which no decl exists. */
655 static void
656 write_func_decl_from_insn (std::stringstream &s, rtx result, rtx pat,
657 rtx callee)
659 bool callprototype = register_operand (callee, Pmode);
660 const char *name = "_";
661 if (!callprototype)
663 name = XSTR (callee, 0);
664 name = nvptx_name_replacement (name);
665 s << "// BEGIN GLOBAL FUNCTION DECL: " << name << "\n";
667 s << (callprototype ? "\t.callprototype\t" : "\t.extern .func ");
669 if (result != NULL_RTX)
671 s << "(.param";
672 s << nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)),
673 false);
674 s << " ";
675 if (callprototype)
676 s << "_";
677 else
678 s << "%out_retval";
679 s << ")";
682 s << name;
684 int nargs = XVECLEN (pat, 0) - 1;
685 if (nargs > 0)
687 s << " (";
688 for (int i = 0; i < nargs; i++)
690 rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
691 machine_mode mode = GET_MODE (t);
692 int count = maybe_split_mode (&mode);
694 while (count-- > 0)
696 s << ".param";
697 s << nvptx_ptx_type_from_mode (mode, false);
698 s << " ";
699 if (callprototype)
700 s << "_";
701 else
702 s << "%arg" << i;
703 if (mode == QImode || mode == HImode)
704 s << "[1]";
705 if (i + 1 < nargs || count > 0)
706 s << ", ";
709 s << ")";
711 s << ";\n";
/* Finish a function body by writing the closing brace (tab, '}',
   newline) to FILE.  */

void
nvptx_function_end (FILE *file)
{
  fputs ("\t}\n", file);
}
722 /* Decide whether we can make a sibling call to a function. For ptx, we
723 can't. */
725 static bool
726 nvptx_function_ok_for_sibcall (tree, tree)
728 return false;
731 /* Implement the TARGET_CALL_ARGS hook. Record information about one
732 argument to the next call. */
734 static void
735 nvptx_call_args (rtx arg, tree funtype)
737 if (cfun->machine->start_call == NULL_RTX)
739 cfun->machine->call_args = NULL;
740 cfun->machine->funtype = funtype;
741 cfun->machine->start_call = const0_rtx;
743 if (arg == pc_rtx)
744 return;
746 rtx_expr_list *args_so_far = cfun->machine->call_args;
747 if (REG_P (arg))
748 cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg, args_so_far);
751 /* Implement the corresponding END_CALL_ARGS hook. Clear and free the
752 information we recorded. */
754 static void
755 nvptx_end_call_args (void)
757 cfun->machine->start_call = NULL_RTX;
758 free_EXPR_LIST_list (&cfun->machine->call_args);
761 /* Emit the sequence for a call. */
763 void
764 nvptx_expand_call (rtx retval, rtx address)
766 int nargs;
767 rtx callee = XEXP (address, 0);
768 rtx pat, t;
769 rtvec vec;
770 bool external_decl = false;
772 nargs = 0;
773 for (t = cfun->machine->call_args; t; t = XEXP (t, 1))
774 nargs++;
776 bool has_varargs = false;
777 tree decl_type = NULL_TREE;
779 if (!call_insn_operand (callee, Pmode))
781 callee = force_reg (Pmode, callee);
782 address = change_address (address, QImode, callee);
785 if (GET_CODE (callee) == SYMBOL_REF)
787 tree decl = SYMBOL_REF_DECL (callee);
788 if (decl != NULL_TREE)
790 decl_type = TREE_TYPE (decl);
791 if (DECL_STATIC_CHAIN (decl))
792 cfun->machine->has_call_with_sc = true;
793 if (DECL_EXTERNAL (decl))
794 external_decl = true;
797 if (cfun->machine->funtype
798 /* It's possible to construct testcases where we call a variable.
799 See compile/20020129-1.c. stdarg_p will crash so avoid calling it
800 in such a case. */
801 && (TREE_CODE (cfun->machine->funtype) == FUNCTION_TYPE
802 || TREE_CODE (cfun->machine->funtype) == METHOD_TYPE)
803 && stdarg_p (cfun->machine->funtype))
805 has_varargs = true;
806 cfun->machine->has_call_with_varargs = true;
808 vec = rtvec_alloc (nargs + 1 + (has_varargs ? 1 : 0));
809 pat = gen_rtx_PARALLEL (VOIDmode, vec);
810 if (has_varargs)
812 rtx this_arg = gen_reg_rtx (Pmode);
813 if (Pmode == DImode)
814 emit_move_insn (this_arg, stack_pointer_rtx);
815 else
816 emit_move_insn (this_arg, stack_pointer_rtx);
817 XVECEXP (pat, 0, nargs + 1) = gen_rtx_USE (VOIDmode, this_arg);
820 int i;
821 rtx arg;
822 for (i = 1, arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1), i++)
824 rtx this_arg = XEXP (arg, 0);
825 XVECEXP (pat, 0, i) = gen_rtx_USE (VOIDmode, this_arg);
828 rtx tmp_retval = retval;
829 t = gen_rtx_CALL (VOIDmode, address, const0_rtx);
830 if (retval != NULL_RTX)
832 if (!nvptx_register_operand (retval, GET_MODE (retval)))
833 tmp_retval = gen_reg_rtx (GET_MODE (retval));
834 t = gen_rtx_SET (tmp_retval, t);
836 XVECEXP (pat, 0, 0) = t;
837 if (!REG_P (callee)
838 && (decl_type == NULL_TREE
839 || (external_decl && TYPE_ARG_TYPES (decl_type) == NULL_TREE)))
841 rtx *slot = declared_libfuncs_htab->find_slot (callee, INSERT);
842 if (*slot == NULL)
844 *slot = callee;
845 write_func_decl_from_insn (func_decls, retval, pat, callee);
848 emit_call_insn (pat);
849 if (tmp_retval != retval)
850 emit_move_insn (retval, tmp_retval);
853 /* Implement TARGET_FUNCTION_ARG. */
855 static rtx
856 nvptx_function_arg (cumulative_args_t, machine_mode mode,
857 const_tree, bool named)
859 if (mode == VOIDmode)
860 return NULL_RTX;
862 if (named)
863 return gen_reg_rtx (mode);
864 return NULL_RTX;
867 /* Implement TARGET_FUNCTION_INCOMING_ARG. */
869 static rtx
870 nvptx_function_incoming_arg (cumulative_args_t cum_v, machine_mode mode,
871 const_tree, bool named)
873 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
874 if (mode == VOIDmode)
875 return NULL_RTX;
877 if (!named)
878 return NULL_RTX;
880 /* No need to deal with split modes here, the only case that can
881 happen is complex modes and those are dealt with by
882 TARGET_SPLIT_COMPLEX_ARG. */
883 return gen_rtx_UNSPEC (mode,
884 gen_rtvec (1, GEN_INT (1 + cum->count)),
885 UNSPEC_ARG_REG);
888 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
890 static void
891 nvptx_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
892 const_tree type ATTRIBUTE_UNUSED,
893 bool named ATTRIBUTE_UNUSED)
895 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
896 if (mode == TImode)
897 cum->count += 2;
898 else
899 cum->count++;
902 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.
904 For nvptx, we know how to handle functions declared as stdarg: by
905 passing an extra pointer to the unnamed arguments. However, the
906 Fortran frontend can produce a different situation, where a
907 function pointer is declared with no arguments, but the actual
908 function and calls to it take more arguments. In that case, we
909 want to ensure the call matches the definition of the function. */
911 static bool
912 nvptx_strict_argument_naming (cumulative_args_t cum_v)
914 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
915 return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
918 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. */
920 static unsigned int
921 nvptx_function_arg_boundary (machine_mode mode, const_tree type)
923 unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);
925 if (boundary > BITS_PER_WORD)
926 return 2 * BITS_PER_WORD;
928 if (mode == BLKmode)
930 HOST_WIDE_INT size = int_size_in_bytes (type);
931 if (size > 4)
932 return 2 * BITS_PER_WORD;
933 if (boundary < BITS_PER_WORD)
935 if (size >= 3)
936 return BITS_PER_WORD;
937 if (size >= 2)
938 return 2 * BITS_PER_UNIT;
941 return boundary;
944 /* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place
945 where function FUNC returns or receives a value of data type TYPE. */
947 static rtx
948 nvptx_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
949 bool outgoing)
951 int unsignedp = TYPE_UNSIGNED (type);
952 machine_mode orig_mode = TYPE_MODE (type);
953 machine_mode mode = promote_function_mode (type, orig_mode,
954 &unsignedp, NULL_TREE, 1);
955 if (outgoing)
956 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
957 if (cfun->machine->start_call == NULL_RTX)
958 /* Pretend to return in a hard reg for early uses before pseudos can be
959 generated. */
960 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
961 return gen_reg_rtx (mode);
964 /* Implement TARGET_LIBCALL_VALUE. */
966 static rtx
967 nvptx_libcall_value (machine_mode mode, const_rtx)
969 if (cfun->machine->start_call == NULL_RTX)
970 /* Pretend to return in a hard reg for early uses before pseudos can be
971 generated. */
972 return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
973 return gen_reg_rtx (mode);
976 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
978 static bool
979 nvptx_function_value_regno_p (const unsigned int regno)
981 return regno == NVPTX_RETURN_REGNUM;
984 /* Types with a mode other than those supported by the machine are passed by
985 reference in memory. */
987 static bool
988 nvptx_pass_by_reference (cumulative_args_t, machine_mode mode,
989 const_tree type, bool)
991 return !PASS_IN_REG_P (mode, type);
994 /* Implement TARGET_RETURN_IN_MEMORY. */
996 static bool
997 nvptx_return_in_memory (const_tree type, const_tree)
999 machine_mode mode = TYPE_MODE (type);
1000 if (!RETURN_IN_REG_P (mode))
1001 return true;
1002 return false;
1005 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
1007 static machine_mode
1008 nvptx_promote_function_mode (const_tree type, machine_mode mode,
1009 int *punsignedp,
1010 const_tree funtype, int for_return)
1012 if (type == NULL_TREE)
1013 return mode;
1014 if (for_return)
1015 return promote_mode (type, mode, punsignedp);
1016 /* For K&R-style functions, try to match the language promotion rules to
1017 minimize type mismatches at assembly time. */
1018 if (TYPE_ARG_TYPES (funtype) == NULL_TREE
1019 && type != NULL_TREE
1020 && !AGGREGATE_TYPE_P (type))
1022 if (mode == SFmode)
1023 mode = DFmode;
1024 mode = arg_promotion (mode);
1027 return mode;
1030 /* Implement TARGET_STATIC_CHAIN. */
1032 static rtx
1033 nvptx_static_chain (const_tree fndecl, bool incoming_p)
1035 if (!DECL_STATIC_CHAIN (fndecl))
1036 return NULL;
1038 if (incoming_p)
1039 return gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
1040 else
1041 return gen_rtx_REG (Pmode, OUTGOING_STATIC_CHAIN_REGNUM);
1044 /* Emit a comparison COMPARE, and return the new test to be used in the
1045 jump. */
1048 nvptx_expand_compare (rtx compare)
1050 rtx pred = gen_reg_rtx (BImode);
1051 rtx cmp = gen_rtx_fmt_ee (GET_CODE (compare), BImode,
1052 XEXP (compare, 0), XEXP (compare, 1));
1053 emit_insn (gen_rtx_SET (pred, cmp));
1054 return gen_rtx_NE (BImode, pred, const0_rtx);
1057 /* When loading an operand ORIG_OP, verify whether an address space
1058 conversion to generic is required, and if so, perform it. Also
1059 check for SYMBOL_REFs for function decls and call
1060 nvptx_record_needed_fndecl as needed.
1061 Return either the original operand, or the converted one. */
1064 nvptx_maybe_convert_symbolic_operand (rtx orig_op)
1066 if (GET_MODE (orig_op) != Pmode)
1067 return orig_op;
1069 rtx op = orig_op;
1070 while (GET_CODE (op) == PLUS || GET_CODE (op) == CONST)
1071 op = XEXP (op, 0);
1072 if (GET_CODE (op) != SYMBOL_REF)
1073 return orig_op;
1075 tree decl = SYMBOL_REF_DECL (op);
1076 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
1078 nvptx_record_needed_fndecl (decl);
1079 return orig_op;
1082 addr_space_t as = nvptx_addr_space_from_address (op);
1083 if (as == ADDR_SPACE_GENERIC)
1084 return orig_op;
1086 enum unspec code;
1087 code = (as == ADDR_SPACE_GLOBAL ? UNSPEC_FROM_GLOBAL
1088 : as == ADDR_SPACE_LOCAL ? UNSPEC_FROM_LOCAL
1089 : as == ADDR_SPACE_SHARED ? UNSPEC_FROM_SHARED
1090 : as == ADDR_SPACE_CONST ? UNSPEC_FROM_CONST
1091 : UNSPEC_FROM_PARAM);
1092 rtx dest = gen_reg_rtx (Pmode);
1093 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig_op),
1094 code)));
1095 return dest;
1098 /* Returns true if X is a valid address for use in a memory reference. */
1100 static bool
1101 nvptx_legitimate_address_p (machine_mode, rtx x, bool)
1103 enum rtx_code code = GET_CODE (x);
1105 switch (code)
1107 case REG:
1108 return true;
1110 case PLUS:
1111 if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1112 return true;
1113 return false;
1115 case CONST:
1116 case SYMBOL_REF:
1117 case LABEL_REF:
1118 return true;
1120 default:
1121 return false;
1125 /* Implement HARD_REGNO_MODE_OK. We barely use hard regs, but we want
1126 to ensure that the return register's mode isn't changed. */
1128 bool
1129 nvptx_hard_regno_mode_ok (int regno, machine_mode mode)
1131 if (regno != NVPTX_RETURN_REGNUM
1132 || cfun == NULL || cfun->machine->ret_reg_mode == VOIDmode)
1133 return true;
1134 return mode == cfun->machine->ret_reg_mode;
1137 /* Convert an address space AS to the corresponding ptx string. */
1139 const char *
1140 nvptx_section_from_addr_space (addr_space_t as)
1142 switch (as)
1144 case ADDR_SPACE_CONST:
1145 return ".const";
1147 case ADDR_SPACE_GLOBAL:
1148 return ".global";
1150 case ADDR_SPACE_SHARED:
1151 return ".shared";
1153 case ADDR_SPACE_GENERIC:
1154 return "";
1156 default:
1157 gcc_unreachable ();
1161 /* Determine whether DECL goes into .const or .global. */
1163 const char *
1164 nvptx_section_for_decl (const_tree decl)
1166 bool is_const = (CONSTANT_CLASS_P (decl)
1167 || TREE_CODE (decl) == CONST_DECL
1168 || TREE_READONLY (decl));
1169 if (is_const)
1170 return ".const";
1172 return ".global";
1175 /* Look for a SYMBOL_REF in ADDR and return the address space to be used
1176 for the insn referencing this address. */
1178 addr_space_t
1179 nvptx_addr_space_from_address (rtx addr)
1181 while (GET_CODE (addr) == PLUS || GET_CODE (addr) == CONST)
1182 addr = XEXP (addr, 0);
1183 if (GET_CODE (addr) != SYMBOL_REF)
1184 return ADDR_SPACE_GENERIC;
1186 tree decl = SYMBOL_REF_DECL (addr);
1187 if (decl == NULL_TREE || TREE_CODE (decl) == FUNCTION_DECL)
1188 return ADDR_SPACE_GENERIC;
1190 bool is_const = (CONSTANT_CLASS_P (decl)
1191 || TREE_CODE (decl) == CONST_DECL
1192 || TREE_READONLY (decl));
1193 if (is_const)
1194 return ADDR_SPACE_CONST;
1196 return ADDR_SPACE_GLOBAL;
1199 /* Machinery to output constant initializers. */
1201 /* Used when assembling integers to ensure data is emitted in
1202 pieces whose size matches the declaration we printed. */
1203 static unsigned int decl_chunk_size;
1204 static machine_mode decl_chunk_mode;
1205 /* Used in the same situation, to keep track of the byte offset
1206 into the initializer. */
1207 static unsigned HOST_WIDE_INT decl_offset;
1208 /* The initializer part we are currently processing. */
1209 static HOST_WIDE_INT init_part;
1210 /* The total size of the object. */
1211 static unsigned HOST_WIDE_INT object_size;
1212 /* True if we found a skip extending to the end of the object. Used to
1213 assert that no data follows. */
1214 static bool object_finished;
1216 /* Write the necessary separator string to begin a new initializer value. */
1218 static void
1219 begin_decl_field (void)
1221 /* We never see decl_offset at zero by the time we get here. */
1222 if (decl_offset == decl_chunk_size)
1223 fprintf (asm_out_file, " = { ");
1224 else
1225 fprintf (asm_out_file, ", ");
1228 /* Output the currently stored chunk as an initializer value. */
1230 static void
1231 output_decl_chunk (void)
1233 begin_decl_field ();
1234 output_address (gen_int_mode (init_part, decl_chunk_mode));
1235 init_part = 0;
1238 /* Add value VAL sized SIZE to the data we're emitting, and keep writing
1239 out chunks as they fill up. */
1241 static void
1242 nvptx_assemble_value (HOST_WIDE_INT val, unsigned int size)
1244 unsigned HOST_WIDE_INT chunk_offset = decl_offset % decl_chunk_size;
1245 gcc_assert (!object_finished);
1246 while (size > 0)
1248 int this_part = size;
1249 if (chunk_offset + this_part > decl_chunk_size)
1250 this_part = decl_chunk_size - chunk_offset;
1251 HOST_WIDE_INT val_part;
1252 HOST_WIDE_INT mask = 2;
1253 mask <<= this_part * BITS_PER_UNIT - 1;
1254 val_part = val & (mask - 1);
1255 init_part |= val_part << (BITS_PER_UNIT * chunk_offset);
1256 val >>= BITS_PER_UNIT * this_part;
1257 size -= this_part;
1258 decl_offset += this_part;
1259 if (decl_offset % decl_chunk_size == 0)
1260 output_decl_chunk ();
1262 chunk_offset = 0;
1266 /* Target hook for assembling integer object X of size SIZE. */
1268 static bool
1269 nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p))
1271 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
1273 gcc_assert (size = decl_chunk_size);
1274 if (decl_offset % decl_chunk_size != 0)
1275 sorry ("cannot emit unaligned pointers in ptx assembly");
1276 decl_offset += size;
1277 begin_decl_field ();
1279 HOST_WIDE_INT off = 0;
1280 if (GET_CODE (x) == CONST)
1281 x = XEXP (x, 0);
1282 if (GET_CODE (x) == PLUS)
1284 off = INTVAL (XEXP (x, 1));
1285 x = XEXP (x, 0);
1287 if (GET_CODE (x) == SYMBOL_REF)
1289 nvptx_record_needed_fndecl (SYMBOL_REF_DECL (x));
1290 fprintf (asm_out_file, "generic(");
1291 output_address (x);
1292 fprintf (asm_out_file, ")");
1294 if (off != 0)
1295 fprintf (asm_out_file, " + " HOST_WIDE_INT_PRINT_DEC, off);
1296 return true;
1299 HOST_WIDE_INT val;
1300 switch (GET_CODE (x))
1302 case CONST_INT:
1303 val = INTVAL (x);
1304 break;
1305 case CONST_DOUBLE:
1306 gcc_unreachable ();
1307 break;
1308 default:
1309 gcc_unreachable ();
1312 nvptx_assemble_value (val, size);
1313 return true;
1316 /* Output SIZE zero bytes. We ignore the FILE argument since the
1317 functions we're calling to perform the output just use
1318 asm_out_file. */
1320 void
1321 nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT size)
1323 if (decl_offset + size >= object_size)
1325 if (decl_offset % decl_chunk_size != 0)
1326 nvptx_assemble_value (0, decl_chunk_size);
1327 object_finished = true;
1328 return;
1331 while (size > decl_chunk_size)
1333 nvptx_assemble_value (0, decl_chunk_size);
1334 size -= decl_chunk_size;
1336 while (size-- > 0)
1337 nvptx_assemble_value (0, 1);
1340 /* Output a string STR with length SIZE. As in nvptx_output_skip we
1341 ignore the FILE arg. */
1343 void
1344 nvptx_output_ascii (FILE *, const char *str, unsigned HOST_WIDE_INT size)
1346 for (unsigned HOST_WIDE_INT i = 0; i < size; i++)
1347 nvptx_assemble_value (str[i], 1);
1350 /* Called when the initializer for a decl has been completely output through
1351 combinations of the three functions above. */
1353 static void
1354 nvptx_assemble_decl_end (void)
1356 if (decl_offset != 0)
1358 if (!object_finished && decl_offset % decl_chunk_size != 0)
1359 nvptx_assemble_value (0, decl_chunk_size);
1361 fprintf (asm_out_file, " }");
1363 fprintf (asm_out_file, ";\n");
1366 /* Start a declaration of a variable of TYPE with NAME to
1367 FILE. IS_PUBLIC says whether this will be externally visible.
1368 Here we just write the linker hint and decide on the chunk size
1369 to use. */
1371 static void
1372 init_output_initializer (FILE *file, const char *name, const_tree type,
1373 bool is_public)
1375 fprintf (file, "// BEGIN%s VAR DEF: ", is_public ? " GLOBAL" : "");
1376 assemble_name_raw (file, name);
1377 fputc ('\n', file);
1379 if (TREE_CODE (type) == ARRAY_TYPE)
1380 type = TREE_TYPE (type);
1381 int sz = int_size_in_bytes (type);
1382 if ((TREE_CODE (type) != INTEGER_TYPE
1383 && TREE_CODE (type) != ENUMERAL_TYPE
1384 && TREE_CODE (type) != REAL_TYPE)
1385 || sz < 0
1386 || sz > HOST_BITS_PER_WIDE_INT)
1387 type = ptr_type_node;
1388 decl_chunk_size = int_size_in_bytes (type);
1389 decl_chunk_mode = int_mode_for_mode (TYPE_MODE (type));
1390 decl_offset = 0;
1391 init_part = 0;
1392 object_finished = false;
1395 /* Implement TARGET_ASM_DECLARE_CONSTANT_NAME. Begin the process of
1396 writing a constant variable EXP with NAME and SIZE and its
1397 initializer to FILE. */
1399 static void
1400 nvptx_asm_declare_constant_name (FILE *file, const char *name,
1401 const_tree exp, HOST_WIDE_INT size)
1403 tree type = TREE_TYPE (exp);
1404 init_output_initializer (file, name, type, false);
1405 fprintf (file, "\t.const .align %d .u%d ",
1406 TYPE_ALIGN (TREE_TYPE (exp)) / BITS_PER_UNIT,
1407 decl_chunk_size * BITS_PER_UNIT);
1408 assemble_name (file, name);
1409 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1410 (size + decl_chunk_size - 1) / decl_chunk_size);
1411 object_size = size;
1414 /* Implement the ASM_DECLARE_OBJECT_NAME macro. Used to start writing
1415 a variable DECL with NAME to FILE. */
1417 void
1418 nvptx_declare_object_name (FILE *file, const char *name, const_tree decl)
1420 if (decl && DECL_SIZE (decl))
1422 tree type = TREE_TYPE (decl);
1423 unsigned HOST_WIDE_INT size;
1425 init_output_initializer (file, name, type, TREE_PUBLIC (decl));
1426 size = tree_to_uhwi (DECL_SIZE_UNIT (decl));
1427 const char *section = nvptx_section_for_decl (decl);
1428 fprintf (file, "\t%s%s .align %d .u%d ",
1429 TREE_PUBLIC (decl) ? " .visible" : "", section,
1430 DECL_ALIGN (decl) / BITS_PER_UNIT,
1431 decl_chunk_size * BITS_PER_UNIT);
1432 assemble_name (file, name);
1433 if (size > 0)
1434 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1435 (size + decl_chunk_size - 1) / decl_chunk_size);
1436 else
1437 object_finished = true;
1438 object_size = size;
/* Implement TARGET_ASM_GLOBALIZE_LABEL.  Deliberately a no-op: both
   arguments are ignored and nothing is written.  */

static void
nvptx_globalize_label (FILE *, const char *)
{
}
1449 /* Implement TARGET_ASM_ASSEMBLE_UNDEFINED_DECL. Write an extern
1450 declaration only for variable DECL with NAME to FILE. */
1451 static void
1452 nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl)
1454 if (TREE_CODE (decl) != VAR_DECL)
1455 return;
1456 const char *section = nvptx_section_for_decl (decl);
1457 fprintf (file, "// BEGIN%s VAR DECL: ", TREE_PUBLIC (decl) ? " GLOBAL" : "");
1458 assemble_name_raw (file, name);
1459 fputs ("\n", file);
1460 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
1461 fprintf (file, ".extern %s .b8 ", section);
1462 assemble_name_raw (file, name);
1463 if (size > 0)
1464 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC"]", size);
1465 fprintf (file, ";\n\n");
1468 /* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this
1469 involves writing .param declarations and in/out copies into them. */
1471 const char *
1472 nvptx_output_call_insn (rtx_insn *insn, rtx result, rtx callee)
1474 char buf[256];
1475 static int labelno;
1476 bool needs_tgt = register_operand (callee, Pmode);
1477 rtx pat = PATTERN (insn);
1478 int nargs = XVECLEN (pat, 0) - 1;
1479 tree decl = NULL_TREE;
1481 fprintf (asm_out_file, "\t{\n");
1482 if (result != NULL)
1484 fprintf (asm_out_file, "\t\t.param%s %%retval_in;\n",
1485 nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)),
1486 false));
1489 if (GET_CODE (callee) == SYMBOL_REF)
1491 decl = SYMBOL_REF_DECL (callee);
1492 if (decl && DECL_EXTERNAL (decl))
1493 nvptx_record_fndecl (decl);
1496 if (needs_tgt)
1498 ASM_GENERATE_INTERNAL_LABEL (buf, "LCT", labelno);
1499 labelno++;
1500 ASM_OUTPUT_LABEL (asm_out_file, buf);
1501 std::stringstream s;
1502 write_func_decl_from_insn (s, result, pat, callee);
1503 fputs (s.str().c_str(), asm_out_file);
1506 for (int i = 0, argno = 0; i < nargs; i++)
1508 rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
1509 machine_mode mode = GET_MODE (t);
1510 int count = maybe_split_mode (&mode);
1512 while (count-- > 0)
1513 fprintf (asm_out_file, "\t\t.param%s %%out_arg%d%s;\n",
1514 nvptx_ptx_type_from_mode (mode, false), argno++,
1515 mode == QImode || mode == HImode ? "[1]" : "");
1517 for (int i = 0, argno = 0; i < nargs; i++)
1519 rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
1520 gcc_assert (REG_P (t));
1521 machine_mode mode = GET_MODE (t);
1522 int count = maybe_split_mode (&mode);
1524 if (count == 1)
1525 fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d;\n",
1526 nvptx_ptx_type_from_mode (mode, false), argno++,
1527 REGNO (t));
1528 else
1530 int n = 0;
1531 while (count-- > 0)
1532 fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d$%d;\n",
1533 nvptx_ptx_type_from_mode (mode, false), argno++,
1534 REGNO (t), n++);
1538 fprintf (asm_out_file, "\t\tcall ");
1539 if (result != NULL_RTX)
1540 fprintf (asm_out_file, "(%%retval_in), ");
1542 if (decl)
1544 const char *name = get_fnname_from_decl (decl);
1545 name = nvptx_name_replacement (name);
1546 assemble_name (asm_out_file, name);
1548 else
1549 output_address (callee);
1551 if (nargs > 0 || (decl && DECL_STATIC_CHAIN (decl)))
1553 fprintf (asm_out_file, ", (");
1554 int i, argno;
1555 for (i = 0, argno = 0; i < nargs; i++)
1557 rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
1558 machine_mode mode = GET_MODE (t);
1559 int count = maybe_split_mode (&mode);
1561 while (count-- > 0)
1563 fprintf (asm_out_file, "%%out_arg%d", argno++);
1564 if (i + 1 < nargs || count > 0)
1565 fprintf (asm_out_file, ", ");
1568 if (decl && DECL_STATIC_CHAIN (decl))
1570 if (i > 0)
1571 fprintf (asm_out_file, ", ");
1572 fprintf (asm_out_file, "%s",
1573 reg_names [OUTGOING_STATIC_CHAIN_REGNUM]);
1576 fprintf (asm_out_file, ")");
1578 if (needs_tgt)
1580 fprintf (asm_out_file, ", ");
1581 assemble_name (asm_out_file, buf);
1583 fprintf (asm_out_file, ";\n");
1584 if (result != NULL_RTX)
1585 return "ld.param%t0\t%0, [%%retval_in];\n\t}";
1587 return "}";
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  The only punctuation
   characters accepted as operand codes are '.' and '#'.  */

static bool
nvptx_print_operand_punct_valid_p (unsigned char c)
{
  switch (c)
    {
    case '.':
    case '#':
      return true;

    default:
      return false;
    }
}
1598 static void nvptx_print_operand (FILE *, rtx, int);
1600 /* Subroutine of nvptx_print_operand; used to print a memory reference X to FILE. */
1602 static void
1603 nvptx_print_address_operand (FILE *file, rtx x, machine_mode)
1605 rtx off;
1606 if (GET_CODE (x) == CONST)
1607 x = XEXP (x, 0);
1608 switch (GET_CODE (x))
1610 case PLUS:
1611 off = XEXP (x, 1);
1612 output_address (XEXP (x, 0));
1613 fprintf (file, "+");
1614 output_address (off);
1615 break;
1617 case SYMBOL_REF:
1618 case LABEL_REF:
1619 output_addr_const (file, x);
1620 break;
1622 default:
1623 gcc_assert (GET_CODE (x) != MEM);
1624 nvptx_print_operand (file, x, 0);
1625 break;
1629 /* Write assembly language output for the address ADDR to FILE. */
1631 static void
1632 nvptx_print_operand_address (FILE *file, rtx addr)
1634 nvptx_print_address_operand (file, addr, VOIDmode);
1637 /* Print an operand, X, to FILE, with an optional modifier in CODE.
1639 Meaning of CODE:
1640 . -- print the predicate for the instruction or an emptry string for an
1641 unconditional one.
1642 # -- print a rounding mode for the instruction
1644 A -- print an address space identifier for a MEM
1645 c -- print an opcode suffix for a comparison operator, including a type code
1646 d -- print a CONST_INT as a vector dimension (x, y, or z)
1647 f -- print a full reg even for something that must always be split
1648 t -- print a type opcode suffix, promoting QImode to 32 bits
1649 T -- print a type size in bits
1650 u -- print a type opcode suffix without promotions. */
1652 static void
1653 nvptx_print_operand (FILE *file, rtx x, int code)
1655 rtx orig_x = x;
1656 machine_mode op_mode;
1658 if (code == '.')
1660 x = current_insn_predicate;
1661 if (x)
1663 unsigned int regno = REGNO (XEXP (x, 0));
1664 fputs ("[", file);
1665 if (GET_CODE (x) == EQ)
1666 fputs ("!", file);
1667 fputs (reg_names [regno], file);
1668 fputs ("]", file);
1670 return;
1672 else if (code == '#')
1674 fputs (".rn", file);
1675 return;
1678 enum rtx_code x_code = GET_CODE (x);
1680 switch (code)
1682 case 'A':
1684 addr_space_t as = nvptx_addr_space_from_address (XEXP (x, 0));
1685 fputs (nvptx_section_from_addr_space (as), file);
1687 break;
1689 case 'd':
1690 gcc_assert (x_code == CONST_INT);
1691 if (INTVAL (x) == 0)
1692 fputs (".x", file);
1693 else if (INTVAL (x) == 1)
1694 fputs (".y", file);
1695 else if (INTVAL (x) == 2)
1696 fputs (".z", file);
1697 else
1698 gcc_unreachable ();
1699 break;
1701 case 't':
1702 op_mode = nvptx_underlying_object_mode (x);
1703 fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, true));
1704 break;
1706 case 'u':
1707 op_mode = nvptx_underlying_object_mode (x);
1708 fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, false));
1709 break;
1711 case 'T':
1712 fprintf (file, "%d", GET_MODE_BITSIZE (GET_MODE (x)));
1713 break;
1715 case 'j':
1716 fprintf (file, "@");
1717 goto common;
1719 case 'J':
1720 fprintf (file, "@!");
1721 goto common;
1723 case 'c':
1724 op_mode = GET_MODE (XEXP (x, 0));
1725 switch (x_code)
1727 case EQ:
1728 fputs (".eq", file);
1729 break;
1730 case NE:
1731 if (FLOAT_MODE_P (op_mode))
1732 fputs (".neu", file);
1733 else
1734 fputs (".ne", file);
1735 break;
1736 case LE:
1737 fputs (".le", file);
1738 break;
1739 case GE:
1740 fputs (".ge", file);
1741 break;
1742 case LT:
1743 fputs (".lt", file);
1744 break;
1745 case GT:
1746 fputs (".gt", file);
1747 break;
1748 case LEU:
1749 fputs (".ls", file);
1750 break;
1751 case GEU:
1752 fputs (".hs", file);
1753 break;
1754 case LTU:
1755 fputs (".lo", file);
1756 break;
1757 case GTU:
1758 fputs (".hi", file);
1759 break;
1760 case LTGT:
1761 fputs (".ne", file);
1762 break;
1763 case UNEQ:
1764 fputs (".equ", file);
1765 break;
1766 case UNLE:
1767 fputs (".leu", file);
1768 break;
1769 case UNGE:
1770 fputs (".geu", file);
1771 break;
1772 case UNLT:
1773 fputs (".ltu", file);
1774 break;
1775 case UNGT:
1776 fputs (".gtu", file);
1777 break;
1778 case UNORDERED:
1779 fputs (".nan", file);
1780 break;
1781 case ORDERED:
1782 fputs (".num", file);
1783 break;
1784 default:
1785 gcc_unreachable ();
1787 if (FLOAT_MODE_P (op_mode)
1788 || x_code == EQ || x_code == NE
1789 || x_code == GEU || x_code == GTU
1790 || x_code == LEU || x_code == LTU)
1791 fputs (nvptx_ptx_type_from_mode (op_mode, true), file);
1792 else
1793 fprintf (file, ".s%d", GET_MODE_BITSIZE (op_mode));
1794 break;
1795 default:
1796 common:
1797 switch (x_code)
1799 case SUBREG:
1800 x = SUBREG_REG (x);
1801 /* fall through */
1803 case REG:
1804 if (HARD_REGISTER_P (x))
1805 fprintf (file, "%s", reg_names[REGNO (x)]);
1806 else
1807 fprintf (file, "%%r%d", REGNO (x));
1808 if (code != 'f' && nvptx_split_reg_p (GET_MODE (x)))
1810 gcc_assert (GET_CODE (orig_x) == SUBREG
1811 && !nvptx_split_reg_p (GET_MODE (orig_x)));
1812 fprintf (file, "$%d", SUBREG_BYTE (orig_x) / UNITS_PER_WORD);
1814 break;
1816 case MEM:
1817 fputc ('[', file);
1818 nvptx_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
1819 fputc (']', file);
1820 break;
1822 case CONST_INT:
1823 output_addr_const (file, x);
1824 break;
1826 case CONST:
1827 case SYMBOL_REF:
1828 case LABEL_REF:
1829 /* We could use output_addr_const, but that can print things like
1830 "x-8", which breaks ptxas. Need to ensure it is output as
1831 "x+-8". */
1832 nvptx_print_address_operand (file, x, VOIDmode);
1833 break;
1835 case CONST_DOUBLE:
1836 long vals[2];
1837 REAL_VALUE_TYPE real;
1838 REAL_VALUE_FROM_CONST_DOUBLE (real, x);
1839 real_to_target (vals, &real, GET_MODE (x));
1840 vals[0] &= 0xffffffff;
1841 vals[1] &= 0xffffffff;
1842 if (GET_MODE (x) == SFmode)
1843 fprintf (file, "0f%08lx", vals[0]);
1844 else
1845 fprintf (file, "0d%08lx%08lx", vals[1], vals[0]);
1846 break;
1848 default:
1849 output_addr_const (file, x);
1854 /* Record replacement regs used to deal with subreg operands. */
1855 struct reg_replace
1857 rtx replacement[MAX_RECOG_OPERANDS];
1858 machine_mode mode;
1859 int n_allocated;
1860 int n_in_use;
1863 /* Allocate or reuse a replacement in R and return the rtx. */
1865 static rtx
1866 get_replacement (struct reg_replace *r)
1868 if (r->n_allocated == r->n_in_use)
1869 r->replacement[r->n_allocated++] = gen_reg_rtx (r->mode);
1870 return r->replacement[r->n_in_use++];
1873 /* Clean up subreg operands. In ptx assembly, everything is typed, and
1874 the presence of subregs would break the rules for most instructions.
1875 Replace them with a suitable new register of the right size, plus
1876 conversion copyin/copyout instructions. */
1878 static void
1879 nvptx_reorg (void)
1881 struct reg_replace qiregs, hiregs, siregs, diregs;
1882 rtx_insn *insn, *next;
1884 /* We are freeing block_for_insn in the toplev to keep compatibility
1885 with old MDEP_REORGS that are not CFG based. Recompute it now. */
1886 compute_bb_for_insn ();
1888 df_clear_flags (DF_LR_RUN_DCE);
1889 df_analyze ();
1891 thread_prologue_and_epilogue_insns ();
1893 qiregs.n_allocated = 0;
1894 hiregs.n_allocated = 0;
1895 siregs.n_allocated = 0;
1896 diregs.n_allocated = 0;
1897 qiregs.mode = QImode;
1898 hiregs.mode = HImode;
1899 siregs.mode = SImode;
1900 diregs.mode = DImode;
1902 for (insn = get_insns (); insn; insn = next)
1904 next = NEXT_INSN (insn);
1905 if (!NONDEBUG_INSN_P (insn)
1906 || asm_noperands (insn) >= 0
1907 || GET_CODE (PATTERN (insn)) == USE
1908 || GET_CODE (PATTERN (insn)) == CLOBBER)
1909 continue;
1910 qiregs.n_in_use = 0;
1911 hiregs.n_in_use = 0;
1912 siregs.n_in_use = 0;
1913 diregs.n_in_use = 0;
1914 extract_insn (insn);
1915 enum attr_subregs_ok s_ok = get_attr_subregs_ok (insn);
1916 for (int i = 0; i < recog_data.n_operands; i++)
1918 rtx op = recog_data.operand[i];
1919 if (GET_CODE (op) != SUBREG)
1920 continue;
1922 rtx inner = SUBREG_REG (op);
1924 machine_mode outer_mode = GET_MODE (op);
1925 machine_mode inner_mode = GET_MODE (inner);
1926 gcc_assert (s_ok);
1927 if (s_ok
1928 && (GET_MODE_PRECISION (inner_mode)
1929 >= GET_MODE_PRECISION (outer_mode)))
1930 continue;
1931 gcc_assert (SCALAR_INT_MODE_P (outer_mode));
1932 struct reg_replace *r = (outer_mode == QImode ? &qiregs
1933 : outer_mode == HImode ? &hiregs
1934 : outer_mode == SImode ? &siregs
1935 : &diregs);
1936 rtx new_reg = get_replacement (r);
1938 if (recog_data.operand_type[i] != OP_OUT)
1940 enum rtx_code code;
1941 if (GET_MODE_PRECISION (inner_mode)
1942 < GET_MODE_PRECISION (outer_mode))
1943 code = ZERO_EXTEND;
1944 else
1945 code = TRUNCATE;
1947 rtx pat = gen_rtx_SET (new_reg,
1948 gen_rtx_fmt_e (code, outer_mode, inner));
1949 emit_insn_before (pat, insn);
1952 if (recog_data.operand_type[i] != OP_IN)
1954 enum rtx_code code;
1955 if (GET_MODE_PRECISION (inner_mode)
1956 < GET_MODE_PRECISION (outer_mode))
1957 code = TRUNCATE;
1958 else
1959 code = ZERO_EXTEND;
1961 rtx pat = gen_rtx_SET (inner,
1962 gen_rtx_fmt_e (code, inner_mode, new_reg));
1963 emit_insn_after (pat, insn);
1965 validate_change (insn, recog_data.operand_loc[i], new_reg, false);
1969 int maxregs = max_reg_num ();
1970 regstat_init_n_sets_and_refs ();
1972 for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
1973 if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
1974 regno_reg_rtx[i] = const0_rtx;
1975 regstat_free_n_sets_and_refs ();
1978 /* Handle a "kernel" attribute; arguments as in
1979 struct attribute_spec.handler. */
1981 static tree
1982 nvptx_handle_kernel_attribute (tree *node, tree name, tree ARG_UNUSED (args),
1983 int ARG_UNUSED (flags), bool *no_add_attrs)
1985 tree decl = *node;
1987 if (TREE_CODE (decl) != FUNCTION_DECL)
1989 error ("%qE attribute only applies to functions", name);
1990 *no_add_attrs = true;
1993 else if (TREE_TYPE (TREE_TYPE (decl)) != void_type_node)
1995 error ("%qE attribute requires a void return type", name);
1996 *no_add_attrs = true;
1999 return NULL_TREE;
2002 /* Table of valid machine attributes. */
2003 static const struct attribute_spec nvptx_attribute_table[] =
2005 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
2006 affects_type_identity } */
2007 { "kernel", 0, 0, true, false, false, nvptx_handle_kernel_attribute, false },
2008 { NULL, 0, 0, false, false, false, NULL, false }
2011 /* Limit vector alignments to BIGGEST_ALIGNMENT. */
2013 static HOST_WIDE_INT
2014 nvptx_vector_alignment (const_tree type)
2016 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
2018 return MIN (align, BIGGEST_ALIGNMENT);
2021 /* Record a symbol for mkoffload to enter into the mapping table. */
2023 static void
2024 nvptx_record_offload_symbol (tree decl)
2026 fprintf (asm_out_file, "//:%s_MAP %s\n",
2027 TREE_CODE (decl) == VAR_DECL ? "VAR" : "FUNC",
2028 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
2031 /* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects
2032 at the start of a file. */
2034 static void
2035 nvptx_file_start (void)
2037 fputs ("// BEGIN PREAMBLE\n", asm_out_file);
2038 fputs ("\t.version\t3.1\n", asm_out_file);
2039 fputs ("\t.target\tsm_30\n", asm_out_file);
2040 fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode));
2041 fputs ("// END PREAMBLE\n", asm_out_file);
2044 /* Write out the function declarations we've collected. */
2046 static void
2047 nvptx_file_end (void)
2049 hash_table<tree_hasher>::iterator iter;
2050 tree decl;
2051 FOR_EACH_HASH_TABLE_ELEMENT (*needed_fndecls_htab, decl, tree, iter)
2052 nvptx_record_fndecl (decl, true);
2053 fputs (func_decls.str().c_str(), asm_out_file);
2056 #undef TARGET_OPTION_OVERRIDE
2057 #define TARGET_OPTION_OVERRIDE nvptx_option_override
2059 #undef TARGET_ATTRIBUTE_TABLE
2060 #define TARGET_ATTRIBUTE_TABLE nvptx_attribute_table
2062 #undef TARGET_LEGITIMATE_ADDRESS_P
2063 #define TARGET_LEGITIMATE_ADDRESS_P nvptx_legitimate_address_p
2065 #undef TARGET_PROMOTE_FUNCTION_MODE
2066 #define TARGET_PROMOTE_FUNCTION_MODE nvptx_promote_function_mode
2068 #undef TARGET_FUNCTION_ARG
2069 #define TARGET_FUNCTION_ARG nvptx_function_arg
2070 #undef TARGET_FUNCTION_INCOMING_ARG
2071 #define TARGET_FUNCTION_INCOMING_ARG nvptx_function_incoming_arg
2072 #undef TARGET_FUNCTION_ARG_ADVANCE
2073 #define TARGET_FUNCTION_ARG_ADVANCE nvptx_function_arg_advance
2074 #undef TARGET_FUNCTION_ARG_BOUNDARY
2075 #define TARGET_FUNCTION_ARG_BOUNDARY nvptx_function_arg_boundary
2076 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
2077 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY nvptx_function_arg_boundary
2078 #undef TARGET_PASS_BY_REFERENCE
2079 #define TARGET_PASS_BY_REFERENCE nvptx_pass_by_reference
2080 #undef TARGET_FUNCTION_VALUE_REGNO_P
2081 #define TARGET_FUNCTION_VALUE_REGNO_P nvptx_function_value_regno_p
2082 #undef TARGET_FUNCTION_VALUE
2083 #define TARGET_FUNCTION_VALUE nvptx_function_value
2084 #undef TARGET_LIBCALL_VALUE
2085 #define TARGET_LIBCALL_VALUE nvptx_libcall_value
2086 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
2087 #define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall
2088 #undef TARGET_SPLIT_COMPLEX_ARG
2089 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
2090 #undef TARGET_RETURN_IN_MEMORY
2091 #define TARGET_RETURN_IN_MEMORY nvptx_return_in_memory
2092 #undef TARGET_OMIT_STRUCT_RETURN_REG
2093 #define TARGET_OMIT_STRUCT_RETURN_REG true
2094 #undef TARGET_STRICT_ARGUMENT_NAMING
2095 #define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming
2096 #undef TARGET_STATIC_CHAIN
2097 #define TARGET_STATIC_CHAIN nvptx_static_chain
2099 #undef TARGET_CALL_ARGS
2100 #define TARGET_CALL_ARGS nvptx_call_args
2101 #undef TARGET_END_CALL_ARGS
2102 #define TARGET_END_CALL_ARGS nvptx_end_call_args
2104 #undef TARGET_ASM_FILE_START
2105 #define TARGET_ASM_FILE_START nvptx_file_start
2106 #undef TARGET_ASM_FILE_END
2107 #define TARGET_ASM_FILE_END nvptx_file_end
2108 #undef TARGET_ASM_GLOBALIZE_LABEL
2109 #define TARGET_ASM_GLOBALIZE_LABEL nvptx_globalize_label
2110 #undef TARGET_ASM_ASSEMBLE_UNDEFINED_DECL
2111 #define TARGET_ASM_ASSEMBLE_UNDEFINED_DECL nvptx_assemble_undefined_decl
2112 #undef TARGET_PRINT_OPERAND
2113 #define TARGET_PRINT_OPERAND nvptx_print_operand
2114 #undef TARGET_PRINT_OPERAND_ADDRESS
2115 #define TARGET_PRINT_OPERAND_ADDRESS nvptx_print_operand_address
2116 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
2117 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P nvptx_print_operand_punct_valid_p
2118 #undef TARGET_ASM_INTEGER
2119 #define TARGET_ASM_INTEGER nvptx_assemble_integer
2120 #undef TARGET_ASM_DECL_END
2121 #define TARGET_ASM_DECL_END nvptx_assemble_decl_end
2122 #undef TARGET_ASM_DECLARE_CONSTANT_NAME
2123 #define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
2124 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
2125 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
2126 #undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
2127 #define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true
2129 #undef TARGET_MACHINE_DEPENDENT_REORG
2130 #define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
2131 #undef TARGET_NO_REGISTER_ALLOCATION
2132 #define TARGET_NO_REGISTER_ALLOCATION true
2134 #undef TARGET_RECORD_OFFLOAD_SYMBOL
2135 #define TARGET_RECORD_OFFLOAD_SYMBOL nvptx_record_offload_symbol
2137 #undef TARGET_VECTOR_ALIGNMENT
2138 #define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
2140 struct gcc_target targetm = TARGET_INITIALIZER;
2142 #include "gt-nvptx.h"