/cp
[official-gcc.git] / gcc / hsa.c
blobff978702489720c1c6250d34d2fe23666b3d4048
1 /* Implementation of commonly needed HSAIL related functions and methods.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "is-a.h"
27 #include "hash-set.h"
28 #include "hash-map.h"
29 #include "vec.h"
30 #include "tree.h"
31 #include "dumpfile.h"
32 #include "gimple-pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "alloc-pool.h"
35 #include "cgraph.h"
36 #include "print-tree.h"
37 #include "stringpool.h"
38 #include "symbol-summary.h"
39 #include "hsa.h"
40 #include "internal-fn.h"
41 #include "ctype.h"
42 #include "builtins.h"
/* Structure containing the intermediate HSA representation of the function
   that is currently being generated.  */
class hsa_function_representation *hsa_cfun;
48 /* Element of the mapping vector between a host decl and an HSA kernel. */
50 struct GTY(()) hsa_decl_kernel_map_element
52 /* The decl of the host function. */
53 tree decl;
54 /* Name of the HSA kernel in BRIG. */
55 char * GTY((skip)) name;
56 /* Size of OMP data, if the kernel contains a kernel dispatch. */
57 unsigned omp_data_size;
58 /* True if the function is gridified kernel. */
59 bool gridified_kernel_p;
62 /* Mapping between decls and corresponding HSA kernels in this compilation
63 unit. */
65 static GTY (()) vec<hsa_decl_kernel_map_element, va_gc>
66 *hsa_decl_kernel_mapping;
68 /* Mapping between decls and corresponding HSA kernels
69 called by the function. */
70 hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies;
72 /* Hash function to lookup a symbol for a decl. */
73 hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols;
75 /* HSA summaries. */
76 hsa_summary_t *hsa_summaries = NULL;
78 /* HSA number of threads. */
79 hsa_symbol *hsa_num_threads = NULL;
81 /* HSA function that cannot be expanded to HSAIL. */
82 hash_set <tree> *hsa_failed_functions = NULL;
84 /* True if compilation unit-wide data are already allocated and initialized. */
85 static bool compilation_unit_data_initialized;
87 /* Return true if FNDECL represents an HSA-callable function. */
89 bool
90 hsa_callable_function_p (tree fndecl)
92 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl))
93 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl)));
96 /* Allocate HSA structures that are are used when dealing with different
97 functions. */
99 void
100 hsa_init_compilation_unit_data (void)
102 if (compilation_unit_data_initialized)
103 return;
105 compilation_unit_data_initialized = true;
107 hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8);
108 hsa_failed_functions = new hash_set <tree> ();
109 hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2);
112 /* Free data structures that are used when dealing with different
113 functions. */
115 void
116 hsa_deinit_compilation_unit_data (void)
118 gcc_assert (compilation_unit_data_initialized);
120 delete hsa_failed_functions;
121 delete hsa_emitted_internal_decls;
123 for (hash_table <hsa_noop_symbol_hasher>::iterator it
124 = hsa_global_variable_symbols->begin ();
125 it != hsa_global_variable_symbols->end ();
126 ++it)
128 hsa_symbol *sym = *it;
129 delete sym;
132 delete hsa_global_variable_symbols;
134 if (hsa_num_threads)
136 delete hsa_num_threads;
137 hsa_num_threads = NULL;
140 compilation_unit_data_initialized = false;
143 /* Return true if we are generating large HSA machine model. */
145 bool
146 hsa_machine_large_p (void)
148 /* FIXME: I suppose this is technically wrong but should work for me now. */
149 return (GET_MODE_BITSIZE (Pmode) == 64);
/* Return true if the HSA profile we are using is the full profile.  This is
   currently unconditionally the case.  */

bool
hsa_full_profile_p (void)
{
  return true;
}
160 /* Return true if a register in operand number OPNUM of instruction
161 is an output. False if it is an input. */
163 bool
164 hsa_insn_basic::op_output_p (unsigned opnum)
166 switch (m_opcode)
168 case HSA_OPCODE_PHI:
169 case BRIG_OPCODE_CBR:
170 case BRIG_OPCODE_SBR:
171 case BRIG_OPCODE_ST:
172 case BRIG_OPCODE_SIGNALNORET:
173 /* FIXME: There are probably missing cases here, double check. */
174 return false;
175 case BRIG_OPCODE_EXPAND:
176 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */
177 return opnum < operand_count () - 1;
178 default:
179 return opnum == 0;
183 /* Return true if OPCODE is an floating-point bit instruction opcode. */
185 bool
186 hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode)
188 switch (opcode)
190 case BRIG_OPCODE_NEG:
191 case BRIG_OPCODE_ABS:
192 case BRIG_OPCODE_CLASS:
193 case BRIG_OPCODE_COPYSIGN:
194 return true;
195 default:
196 return false;
200 /* Return the number of destination operands for this INSN. */
202 unsigned
203 hsa_insn_basic::input_count ()
205 switch (m_opcode)
207 default:
208 return 1;
210 case BRIG_OPCODE_NOP:
211 return 0;
213 case BRIG_OPCODE_EXPAND:
214 return 2;
216 case BRIG_OPCODE_LD:
217 /* ld_v[234] not yet handled. */
218 return 1;
220 case BRIG_OPCODE_ST:
221 return 0;
223 case BRIG_OPCODE_ATOMICNORET:
224 return 0;
226 case BRIG_OPCODE_SIGNAL:
227 return 1;
229 case BRIG_OPCODE_SIGNALNORET:
230 return 0;
232 case BRIG_OPCODE_MEMFENCE:
233 return 0;
235 case BRIG_OPCODE_RDIMAGE:
236 case BRIG_OPCODE_LDIMAGE:
237 case BRIG_OPCODE_STIMAGE:
238 case BRIG_OPCODE_QUERYIMAGE:
239 case BRIG_OPCODE_QUERYSAMPLER:
240 sorry ("HSA image ops not handled");
241 return 0;
243 case BRIG_OPCODE_CBR:
244 case BRIG_OPCODE_BR:
245 return 0;
247 case BRIG_OPCODE_SBR:
248 return 0; /* ??? */
250 case BRIG_OPCODE_WAVEBARRIER:
251 return 0; /* ??? */
253 case BRIG_OPCODE_BARRIER:
254 case BRIG_OPCODE_ARRIVEFBAR:
255 case BRIG_OPCODE_INITFBAR:
256 case BRIG_OPCODE_JOINFBAR:
257 case BRIG_OPCODE_LEAVEFBAR:
258 case BRIG_OPCODE_RELEASEFBAR:
259 case BRIG_OPCODE_WAITFBAR:
260 return 0;
262 case BRIG_OPCODE_LDF:
263 return 1;
265 case BRIG_OPCODE_ACTIVELANECOUNT:
266 case BRIG_OPCODE_ACTIVELANEID:
267 case BRIG_OPCODE_ACTIVELANEMASK:
268 case BRIG_OPCODE_ACTIVELANEPERMUTE:
269 return 1; /* ??? */
271 case BRIG_OPCODE_CALL:
272 case BRIG_OPCODE_SCALL:
273 case BRIG_OPCODE_ICALL:
274 return 0;
276 case BRIG_OPCODE_RET:
277 return 0;
279 case BRIG_OPCODE_ALLOCA:
280 return 1;
282 case BRIG_OPCODE_CLEARDETECTEXCEPT:
283 return 0;
285 case BRIG_OPCODE_SETDETECTEXCEPT:
286 return 0;
288 case BRIG_OPCODE_PACKETCOMPLETIONSIG:
289 case BRIG_OPCODE_PACKETID:
290 case BRIG_OPCODE_CASQUEUEWRITEINDEX:
291 case BRIG_OPCODE_LDQUEUEREADINDEX:
292 case BRIG_OPCODE_LDQUEUEWRITEINDEX:
293 case BRIG_OPCODE_STQUEUEREADINDEX:
294 case BRIG_OPCODE_STQUEUEWRITEINDEX:
295 return 1; /* ??? */
297 case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
298 return 1;
300 case BRIG_OPCODE_DEBUGTRAP:
301 return 0;
303 case BRIG_OPCODE_GROUPBASEPTR:
304 case BRIG_OPCODE_KERNARGBASEPTR:
305 return 1; /* ??? */
307 case HSA_OPCODE_ARG_BLOCK:
308 return 0;
310 case BRIG_KIND_DIRECTIVE_COMMENT:
311 return 0;
315 /* Return the number of source operands for this INSN. */
317 unsigned
318 hsa_insn_basic::num_used_ops ()
320 gcc_checking_assert (input_count () <= operand_count ());
322 return operand_count () - input_count ();
325 /* Set alignment to VALUE. */
327 void
328 hsa_insn_mem::set_align (BrigAlignment8_t value)
330 /* TODO: Perhaps remove this dump later on: */
331 if (dump_file && (dump_flags & TDF_DETAILS) && value < m_align)
333 fprintf (dump_file, "Decreasing alignment to %u in instruction ", value);
334 dump_hsa_insn (dump_file, this);
336 m_align = value;
339 /* Return size of HSA type T in bits. */
341 unsigned
342 hsa_type_bit_size (BrigType16_t t)
344 switch (t)
346 case BRIG_TYPE_B1:
347 return 1;
349 case BRIG_TYPE_U8:
350 case BRIG_TYPE_S8:
351 case BRIG_TYPE_B8:
352 return 8;
354 case BRIG_TYPE_U16:
355 case BRIG_TYPE_S16:
356 case BRIG_TYPE_B16:
357 case BRIG_TYPE_F16:
358 return 16;
360 case BRIG_TYPE_U32:
361 case BRIG_TYPE_S32:
362 case BRIG_TYPE_B32:
363 case BRIG_TYPE_F32:
364 case BRIG_TYPE_U8X4:
365 case BRIG_TYPE_U16X2:
366 case BRIG_TYPE_S8X4:
367 case BRIG_TYPE_S16X2:
368 case BRIG_TYPE_F16X2:
369 return 32;
371 case BRIG_TYPE_U64:
372 case BRIG_TYPE_S64:
373 case BRIG_TYPE_F64:
374 case BRIG_TYPE_B64:
375 case BRIG_TYPE_U8X8:
376 case BRIG_TYPE_U16X4:
377 case BRIG_TYPE_U32X2:
378 case BRIG_TYPE_S8X8:
379 case BRIG_TYPE_S16X4:
380 case BRIG_TYPE_S32X2:
381 case BRIG_TYPE_F16X4:
382 case BRIG_TYPE_F32X2:
384 return 64;
386 case BRIG_TYPE_B128:
387 case BRIG_TYPE_U8X16:
388 case BRIG_TYPE_U16X8:
389 case BRIG_TYPE_U32X4:
390 case BRIG_TYPE_U64X2:
391 case BRIG_TYPE_S8X16:
392 case BRIG_TYPE_S16X8:
393 case BRIG_TYPE_S32X4:
394 case BRIG_TYPE_S64X2:
395 case BRIG_TYPE_F16X8:
396 case BRIG_TYPE_F32X4:
397 case BRIG_TYPE_F64X2:
398 return 128;
400 default:
401 gcc_assert (hsa_seen_error ());
402 return t;
406 /* Return BRIG bit-type with BITSIZE length. */
408 BrigType16_t
409 hsa_bittype_for_bitsize (unsigned bitsize)
411 switch (bitsize)
413 case 1:
414 return BRIG_TYPE_B1;
415 case 8:
416 return BRIG_TYPE_B8;
417 case 16:
418 return BRIG_TYPE_B16;
419 case 32:
420 return BRIG_TYPE_B32;
421 case 64:
422 return BRIG_TYPE_B64;
423 case 128:
424 return BRIG_TYPE_B128;
425 default:
426 gcc_unreachable ();
430 /* Return BRIG unsigned int type with BITSIZE length. */
432 BrigType16_t
433 hsa_uint_for_bitsize (unsigned bitsize)
435 switch (bitsize)
437 case 8:
438 return BRIG_TYPE_U8;
439 case 16:
440 return BRIG_TYPE_U16;
441 case 32:
442 return BRIG_TYPE_U32;
443 case 64:
444 return BRIG_TYPE_U64;
445 default:
446 gcc_unreachable ();
450 /* Return BRIG float type with BITSIZE length. */
452 BrigType16_t
453 hsa_float_for_bitsize (unsigned bitsize)
455 switch (bitsize)
457 case 16:
458 return BRIG_TYPE_F16;
459 case 32:
460 return BRIG_TYPE_F32;
461 case 64:
462 return BRIG_TYPE_F64;
463 default:
464 gcc_unreachable ();
468 /* Return HSA bit-type with the same size as the type T. */
470 BrigType16_t
471 hsa_bittype_for_type (BrigType16_t t)
473 return hsa_bittype_for_bitsize (hsa_type_bit_size (t));
476 /* Return HSA unsigned integer type with the same size as the type T. */
478 BrigType16_t
479 hsa_unsigned_type_for_type (BrigType16_t t)
481 return hsa_uint_for_bitsize (hsa_type_bit_size (t));
484 /* Return true if TYPE is a packed HSA type. */
486 bool
487 hsa_type_packed_p (BrigType16_t type)
489 return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE;
492 /* Return true if and only if TYPE is a floating point number type. */
494 bool
495 hsa_type_float_p (BrigType16_t type)
497 switch (type & BRIG_TYPE_BASE_MASK)
499 case BRIG_TYPE_F16:
500 case BRIG_TYPE_F32:
501 case BRIG_TYPE_F64:
502 return true;
503 default:
504 return false;
508 /* Return true if and only if TYPE is an integer number type. */
510 bool
511 hsa_type_integer_p (BrigType16_t type)
513 switch (type & BRIG_TYPE_BASE_MASK)
515 case BRIG_TYPE_U8:
516 case BRIG_TYPE_U16:
517 case BRIG_TYPE_U32:
518 case BRIG_TYPE_U64:
519 case BRIG_TYPE_S8:
520 case BRIG_TYPE_S16:
521 case BRIG_TYPE_S32:
522 case BRIG_TYPE_S64:
523 return true;
524 default:
525 return false;
529 /* Return true if and only if TYPE is an bit-type. */
531 bool
532 hsa_btype_p (BrigType16_t type)
534 switch (type & BRIG_TYPE_BASE_MASK)
536 case BRIG_TYPE_B8:
537 case BRIG_TYPE_B16:
538 case BRIG_TYPE_B32:
539 case BRIG_TYPE_B64:
540 case BRIG_TYPE_B128:
541 return true;
542 default:
543 return false;
548 /* Return HSA alignment encoding alignment to N bits. */
550 BrigAlignment8_t
551 hsa_alignment_encoding (unsigned n)
553 gcc_assert (n >= 8 && !(n & (n - 1)));
554 if (n >= 256)
555 return BRIG_ALIGNMENT_32;
557 switch (n)
559 case 8:
560 return BRIG_ALIGNMENT_1;
561 case 16:
562 return BRIG_ALIGNMENT_2;
563 case 32:
564 return BRIG_ALIGNMENT_4;
565 case 64:
566 return BRIG_ALIGNMENT_8;
567 case 128:
568 return BRIG_ALIGNMENT_16;
569 default:
570 gcc_unreachable ();
574 /* Return HSA alignment encoding alignment of T got
575 by get_object_alignment. */
577 BrigAlignment8_t
578 hsa_object_alignment (tree t)
580 return hsa_alignment_encoding (get_object_alignment (t));
583 /* Return byte alignment for given BrigAlignment8_t value. */
585 unsigned
586 hsa_byte_alignment (BrigAlignment8_t alignment)
588 gcc_assert (alignment != BRIG_ALIGNMENT_NONE);
590 return 1 << (alignment - 1);
593 /* Return natural alignment of HSA TYPE. */
595 BrigAlignment8_t
596 hsa_natural_alignment (BrigType16_t type)
598 return hsa_alignment_encoding (hsa_type_bit_size (type & ~BRIG_TYPE_ARRAY));
601 /* Call the correct destructor of a HSA instruction. */
603 void
604 hsa_destroy_insn (hsa_insn_basic *insn)
606 if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn))
607 phi->~hsa_insn_phi ();
608 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
609 br->~hsa_insn_br ();
610 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
611 cmp->~hsa_insn_cmp ();
612 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
613 mem->~hsa_insn_mem ();
614 else if (hsa_insn_atomic *atomic = dyn_cast <hsa_insn_atomic *> (insn))
615 atomic->~hsa_insn_atomic ();
616 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
617 seg->~hsa_insn_seg ();
618 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
619 call->~hsa_insn_call ();
620 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
621 block->~hsa_insn_arg_block ();
622 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
623 sbr->~hsa_insn_sbr ();
624 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
625 comment->~hsa_insn_comment ();
626 else
627 insn->~hsa_insn_basic ();
630 /* Call the correct destructor of a HSA operand. */
632 void
633 hsa_destroy_operand (hsa_op_base *op)
635 if (hsa_op_code_list *list = dyn_cast <hsa_op_code_list *> (op))
636 list->~hsa_op_code_list ();
637 else if (hsa_op_operand_list *list = dyn_cast <hsa_op_operand_list *> (op))
638 list->~hsa_op_operand_list ();
639 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
640 reg->~hsa_op_reg ();
641 else if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (op))
642 immed->~hsa_op_immed ();
643 else
644 op->~hsa_op_base ();
647 /* Create a mapping between the original function DECL and kernel name NAME. */
649 void
650 hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size,
651 bool gridified_kernel_p)
653 hsa_decl_kernel_map_element dkm;
654 dkm.decl = decl;
655 dkm.name = name;
656 dkm.omp_data_size = omp_data_size;
657 dkm.gridified_kernel_p = gridified_kernel_p;
658 vec_safe_push (hsa_decl_kernel_mapping, dkm);
661 /* Return the number of kernel decl name mappings. */
663 unsigned
664 hsa_get_number_decl_kernel_mappings (void)
666 return vec_safe_length (hsa_decl_kernel_mapping);
669 /* Return the decl in the Ith kernel decl name mapping. */
671 tree
672 hsa_get_decl_kernel_mapping_decl (unsigned i)
674 return (*hsa_decl_kernel_mapping)[i].decl;
677 /* Return the name in the Ith kernel decl name mapping. */
679 char *
680 hsa_get_decl_kernel_mapping_name (unsigned i)
682 return (*hsa_decl_kernel_mapping)[i].name;
685 /* Return maximum OMP size for kernel decl name mapping. */
687 unsigned
688 hsa_get_decl_kernel_mapping_omp_size (unsigned i)
690 return (*hsa_decl_kernel_mapping)[i].omp_data_size;
693 /* Return if the function is gridified kernel in decl name mapping. */
695 bool
696 hsa_get_decl_kernel_mapping_gridified (unsigned i)
698 return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p;
701 /* Free the mapping between original decls and kernel names. */
703 void
704 hsa_free_decl_kernel_mapping (void)
706 if (hsa_decl_kernel_mapping == NULL)
707 return;
709 for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i)
710 free ((*hsa_decl_kernel_mapping)[i].name);
711 ggc_free (hsa_decl_kernel_mapping);
714 /* Add new kernel dependency. */
716 void
717 hsa_add_kernel_dependency (tree caller, const char *called_function)
719 if (hsa_decl_kernel_dependencies == NULL)
720 hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> ();
722 vec <const char *> *s = NULL;
723 vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller);
724 if (slot == NULL)
726 s = new vec <const char *> ();
727 hsa_decl_kernel_dependencies->put (caller, s);
729 else
730 s = *slot;
732 s->safe_push (called_function);
735 /* Expansion to HSA needs a few gc roots to hold types, constructors etc. In
736 order to minimize the number of GTY roots, we'll root them all in the
737 following array. The individual elements should only be accessed by the
738 very simple getters (of a pointer-to-tree) below. */
740 static GTY(()) tree hsa_tree_gt_roots[3];
742 tree *
743 hsa_get_ctor_statements (void)
745 return &hsa_tree_gt_roots[0];
748 tree *
749 hsa_get_dtor_statements (void)
751 return &hsa_tree_gt_roots[1];
754 tree *
755 hsa_get_kernel_dispatch_type (void)
757 return &hsa_tree_gt_roots[2];
/* Modify the name P in-place so that it is a valid HSA identifier: every
   '.' and '-' character is replaced with '_'.  P must be a NUL-terminated
   string; its length does not change.  */

void
hsa_sanitize_name (char *p)
{
  for (; *p; p++)
    if (*p == '.' || *p == '-')
      *p = '_';
}
770 /* Clone the name P, set trailing ampersand and sanitize the name. */
772 char *
773 hsa_brig_function_name (const char *p)
775 unsigned len = strlen (p);
776 char *buf = XNEWVEC (char, len + 2);
778 buf[0] = '&';
779 buf[len + 1] = '\0';
780 memcpy (buf + 1, p, len);
782 hsa_sanitize_name (buf);
783 return buf;
786 /* Return declaration name if exists. */
788 const char *
789 hsa_get_declaration_name (tree decl)
791 if (!DECL_NAME (decl))
793 char buf[64];
794 snprintf (buf, 64, "__hsa_anonymous_%i", DECL_UID (decl));
795 const char *ggc_str = ggc_strdup (buf);
796 return ggc_str;
799 tree name_tree;
800 if (TREE_CODE (decl) == FUNCTION_DECL
801 || (TREE_CODE (decl) == VAR_DECL && is_global_var (decl)))
802 name_tree = DECL_ASSEMBLER_NAME (decl);
803 else
804 name_tree = DECL_NAME (decl);
806 const char *name = IDENTIFIER_POINTER (name_tree);
807 /* User-defined assembly names have prepended asterisk symbol. */
808 if (name[0] == '*')
809 name++;
811 return name;
814 void
815 hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host,
816 hsa_function_kind kind, bool gridified_kernel_p)
818 hsa_function_summary *gpu_summary = get (gpu);
819 hsa_function_summary *host_summary = get (host);
821 gpu_summary->m_kind = kind;
822 host_summary->m_kind = kind;
824 gpu_summary->m_gpu_implementation_p = true;
825 host_summary->m_gpu_implementation_p = false;
827 gpu_summary->m_gridified_kernel_p = gridified_kernel_p;
828 host_summary->m_gridified_kernel_p = gridified_kernel_p;
830 gpu_summary->m_binded_function = host;
831 host_summary->m_binded_function = gpu;
833 tree gdecl = gpu->decl;
834 DECL_ATTRIBUTES (gdecl)
835 = tree_cons (get_identifier ("flatten"), NULL_TREE,
836 DECL_ATTRIBUTES (gdecl));
838 tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl);
839 if (fn_opts == NULL_TREE)
840 fn_opts = optimization_default_node;
841 fn_opts = copy_node (fn_opts);
842 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false;
843 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false;
844 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts;
846 /* Create reference between a kernel and a corresponding host implementation
847 to quarantee LTO streaming to a same LTRANS. */
848 if (kind == HSA_KERNEL)
849 gpu->create_reference (host, IPA_REF_ADDR);
852 /* Add a HOST function to HSA summaries. */
854 void
855 hsa_register_kernel (cgraph_node *host)
857 if (hsa_summaries == NULL)
858 hsa_summaries = new hsa_summary_t (symtab);
859 hsa_function_summary *s = hsa_summaries->get (host);
860 s->m_kind = HSA_KERNEL;
863 /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
864 a HOST function. */
866 void
867 hsa_register_kernel (cgraph_node *gpu, cgraph_node *host)
869 if (hsa_summaries == NULL)
870 hsa_summaries = new hsa_summary_t (symtab);
871 hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true);
874 /* Return true if expansion of the current HSA function has already failed. */
876 bool
877 hsa_seen_error (void)
879 return hsa_cfun->m_seen_error;
882 /* Mark current HSA function as failed. */
884 void
885 hsa_fail_cfun (void)
887 hsa_failed_functions->add (hsa_cfun->m_decl);
888 hsa_cfun->m_seen_error = true;
891 char *
892 hsa_internal_fn::name ()
894 char *name = xstrdup (internal_fn_name (m_fn));
895 for (char *ptr = name; *ptr; ptr++)
896 *ptr = TOLOWER (*ptr);
898 const char *suffix = NULL;
899 if (m_type_bit_size == 32)
900 suffix = "f";
902 if (suffix)
904 char *name2 = concat (name, suffix, NULL);
905 free (name);
906 name = name2;
909 hsa_sanitize_name (name);
910 return name;
913 unsigned
914 hsa_internal_fn::get_arity ()
916 switch (m_fn)
918 case IFN_ACOS:
919 case IFN_ASIN:
920 case IFN_ATAN:
921 case IFN_COS:
922 case IFN_EXP:
923 case IFN_EXP10:
924 case IFN_EXP2:
925 case IFN_EXPM1:
926 case IFN_LOG:
927 case IFN_LOG10:
928 case IFN_LOG1P:
929 case IFN_LOG2:
930 case IFN_LOGB:
931 case IFN_SIGNIFICAND:
932 case IFN_SIN:
933 case IFN_SQRT:
934 case IFN_TAN:
935 case IFN_CEIL:
936 case IFN_FLOOR:
937 case IFN_NEARBYINT:
938 case IFN_RINT:
939 case IFN_ROUND:
940 case IFN_TRUNC:
941 return 1;
942 case IFN_ATAN2:
943 case IFN_COPYSIGN:
944 case IFN_FMOD:
945 case IFN_POW:
946 case IFN_REMAINDER:
947 case IFN_SCALB:
948 case IFN_LDEXP:
949 return 2;
950 break;
951 case IFN_CLRSB:
952 case IFN_CLZ:
953 case IFN_CTZ:
954 case IFN_FFS:
955 case IFN_PARITY:
956 case IFN_POPCOUNT:
957 default:
958 /* As we produce sorry message for unknown internal functions,
959 reaching this label is definitely a bug. */
960 gcc_unreachable ();
964 BrigType16_t
965 hsa_internal_fn::get_argument_type (int n)
967 switch (m_fn)
969 case IFN_ACOS:
970 case IFN_ASIN:
971 case IFN_ATAN:
972 case IFN_COS:
973 case IFN_EXP:
974 case IFN_EXP10:
975 case IFN_EXP2:
976 case IFN_EXPM1:
977 case IFN_LOG:
978 case IFN_LOG10:
979 case IFN_LOG1P:
980 case IFN_LOG2:
981 case IFN_LOGB:
982 case IFN_SIGNIFICAND:
983 case IFN_SIN:
984 case IFN_SQRT:
985 case IFN_TAN:
986 case IFN_CEIL:
987 case IFN_FLOOR:
988 case IFN_NEARBYINT:
989 case IFN_RINT:
990 case IFN_ROUND:
991 case IFN_TRUNC:
992 case IFN_ATAN2:
993 case IFN_COPYSIGN:
994 case IFN_FMOD:
995 case IFN_POW:
996 case IFN_REMAINDER:
997 case IFN_SCALB:
998 return hsa_float_for_bitsize (m_type_bit_size);
999 case IFN_LDEXP:
1001 if (n == -1 || n == 0)
1002 return hsa_float_for_bitsize (m_type_bit_size);
1003 else
1004 return BRIG_TYPE_S32;
1006 default:
1007 /* As we produce sorry message for unknown internal functions,
1008 reaching this label is definitely a bug. */
1009 gcc_unreachable ();
1013 #include "gt-hsa.h"