1 /* Implementation of commonly needed HSAIL related functions and methods.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
32 #include "gimple-pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "alloc-pool.h"
36 #include "print-tree.h"
37 #include "stringpool.h"
38 #include "symbol-summary.h"
40 #include "internal-fn.h"
44 /* Structure containing intermediate HSA representation of the generated
46 class hsa_function_representation
*hsa_cfun
;
48 /* Element of the mapping vector between a host decl and an HSA kernel. */
50 struct GTY(()) hsa_decl_kernel_map_element
52 /* The decl of the host function. */
54 /* Name of the HSA kernel in BRIG. */
55 char * GTY((skip
)) name
;
56 /* Size of OMP data, if the kernel contains a kernel dispatch. */
57 unsigned omp_data_size
;
58 /* True if the function is a gridified kernel. */
59 bool gridified_kernel_p
;
62 /* Mapping between decls and corresponding HSA kernels in this compilation
65 static GTY (()) vec
<hsa_decl_kernel_map_element
, va_gc
>
66 *hsa_decl_kernel_mapping
;
68 /* Mapping between decls and corresponding HSA kernels
69 called by the function. */
70 hash_map
<tree
, vec
<const char *> *> *hsa_decl_kernel_dependencies
;
72 /* Hash table to look up a symbol for a decl. */
73 hash_table
<hsa_noop_symbol_hasher
> *hsa_global_variable_symbols
;
76 hsa_summary_t
*hsa_summaries
= NULL
;
78 /* HSA number of threads. */
79 hsa_symbol
*hsa_num_threads
= NULL
;
81 /* Set of functions that cannot be expanded to HSAIL. */
82 hash_set
<tree
> *hsa_failed_functions
= NULL
;
84 /* True if compilation unit-wide data are already allocated and initialized. */
85 static bool compilation_unit_data_initialized
;
87 /* Return true if FNDECL represents an HSA-callable function. */
90 hsa_callable_function_p (tree fndecl
)
92 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl
))
93 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl
)));
96 /* Allocate HSA structures that are used when dealing with different
100 hsa_init_compilation_unit_data (void)
102 if (compilation_unit_data_initialized
)
105 compilation_unit_data_initialized
= true;
107 hsa_global_variable_symbols
= new hash_table
<hsa_noop_symbol_hasher
> (8);
108 hsa_failed_functions
= new hash_set
<tree
> ();
109 hsa_emitted_internal_decls
= new hash_table
<hsa_internal_fn_hasher
> (2);
112 /* Free data structures that are used when dealing with different
116 hsa_deinit_compilation_unit_data (void)
118 gcc_assert (compilation_unit_data_initialized
);
120 delete hsa_failed_functions
;
121 delete hsa_emitted_internal_decls
;
123 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
124 = hsa_global_variable_symbols
->begin ();
125 it
!= hsa_global_variable_symbols
->end ();
128 hsa_symbol
*sym
= *it
;
132 delete hsa_global_variable_symbols
;
136 delete hsa_num_threads
;
137 hsa_num_threads
= NULL
;
140 compilation_unit_data_initialized
= false;
143 /* Return true if we are generating large HSA machine model. */
146 hsa_machine_large_p (void)
148 /* FIXME: I suppose this is technically wrong but should work for me now. */
149 return (GET_MODE_BITSIZE (Pmode
) == 64);
152 /* Return the HSA profile we are using. */
155 hsa_full_profile_p (void)
160 /* Return true if a register in operand number OPNUM of instruction
161 is an output. False if it is an input. */
164 hsa_insn_basic::op_output_p (unsigned opnum
)
169 case BRIG_OPCODE_CBR
:
170 case BRIG_OPCODE_SBR
:
172 case BRIG_OPCODE_SIGNALNORET
:
173 /* FIXME: There are probably missing cases here, double check. */
175 case BRIG_OPCODE_EXPAND
:
176 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */
177 return opnum
< operand_count () - 1;
183 /* Return true if OPCODE is a floating-point bit instruction opcode. */
186 hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode
)
190 case BRIG_OPCODE_NEG
:
191 case BRIG_OPCODE_ABS
:
192 case BRIG_OPCODE_CLASS
:
193 case BRIG_OPCODE_COPYSIGN
:
200 /* Return the number of destination operands for this INSN. */
203 hsa_insn_basic::input_count ()
210 case BRIG_OPCODE_NOP
:
213 case BRIG_OPCODE_EXPAND
:
217 /* ld_v[234] not yet handled. */
223 case BRIG_OPCODE_ATOMICNORET
:
226 case BRIG_OPCODE_SIGNAL
:
229 case BRIG_OPCODE_SIGNALNORET
:
232 case BRIG_OPCODE_MEMFENCE
:
235 case BRIG_OPCODE_RDIMAGE
:
236 case BRIG_OPCODE_LDIMAGE
:
237 case BRIG_OPCODE_STIMAGE
:
238 case BRIG_OPCODE_QUERYIMAGE
:
239 case BRIG_OPCODE_QUERYSAMPLER
:
240 sorry ("HSA image ops not handled");
243 case BRIG_OPCODE_CBR
:
247 case BRIG_OPCODE_SBR
:
250 case BRIG_OPCODE_WAVEBARRIER
:
253 case BRIG_OPCODE_BARRIER
:
254 case BRIG_OPCODE_ARRIVEFBAR
:
255 case BRIG_OPCODE_INITFBAR
:
256 case BRIG_OPCODE_JOINFBAR
:
257 case BRIG_OPCODE_LEAVEFBAR
:
258 case BRIG_OPCODE_RELEASEFBAR
:
259 case BRIG_OPCODE_WAITFBAR
:
262 case BRIG_OPCODE_LDF
:
265 case BRIG_OPCODE_ACTIVELANECOUNT
:
266 case BRIG_OPCODE_ACTIVELANEID
:
267 case BRIG_OPCODE_ACTIVELANEMASK
:
268 case BRIG_OPCODE_ACTIVELANEPERMUTE
:
271 case BRIG_OPCODE_CALL
:
272 case BRIG_OPCODE_SCALL
:
273 case BRIG_OPCODE_ICALL
:
276 case BRIG_OPCODE_RET
:
279 case BRIG_OPCODE_ALLOCA
:
282 case BRIG_OPCODE_CLEARDETECTEXCEPT
:
285 case BRIG_OPCODE_SETDETECTEXCEPT
:
288 case BRIG_OPCODE_PACKETCOMPLETIONSIG
:
289 case BRIG_OPCODE_PACKETID
:
290 case BRIG_OPCODE_CASQUEUEWRITEINDEX
:
291 case BRIG_OPCODE_LDQUEUEREADINDEX
:
292 case BRIG_OPCODE_LDQUEUEWRITEINDEX
:
293 case BRIG_OPCODE_STQUEUEREADINDEX
:
294 case BRIG_OPCODE_STQUEUEWRITEINDEX
:
297 case BRIG_OPCODE_ADDQUEUEWRITEINDEX
:
300 case BRIG_OPCODE_DEBUGTRAP
:
303 case BRIG_OPCODE_GROUPBASEPTR
:
304 case BRIG_OPCODE_KERNARGBASEPTR
:
307 case HSA_OPCODE_ARG_BLOCK
:
310 case BRIG_KIND_DIRECTIVE_COMMENT
:
315 /* Return the number of source operands for this INSN. */
318 hsa_insn_basic::num_used_ops ()
320 gcc_checking_assert (input_count () <= operand_count ());
322 return operand_count () - input_count ();
325 /* Set alignment to VALUE. */
328 hsa_insn_mem::set_align (BrigAlignment8_t value
)
330 /* TODO: Perhaps remove this dump later on: */
331 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && value
< m_align
)
333 fprintf (dump_file
, "Decreasing alignment to %u in instruction ", value
);
334 dump_hsa_insn (dump_file
, this);
339 /* Return size of HSA type T in bits. */
342 hsa_type_bit_size (BrigType16_t t
)
365 case BRIG_TYPE_U16X2
:
367 case BRIG_TYPE_S16X2
:
368 case BRIG_TYPE_F16X2
:
376 case BRIG_TYPE_U16X4
:
377 case BRIG_TYPE_U32X2
:
379 case BRIG_TYPE_S16X4
:
380 case BRIG_TYPE_S32X2
:
381 case BRIG_TYPE_F16X4
:
382 case BRIG_TYPE_F32X2
:
387 case BRIG_TYPE_U8X16
:
388 case BRIG_TYPE_U16X8
:
389 case BRIG_TYPE_U32X4
:
390 case BRIG_TYPE_U64X2
:
391 case BRIG_TYPE_S8X16
:
392 case BRIG_TYPE_S16X8
:
393 case BRIG_TYPE_S32X4
:
394 case BRIG_TYPE_S64X2
:
395 case BRIG_TYPE_F16X8
:
396 case BRIG_TYPE_F32X4
:
397 case BRIG_TYPE_F64X2
:
401 gcc_assert (hsa_seen_error ());
406 /* Return BRIG bit-type with BITSIZE length. */
409 hsa_bittype_for_bitsize (unsigned bitsize
)
418 return BRIG_TYPE_B16
;
420 return BRIG_TYPE_B32
;
422 return BRIG_TYPE_B64
;
424 return BRIG_TYPE_B128
;
430 /* Return BRIG unsigned int type with BITSIZE length. */
433 hsa_uint_for_bitsize (unsigned bitsize
)
440 return BRIG_TYPE_U16
;
442 return BRIG_TYPE_U32
;
444 return BRIG_TYPE_U64
;
450 /* Return BRIG float type with BITSIZE length. */
453 hsa_float_for_bitsize (unsigned bitsize
)
458 return BRIG_TYPE_F16
;
460 return BRIG_TYPE_F32
;
462 return BRIG_TYPE_F64
;
468 /* Return HSA bit-type with the same size as the type T. */
471 hsa_bittype_for_type (BrigType16_t t
)
473 return hsa_bittype_for_bitsize (hsa_type_bit_size (t
));
476 /* Return HSA unsigned integer type with the same size as the type T. */
479 hsa_unsigned_type_for_type (BrigType16_t t
)
481 return hsa_uint_for_bitsize (hsa_type_bit_size (t
));
484 /* Return true if TYPE is a packed HSA type. */
487 hsa_type_packed_p (BrigType16_t type
)
489 return (type
& BRIG_TYPE_PACK_MASK
) != BRIG_TYPE_PACK_NONE
;
492 /* Return true if and only if TYPE is a floating point number type. */
495 hsa_type_float_p (BrigType16_t type
)
497 switch (type
& BRIG_TYPE_BASE_MASK
)
508 /* Return true if and only if TYPE is an integer number type. */
511 hsa_type_integer_p (BrigType16_t type
)
513 switch (type
& BRIG_TYPE_BASE_MASK
)
529 /* Return true if and only if TYPE is a bit-type. */
532 hsa_btype_p (BrigType16_t type
)
534 switch (type
& BRIG_TYPE_BASE_MASK
)
548 /* Return the HSA alignment encoding for an alignment of N bits. */
551 hsa_alignment_encoding (unsigned n
)
553 gcc_assert (n
>= 8 && !(n
& (n
- 1)));
555 return BRIG_ALIGNMENT_32
;
560 return BRIG_ALIGNMENT_1
;
562 return BRIG_ALIGNMENT_2
;
564 return BRIG_ALIGNMENT_4
;
566 return BRIG_ALIGNMENT_8
;
568 return BRIG_ALIGNMENT_16
;
574 /* Return the HSA alignment encoding of the alignment of T as reported
575 by get_object_alignment. */
578 hsa_object_alignment (tree t
)
580 return hsa_alignment_encoding (get_object_alignment (t
));
583 /* Return byte alignment for given BrigAlignment8_t value. */
586 hsa_byte_alignment (BrigAlignment8_t alignment
)
588 gcc_assert (alignment
!= BRIG_ALIGNMENT_NONE
);
590 return 1 << (alignment
- 1);
593 /* Return natural alignment of HSA TYPE. */
596 hsa_natural_alignment (BrigType16_t type
)
598 return hsa_alignment_encoding (hsa_type_bit_size (type
& ~BRIG_TYPE_ARRAY
));
601 /* Call the correct destructor of an HSA instruction. */
604 hsa_destroy_insn (hsa_insn_basic
*insn
)
606 if (hsa_insn_phi
*phi
= dyn_cast
<hsa_insn_phi
*> (insn
))
607 phi
->~hsa_insn_phi ();
608 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
610 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
611 cmp
->~hsa_insn_cmp ();
612 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
613 mem
->~hsa_insn_mem ();
614 else if (hsa_insn_atomic
*atomic
= dyn_cast
<hsa_insn_atomic
*> (insn
))
615 atomic
->~hsa_insn_atomic ();
616 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
617 seg
->~hsa_insn_seg ();
618 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
619 call
->~hsa_insn_call ();
620 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
621 block
->~hsa_insn_arg_block ();
622 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
623 sbr
->~hsa_insn_sbr ();
624 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
625 comment
->~hsa_insn_comment ();
627 insn
->~hsa_insn_basic ();
630 /* Call the correct destructor of an HSA operand. */
633 hsa_destroy_operand (hsa_op_base
*op
)
635 if (hsa_op_code_list
*list
= dyn_cast
<hsa_op_code_list
*> (op
))
636 list
->~hsa_op_code_list ();
637 else if (hsa_op_operand_list
*list
= dyn_cast
<hsa_op_operand_list
*> (op
))
638 list
->~hsa_op_operand_list ();
639 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
641 else if (hsa_op_immed
*immed
= dyn_cast
<hsa_op_immed
*> (op
))
642 immed
->~hsa_op_immed ();
647 /* Create a mapping between the original function DECL and kernel name NAME. */
650 hsa_add_kern_decl_mapping (tree decl
, char *name
, unsigned omp_data_size
,
651 bool gridified_kernel_p
)
653 hsa_decl_kernel_map_element dkm
;
656 dkm
.omp_data_size
= omp_data_size
;
657 dkm
.gridified_kernel_p
= gridified_kernel_p
;
658 vec_safe_push (hsa_decl_kernel_mapping
, dkm
);
661 /* Return the number of kernel decl name mappings. */
664 hsa_get_number_decl_kernel_mappings (void)
666 return vec_safe_length (hsa_decl_kernel_mapping
);
669 /* Return the decl in the Ith kernel decl name mapping. */
672 hsa_get_decl_kernel_mapping_decl (unsigned i
)
674 return (*hsa_decl_kernel_mapping
)[i
].decl
;
677 /* Return the name in the Ith kernel decl name mapping. */
680 hsa_get_decl_kernel_mapping_name (unsigned i
)
682 return (*hsa_decl_kernel_mapping
)[i
].name
;
685 /* Return maximum OMP size for kernel decl name mapping. */
688 hsa_get_decl_kernel_mapping_omp_size (unsigned i
)
690 return (*hsa_decl_kernel_mapping
)[i
].omp_data_size
;
693 /* Return true if the Ith kernel decl name mapping is a gridified kernel. */
696 hsa_get_decl_kernel_mapping_gridified (unsigned i
)
698 return (*hsa_decl_kernel_mapping
)[i
].gridified_kernel_p
;
701 /* Free the mapping between original decls and kernel names. */
704 hsa_free_decl_kernel_mapping (void)
706 if (hsa_decl_kernel_mapping
== NULL
)
709 for (unsigned i
= 0; i
< hsa_decl_kernel_mapping
->length (); ++i
)
710 free ((*hsa_decl_kernel_mapping
)[i
].name
);
711 ggc_free (hsa_decl_kernel_mapping
);
714 /* Add new kernel dependency. */
717 hsa_add_kernel_dependency (tree caller
, const char *called_function
)
719 if (hsa_decl_kernel_dependencies
== NULL
)
720 hsa_decl_kernel_dependencies
= new hash_map
<tree
, vec
<const char *> *> ();
722 vec
<const char *> *s
= NULL
;
723 vec
<const char *> **slot
= hsa_decl_kernel_dependencies
->get (caller
);
726 s
= new vec
<const char *> ();
727 hsa_decl_kernel_dependencies
->put (caller
, s
);
732 s
->safe_push (called_function
);
735 /* Expansion to HSA needs a few gc roots to hold types, constructors etc. In
736 order to minimize the number of GTY roots, we'll root them all in the
737 following array. The individual elements should only be accessed by the
738 very simple getters (of a pointer-to-tree) below. */
740 static GTY(()) tree hsa_tree_gt_roots
[3];
743 hsa_get_ctor_statements (void)
745 return &hsa_tree_gt_roots
[0];
749 hsa_get_dtor_statements (void)
751 return &hsa_tree_gt_roots
[1];
755 hsa_get_kernel_dispatch_type (void)
757 return &hsa_tree_gt_roots
[2];
760 /* Modify the name P in-place so that it is a valid HSA identifier. */
763 hsa_sanitize_name (char *p
)
766 if (*p
== '.' || *p
== '-')
770 /* Clone the name P, prepend an ampersand and sanitize the name. */
773 hsa_brig_function_name (const char *p
)
775 unsigned len
= strlen (p
);
776 char *buf
= XNEWVEC (char, len
+ 2);
780 memcpy (buf
+ 1, p
, len
);
782 hsa_sanitize_name (buf
);
786 /* Return declaration name if exists. */
789 hsa_get_declaration_name (tree decl
)
791 if (!DECL_NAME (decl
))
794 snprintf (buf
, 64, "__hsa_anonymous_%i", DECL_UID (decl
));
795 const char *ggc_str
= ggc_strdup (buf
);
800 if (TREE_CODE (decl
) == FUNCTION_DECL
801 || (TREE_CODE (decl
) == VAR_DECL
&& is_global_var (decl
)))
802 name_tree
= DECL_ASSEMBLER_NAME (decl
);
804 name_tree
= DECL_NAME (decl
);
806 const char *name
= IDENTIFIER_POINTER (name_tree
);
807 /* User-defined assembly names have prepended asterisk symbol. */
815 hsa_summary_t::link_functions (cgraph_node
*gpu
, cgraph_node
*host
,
816 hsa_function_kind kind
, bool gridified_kernel_p
)
818 hsa_function_summary
*gpu_summary
= get (gpu
);
819 hsa_function_summary
*host_summary
= get (host
);
821 gpu_summary
->m_kind
= kind
;
822 host_summary
->m_kind
= kind
;
824 gpu_summary
->m_gpu_implementation_p
= true;
825 host_summary
->m_gpu_implementation_p
= false;
827 gpu_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
828 host_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
830 gpu_summary
->m_binded_function
= host
;
831 host_summary
->m_binded_function
= gpu
;
833 tree gdecl
= gpu
->decl
;
834 DECL_ATTRIBUTES (gdecl
)
835 = tree_cons (get_identifier ("flatten"), NULL_TREE
,
836 DECL_ATTRIBUTES (gdecl
));
838 tree fn_opts
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
);
839 if (fn_opts
== NULL_TREE
)
840 fn_opts
= optimization_default_node
;
841 fn_opts
= copy_node (fn_opts
);
842 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_loop_vectorize
= false;
843 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_slp_vectorize
= false;
844 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
) = fn_opts
;
846 /* Create reference between a kernel and a corresponding host implementation
847 to guarantee LTO streaming to the same LTRANS. */
848 if (kind
== HSA_KERNEL
)
849 gpu
->create_reference (host
, IPA_REF_ADDR
);
852 /* Add a HOST function to HSA summaries. */
855 hsa_register_kernel (cgraph_node
*host
)
857 if (hsa_summaries
== NULL
)
858 hsa_summaries
= new hsa_summary_t (symtab
);
859 hsa_function_summary
*s
= hsa_summaries
->get (host
);
860 s
->m_kind
= HSA_KERNEL
;
863 /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
867 hsa_register_kernel (cgraph_node
*gpu
, cgraph_node
*host
)
869 if (hsa_summaries
== NULL
)
870 hsa_summaries
= new hsa_summary_t (symtab
);
871 hsa_summaries
->link_functions (gpu
, host
, HSA_KERNEL
, true);
874 /* Return true if expansion of the current HSA function has already failed. */
877 hsa_seen_error (void)
879 return hsa_cfun
->m_seen_error
;
882 /* Mark current HSA function as failed. */
887 hsa_failed_functions
->add (hsa_cfun
->m_decl
);
888 hsa_cfun
->m_seen_error
= true;
892 hsa_internal_fn::name ()
894 char *name
= xstrdup (internal_fn_name (m_fn
));
895 for (char *ptr
= name
; *ptr
; ptr
++)
896 *ptr
= TOLOWER (*ptr
);
898 const char *suffix
= NULL
;
899 if (m_type_bit_size
== 32)
904 char *name2
= concat (name
, suffix
, NULL
);
909 hsa_sanitize_name (name
);
914 hsa_internal_fn::get_arity ()
931 case IFN_SIGNIFICAND
:
958 /* As we produce sorry message for unknown internal functions,
959 reaching this label is definitely a bug. */
965 hsa_internal_fn::get_argument_type (int n
)
982 case IFN_SIGNIFICAND
:
998 return hsa_float_for_bitsize (m_type_bit_size
);
1001 if (n
== -1 || n
== 0)
1002 return hsa_float_for_bitsize (m_type_bit_size
);
1004 return BRIG_TYPE_S32
;
1007 /* As we produce sorry message for unknown internal functions,
1008 reaching this label is definitely a bug. */