1 /* Implementation of commonly needed HSAIL related functions and methods.
2 Copyright (C) 2013-2018 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
32 #include "gimple-pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "alloc-pool.h"
36 #include "print-tree.h"
37 #include "stringpool.h"
38 #include "symbol-summary.h"
39 #include "hsa-common.h"
40 #include "internal-fn.h"
43 #include "stringpool.h"
46 /* Structure containing intermediate HSA representation of the generated
48 class hsa_function_representation
*hsa_cfun
;
50 /* Element of the mapping vector between a host decl and an HSA kernel. */
52 struct GTY(()) hsa_decl_kernel_map_element
54 /* The decl of the host function. */
56 /* Name of the HSA kernel in BRIG. */
57 char * GTY((skip
)) name
;
58 /* Size of OMP data, if the kernel contains a kernel dispatch. */
59 unsigned omp_data_size
;
60 /* True if the function is gridified kernel. */
61 bool gridified_kernel_p
;
64 /* Mapping between decls and corresponding HSA kernels in this compilation
67 static GTY (()) vec
<hsa_decl_kernel_map_element
, va_gc
>
68 *hsa_decl_kernel_mapping
;
70 /* Mapping between decls and corresponding HSA kernels
71 called by the function. */
72 hash_map
<tree
, vec
<const char *> *> *hsa_decl_kernel_dependencies
;
74 /* Hash table used to look up a symbol for a decl. */
75 hash_table
<hsa_noop_symbol_hasher
> *hsa_global_variable_symbols
;
78 hsa_summary_t
*hsa_summaries
= NULL
;
80 /* HSA number of threads. */
81 hsa_symbol
*hsa_num_threads
= NULL
;
83 /* Set of HSA functions that cannot be expanded to HSAIL. */
84 hash_set
<tree
> *hsa_failed_functions
= NULL
;
86 /* True if compilation unit-wide data are already allocated and initialized. */
87 static bool compilation_unit_data_initialized
;
89 /* Return true if FNDECL represents an HSA-callable function, i.e. if it
   carries the "omp declare target" attribute and does not carry the
   "oacc function" attribute. */
92 hsa_callable_function_p (tree fndecl
)
94 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl
))
95 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl
)));
98 /* Allocate HSA structures that are used when dealing with different
102 hsa_init_compilation_unit_data (void)
104 if (compilation_unit_data_initialized
)
107 compilation_unit_data_initialized
= true;
109 hsa_global_variable_symbols
= new hash_table
<hsa_noop_symbol_hasher
> (8);
110 hsa_failed_functions
= new hash_set
<tree
> ();
111 hsa_emitted_internal_decls
= new hash_table
<hsa_internal_fn_hasher
> (2);
114 /* Free data structures that are used when dealing with different
118 hsa_deinit_compilation_unit_data (void)
120 gcc_assert (compilation_unit_data_initialized
);
122 delete hsa_failed_functions
;
123 delete hsa_emitted_internal_decls
;
125 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
126 = hsa_global_variable_symbols
->begin ();
127 it
!= hsa_global_variable_symbols
->end ();
130 hsa_symbol
*sym
= *it
;
134 delete hsa_global_variable_symbols
;
138 delete hsa_num_threads
;
139 hsa_num_threads
= NULL
;
142 compilation_unit_data_initialized
= false;
145 /* Return true if we are generating large HSA machine model. */
148 hsa_machine_large_p (void)
150 /* FIXME: I suppose this is technically wrong but should work for me now. */
151 return (GET_MODE_BITSIZE (Pmode
) == 64);
154 /* Return the HSA profile we are using. */
157 hsa_full_profile_p (void)
162 /* Return true if a register in operand number OPNUM of instruction
163 is an output. False if it is an input. */
166 hsa_insn_basic::op_output_p (unsigned opnum
)
171 case BRIG_OPCODE_CBR
:
172 case BRIG_OPCODE_SBR
:
174 case BRIG_OPCODE_SIGNALNORET
:
175 case BRIG_OPCODE_DEBUGTRAP
:
176 /* FIXME: There are probably missing cases here, double check. */
178 case BRIG_OPCODE_EXPAND
:
179 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */
180 return opnum
< operand_count () - 1;
186 /* Return true if OPCODE is a floating-point bit instruction opcode. */
189 hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode
)
193 case BRIG_OPCODE_NEG
:
194 case BRIG_OPCODE_ABS
:
195 case BRIG_OPCODE_CLASS
:
196 case BRIG_OPCODE_COPYSIGN
:
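203 /* Return the number of destination operands for this INSN. Note that,
   despite the name of the method, the value is a count of destinations:
   num_used_ops () subtracts it from operand_count () to obtain the number
   of source operands. */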
206 hsa_insn_basic::input_count ()
213 case BRIG_OPCODE_NOP
:
216 case BRIG_OPCODE_EXPAND
:
220 /* ld_v[234] not yet handled. */
226 case BRIG_OPCODE_ATOMICNORET
:
229 case BRIG_OPCODE_SIGNAL
:
232 case BRIG_OPCODE_SIGNALNORET
:
235 case BRIG_OPCODE_MEMFENCE
:
238 case BRIG_OPCODE_RDIMAGE
:
239 case BRIG_OPCODE_LDIMAGE
:
240 case BRIG_OPCODE_STIMAGE
:
241 case BRIG_OPCODE_QUERYIMAGE
:
242 case BRIG_OPCODE_QUERYSAMPLER
:
243 sorry ("HSA image ops not handled");
246 case BRIG_OPCODE_CBR
:
250 case BRIG_OPCODE_SBR
:
253 case BRIG_OPCODE_WAVEBARRIER
:
256 case BRIG_OPCODE_BARRIER
:
257 case BRIG_OPCODE_ARRIVEFBAR
:
258 case BRIG_OPCODE_INITFBAR
:
259 case BRIG_OPCODE_JOINFBAR
:
260 case BRIG_OPCODE_LEAVEFBAR
:
261 case BRIG_OPCODE_RELEASEFBAR
:
262 case BRIG_OPCODE_WAITFBAR
:
265 case BRIG_OPCODE_LDF
:
268 case BRIG_OPCODE_ACTIVELANECOUNT
:
269 case BRIG_OPCODE_ACTIVELANEID
:
270 case BRIG_OPCODE_ACTIVELANEMASK
:
271 case BRIG_OPCODE_ACTIVELANEPERMUTE
:
274 case BRIG_OPCODE_CALL
:
275 case BRIG_OPCODE_SCALL
:
276 case BRIG_OPCODE_ICALL
:
279 case BRIG_OPCODE_RET
:
282 case BRIG_OPCODE_ALLOCA
:
285 case BRIG_OPCODE_CLEARDETECTEXCEPT
:
288 case BRIG_OPCODE_SETDETECTEXCEPT
:
291 case BRIG_OPCODE_PACKETCOMPLETIONSIG
:
292 case BRIG_OPCODE_PACKETID
:
293 case BRIG_OPCODE_CASQUEUEWRITEINDEX
:
294 case BRIG_OPCODE_LDQUEUEREADINDEX
:
295 case BRIG_OPCODE_LDQUEUEWRITEINDEX
:
296 case BRIG_OPCODE_STQUEUEREADINDEX
:
297 case BRIG_OPCODE_STQUEUEWRITEINDEX
:
300 case BRIG_OPCODE_ADDQUEUEWRITEINDEX
:
303 case BRIG_OPCODE_DEBUGTRAP
:
306 case BRIG_OPCODE_GROUPBASEPTR
:
307 case BRIG_OPCODE_KERNARGBASEPTR
:
310 case HSA_OPCODE_ARG_BLOCK
:
313 case BRIG_KIND_DIRECTIVE_COMMENT
:
318 /* Return the number of source operands for this INSN, computed as the
   total operand count minus the value returned by input_count (). */
321 hsa_insn_basic::num_used_ops ()
/* Make sure the subtraction below cannot drop below zero. */
323 gcc_checking_assert (input_count () <= operand_count ());
325 return operand_count () - input_count ();
328 /* Set alignment to VALUE. */
331 hsa_insn_mem::set_align (BrigAlignment8_t value
)
333 /* TODO: Perhaps remove this dump later on: */
334 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && value
< m_align
)
336 fprintf (dump_file
, "Decreasing alignment to %u in instruction ", value
);
337 dump_hsa_insn (dump_file
, this);
342 /* Return size of HSA type T in bits. */
345 hsa_type_bit_size (BrigType16_t t
)
368 case BRIG_TYPE_U16X2
:
370 case BRIG_TYPE_S16X2
:
371 case BRIG_TYPE_F16X2
:
379 case BRIG_TYPE_U16X4
:
380 case BRIG_TYPE_U32X2
:
382 case BRIG_TYPE_S16X4
:
383 case BRIG_TYPE_S32X2
:
384 case BRIG_TYPE_F16X4
:
385 case BRIG_TYPE_F32X2
:
390 case BRIG_TYPE_U8X16
:
391 case BRIG_TYPE_U16X8
:
392 case BRIG_TYPE_U32X4
:
393 case BRIG_TYPE_U64X2
:
394 case BRIG_TYPE_S8X16
:
395 case BRIG_TYPE_S16X8
:
396 case BRIG_TYPE_S32X4
:
397 case BRIG_TYPE_S64X2
:
398 case BRIG_TYPE_F16X8
:
399 case BRIG_TYPE_F32X4
:
400 case BRIG_TYPE_F64X2
:
404 gcc_assert (hsa_seen_error ());
409 /* Return BRIG bit-type with BITSIZE length. */
412 hsa_bittype_for_bitsize (unsigned bitsize
)
421 return BRIG_TYPE_B16
;
423 return BRIG_TYPE_B32
;
425 return BRIG_TYPE_B64
;
427 return BRIG_TYPE_B128
;
433 /* Return BRIG unsigned int type with BITSIZE length. */
436 hsa_uint_for_bitsize (unsigned bitsize
)
443 return BRIG_TYPE_U16
;
445 return BRIG_TYPE_U32
;
447 return BRIG_TYPE_U64
;
453 /* Return BRIG float type with BITSIZE length. */
456 hsa_float_for_bitsize (unsigned bitsize
)
461 return BRIG_TYPE_F16
;
463 return BRIG_TYPE_F32
;
465 return BRIG_TYPE_F64
;
471 /* Return HSA bit-type with the same size as the type T. */
474 hsa_bittype_for_type (BrigType16_t t
)
476 return hsa_bittype_for_bitsize (hsa_type_bit_size (t
));
479 /* Return HSA unsigned integer type with the same size as the type T. */
482 hsa_unsigned_type_for_type (BrigType16_t t
)
484 return hsa_uint_for_bitsize (hsa_type_bit_size (t
));
487 /* Return true if TYPE is a packed HSA type. */
490 hsa_type_packed_p (BrigType16_t type
)
492 return (type
& BRIG_TYPE_PACK_MASK
) != BRIG_TYPE_PACK_NONE
;
495 /* Return true if and only if TYPE is a floating point number type. */
498 hsa_type_float_p (BrigType16_t type
)
500 switch (type
& BRIG_TYPE_BASE_MASK
)
511 /* Return true if and only if TYPE is an integer number type. */
514 hsa_type_integer_p (BrigType16_t type
)
516 switch (type
& BRIG_TYPE_BASE_MASK
)
532 /* Return true if and only if TYPE is a bit-type. */
535 hsa_btype_p (BrigType16_t type
)
537 switch (type
& BRIG_TYPE_BASE_MASK
)
551 /* Return the HSA encoding of an alignment of N bits. */
554 hsa_alignment_encoding (unsigned n
)
556 gcc_assert (n
>= 8 && !(n
& (n
- 1)));
558 return BRIG_ALIGNMENT_32
;
563 return BRIG_ALIGNMENT_1
;
565 return BRIG_ALIGNMENT_2
;
567 return BRIG_ALIGNMENT_4
;
569 return BRIG_ALIGNMENT_8
;
571 return BRIG_ALIGNMENT_16
;
577 /* Return the HSA encoding of the alignment of T as computed by
   get_object_alignment. The result of get_object_alignment is passed
   unchanged to hsa_alignment_encoding, which expects an alignment in
   bits. */
581 hsa_object_alignment (tree t
)
583 return hsa_alignment_encoding (get_object_alignment (t
));
586 /* Return byte alignment for given BrigAlignment8_t value. ALIGNMENT must
   not be BRIG_ALIGNMENT_NONE. */
589 hsa_byte_alignment (BrigAlignment8_t alignment
)
591 gcc_assert (alignment
!= BRIG_ALIGNMENT_NONE
);
/* An encoding value V is mapped to 2^(V - 1) bytes; this presumably relies
   on the BRIG alignment constants being consecutive, starting at 1 for a
   one-byte alignment -- confirm against the BRIG headers. */
593 return 1 << (alignment
- 1);
596 /* Return natural alignment of HSA TYPE, as an HSA alignment encoding.
   The BRIG_TYPE_ARRAY bit is cleared before the bit size of the type is
   looked up, so presumably an array type gets the alignment of its
   element type. */
599 hsa_natural_alignment (BrigType16_t type
)
601 return hsa_alignment_encoding (hsa_type_bit_size (type
& ~BRIG_TYPE_ARRAY
));
604 /* Call the correct destructor of an HSA instruction. */
607 hsa_destroy_insn (hsa_insn_basic
*insn
)
609 if (hsa_insn_phi
*phi
= dyn_cast
<hsa_insn_phi
*> (insn
))
610 phi
->~hsa_insn_phi ();
611 else if (hsa_insn_cbr
*br
= dyn_cast
<hsa_insn_cbr
*> (insn
))
612 br
->~hsa_insn_cbr ();
613 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
614 cmp
->~hsa_insn_cmp ();
615 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
616 mem
->~hsa_insn_mem ();
617 else if (hsa_insn_atomic
*atomic
= dyn_cast
<hsa_insn_atomic
*> (insn
))
618 atomic
->~hsa_insn_atomic ();
619 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
620 seg
->~hsa_insn_seg ();
621 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
622 call
->~hsa_insn_call ();
623 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
624 block
->~hsa_insn_arg_block ();
625 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
626 sbr
->~hsa_insn_sbr ();
627 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
629 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
630 comment
->~hsa_insn_comment ();
632 insn
->~hsa_insn_basic ();
635 /* Call the correct destructor of an HSA operand. */
638 hsa_destroy_operand (hsa_op_base
*op
)
640 if (hsa_op_code_list
*list
= dyn_cast
<hsa_op_code_list
*> (op
))
641 list
->~hsa_op_code_list ();
642 else if (hsa_op_operand_list
*list
= dyn_cast
<hsa_op_operand_list
*> (op
))
643 list
->~hsa_op_operand_list ();
644 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
646 else if (hsa_op_immed
*immed
= dyn_cast
<hsa_op_immed
*> (op
))
647 immed
->~hsa_op_immed ();
652 /* Create a mapping between the original function DECL and kernel name NAME. */
655 hsa_add_kern_decl_mapping (tree decl
, char *name
, unsigned omp_data_size
,
656 bool gridified_kernel_p
)
658 hsa_decl_kernel_map_element dkm
;
661 dkm
.omp_data_size
= omp_data_size
;
662 dkm
.gridified_kernel_p
= gridified_kernel_p
;
663 vec_safe_push (hsa_decl_kernel_mapping
, dkm
);
666 /* Return the number of kernel decl name mappings. The length is obtained
   with vec_safe_length, presumably so that a mapping vector that has not
   been allocated yet counts as empty -- confirm against vec.h. */
669 hsa_get_number_decl_kernel_mappings (void)
671 return vec_safe_length (hsa_decl_kernel_mapping
);
674 /* Return the decl in the Ith kernel decl name mapping. The mapping vector
   is dereferenced without a NULL check, so I is expected to be smaller than
   hsa_get_number_decl_kernel_mappings (). */
677 hsa_get_decl_kernel_mapping_decl (unsigned i
)
679 return (*hsa_decl_kernel_mapping
)[i
].decl
;
682 /* Return the name in the Ith kernel decl name mapping. The returned
   string is the one stored in the mapping, not a copy;
   hsa_free_decl_kernel_mapping releases it with free. The mapping vector
   is dereferenced without a NULL check, so I is expected to be smaller than
   hsa_get_number_decl_kernel_mappings (). */
685 hsa_get_decl_kernel_mapping_name (unsigned i
)
687 return (*hsa_decl_kernel_mapping
)[i
].name
;
690 /* Return the OMP data size stored in the Ith kernel decl name mapping
   (presumably the maximum size needed by the kernel -- confirm against the
   code that records it). The mapping vector is dereferenced without a
   NULL check, so I is expected to be smaller than
   hsa_get_number_decl_kernel_mappings (). */
693 hsa_get_decl_kernel_mapping_omp_size (unsigned i
)
695 return (*hsa_decl_kernel_mapping
)[i
].omp_data_size
;
698 /* Return whether the function in the Ith kernel decl name mapping is a
   gridified kernel. The mapping vector is dereferenced without a NULL
   check, so I is expected to be smaller than
   hsa_get_number_decl_kernel_mappings (). */
701 hsa_get_decl_kernel_mapping_gridified (unsigned i
)
703 return (*hsa_decl_kernel_mapping
)[i
].gridified_kernel_p
;
706 /* Free the mapping between original decls and kernel names. The kernel
   name of every element is released with free and the vector itself with
   ggc_free. */
709 hsa_free_decl_kernel_mapping (void)
711 if (hsa_decl_kernel_mapping
== NULL
)
/* Release the name string owned by each mapping element. */
714 for (unsigned i
= 0; i
< hsa_decl_kernel_mapping
->length (); ++i
)
715 free ((*hsa_decl_kernel_mapping
)[i
].name
);
/* NOTE(review): the visible code does not reset hsa_decl_kernel_mapping
   after releasing it -- confirm that nothing reads it afterwards. */
716 ggc_free (hsa_decl_kernel_mapping
);
719 /* Add new kernel dependency. */
722 hsa_add_kernel_dependency (tree caller
, const char *called_function
)
724 if (hsa_decl_kernel_dependencies
== NULL
)
725 hsa_decl_kernel_dependencies
= new hash_map
<tree
, vec
<const char *> *> ();
727 vec
<const char *> *s
= NULL
;
728 vec
<const char *> **slot
= hsa_decl_kernel_dependencies
->get (caller
);
731 s
= new vec
<const char *> ();
732 hsa_decl_kernel_dependencies
->put (caller
, s
);
737 s
->safe_push (called_function
);
740 /* Expansion to HSA needs a few gc roots to hold types, constructors etc. In
741 order to minimize the number of GTY roots, we'll root them all in the
742 following array. The individual elements should only be accessed by the
743 very simple getters (of a pointer-to-tree) below. */
745 static GTY(()) tree hsa_tree_gt_roots
[3];
748 hsa_get_ctor_statements (void)
750 return &hsa_tree_gt_roots
[0];
754 hsa_get_dtor_statements (void)
756 return &hsa_tree_gt_roots
[1];
760 hsa_get_kernel_dispatch_type (void)
762 return &hsa_tree_gt_roots
[2];
765 /* Modify the name P in-place so that it is a valid HSA identifier. */
768 hsa_sanitize_name (char *p
)
771 if (*p
== '.' || *p
== '-')
775 /* Clone the name P, set trailing ampersand and sanitize the name. */
778 hsa_brig_function_name (const char *p
)
780 unsigned len
= strlen (p
);
781 char *buf
= XNEWVEC (char, len
+ 2);
785 memcpy (buf
+ 1, p
, len
);
787 hsa_sanitize_name (buf
);
791 /* Add a flatten attribute and disable vectorization for gpu implementation
   function decl GDECL. Vectorization is disabled through a private copy of
   the optimization node, which is then installed as the function-specific
   optimization of GDECL. */
794 void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl
)
/* Prepend "flatten" to the attribute list of GDECL. */
796 DECL_ATTRIBUTES (gdecl
)
797 = tree_cons (get_identifier ("flatten"), NULL_TREE
,
798 DECL_ATTRIBUTES (gdecl
));
/* Start from the function-specific optimization node of GDECL if there is
   one, otherwise from the default optimization node. */
800 tree fn_opts
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
);
801 if (fn_opts
== NULL_TREE
)
802 fn_opts
= optimization_default_node
;
/* Modify a copy so that the node we started from stays untouched. */
803 fn_opts
= copy_node (fn_opts
);
/* Switch off both loop and SLP vectorization in the copy. */
804 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_loop_vectorize
= false;
805 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_slp_vectorize
= false;
806 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
) = fn_opts
;
/* Link the GPU implementation node GPU with its HOST counterpart. Both
   summaries get KIND and GRIDIFIED_KERNEL_P recorded, GPU is marked as the
   gpu implementation and HOST as not being one, each summary is bound to
   the other node, and the decl of GPU is passed to
   process_gpu_implementation_attributes. */
810 hsa_summary_t::link_functions (cgraph_node
*gpu
, cgraph_node
*host
,
811 hsa_function_kind kind
, bool gridified_kernel_p
)
/* Fetch the summaries of both nodes, creating them if necessary. */
813 hsa_function_summary
*gpu_summary
= get_create (gpu
);
814 hsa_function_summary
*host_summary
= get_create (host
);
816 gpu_summary
->m_kind
= kind
;
817 host_summary
->m_kind
= kind
;
819 gpu_summary
->m_gpu_implementation_p
= true;
820 host_summary
->m_gpu_implementation_p
= false;
822 gpu_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
823 host_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
/* Cross-link the two functions. */
825 gpu_summary
->m_bound_function
= host
;
826 host_summary
->m_bound_function
= gpu
;
828 process_gpu_implementation_attributes (gpu
->decl
);
830 /* Create a reference from a kernel to the corresponding host
   implementation to guarantee LTO streaming to the same LTRANS. */
832 if (kind
== HSA_KERNEL
)
833 gpu
->create_reference (host
, IPA_REF_ADDR
);
836 /* Add a HOST function to HSA summaries and mark its kind as HSA_KERNEL.
   The summary container is created on first use. */
839 hsa_register_kernel (cgraph_node
*host
)
/* Allocate the summaries lazily. */
841 if (hsa_summaries
== NULL
)
842 hsa_summaries
= new hsa_summary_t (symtab
);
843 hsa_function_summary
*s
= hsa_summaries
->get_create (host
);
844 s
->m_kind
= HSA_KERNEL
;
847 /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
851 hsa_register_kernel (cgraph_node
*gpu
, cgraph_node
*host
)
853 if (hsa_summaries
== NULL
)
854 hsa_summaries
= new hsa_summary_t (symtab
);
855 hsa_summaries
->link_functions (gpu
, host
, HSA_KERNEL
, true);
858 /* Return true if expansion of the current HSA function has already failed. */
861 hsa_seen_error (void)
863 return hsa_cfun
->m_seen_error
;
866 /* Mark current HSA function as failed. */
871 hsa_failed_functions
->add (hsa_cfun
->m_decl
);
872 hsa_cfun
->m_seen_error
= true;
876 hsa_internal_fn::name ()
878 char *name
= xstrdup (internal_fn_name (m_fn
));
879 for (char *ptr
= name
; *ptr
; ptr
++)
880 *ptr
= TOLOWER (*ptr
);
882 const char *suffix
= NULL
;
883 if (m_type_bit_size
== 32)
888 char *name2
= concat (name
, suffix
, NULL
);
893 hsa_sanitize_name (name
);
898 hsa_internal_fn::get_arity ()
915 case IFN_SIGNIFICAND
:
941 /* As we produce sorry message for unknown internal functions,
942 reaching this label is definitely a bug. */
948 hsa_internal_fn::get_argument_type (int n
)
965 case IFN_SIGNIFICAND
:
981 return hsa_float_for_bitsize (m_type_bit_size
);
984 if (n
== -1 || n
== 0)
985 return hsa_float_for_bitsize (m_type_bit_size
);
987 return BRIG_TYPE_S32
;
990 /* As we produce sorry message for unknown internal functions,
991 reaching this label is definitely a bug. */
996 #include "gt-hsa-common.h"