1 /* Implementation of commonly needed HSAIL related functions and methods.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
32 #include "gimple-pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "alloc-pool.h"
36 #include "print-tree.h"
37 #include "stringpool.h"
38 #include "symbol-summary.h"
40 #include "internal-fn.h"
44 /* Structure containing intermediate HSA representation of the generated
   function. */
/* Representation of the function currently being processed; other code in
   this file reads its m_seen_error and m_decl members. */
46 class hsa_function_representation
*hsa_cfun
;
48 /* Element of the mapping vector between a host decl and an HSA kernel.
   Elements are filled in by hsa_add_kern_decl_mapping and read back through
   the hsa_get_decl_kernel_mapping_* accessors. */
50 struct GTY(()) hsa_decl_kernel_map_element
52 /* The decl of the host function. */
54 /* Name of the HSA kernel in BRIG. The string is released with free () by
   hsa_free_decl_kernel_mapping. */
55 char * GTY((skip
)) name
;
56 /* Size of OMP data, if the kernel contains a kernel dispatch. */
57 unsigned omp_data_size
;
58 /* True if the function is a gridified kernel. */
59 bool gridified_kernel_p
;
62 /* Mapping between decls and corresponding HSA kernels in this compilation
   unit. */
65 static GTY (()) vec
<hsa_decl_kernel_map_element
, va_gc
>
66 *hsa_decl_kernel_mapping
;
68 /* Mapping between decls and the names of the HSA kernels called by the
   function. The map is allocated on first use by
   hsa_add_kernel_dependency. */
70 hash_map
<tree
, vec
<const char *> *> *hsa_decl_kernel_dependencies
;
72 /* Hash table to look up a symbol for a decl. It is allocated by
   hsa_init_compilation_unit_data and deleted by
   hsa_deinit_compilation_unit_data. */
73 hash_table
<hsa_noop_symbol_hasher
> *hsa_global_variable_symbols
;
/* HSA function summaries; allocated on first use by hsa_register_kernel. */
76 hsa_summary_t
*hsa_summaries
= NULL
;
78 /* HSA number of threads. Deleted and reset to NULL by
   hsa_deinit_compilation_unit_data. */
79 hsa_symbol
*hsa_num_threads
= NULL
;
81 /* Set of functions that cannot be expanded to HSAIL; the decl of the
   current function is added when that function is marked as failed. */
82 hash_set
<tree
> *hsa_failed_functions
= NULL
;
84 /* True if compilation unit-wide data are already allocated and initialized. */
85 static bool compilation_unit_data_initialized
;
87 /* Return true if FNDECL represents an HSA-callable function, i.e. it has
   the "omp declare target" attribute, lacks the "oacc function" attribute
   and is not an artificial decl. */
90 hsa_callable_function_p (tree fndecl
)
92 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl
))
93 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl
))
94 /* At this point, this is enough to identify clones for
95 parallel, which for HSA would need to be kernels anyway. */
96 && !DECL_ARTIFICIAL (fndecl
));
99 /* Allocate HSA structures that are used when dealing with different
   functions (the compilation unit-wide data). */
103 hsa_init_compilation_unit_data (void)
/* Guard against repeated initialization; the flag is cleared again by
   hsa_deinit_compilation_unit_data. NOTE(review): the statement guarded by
   this test is not visible in this listing. */
105 if (compilation_unit_data_initialized
)
108 compilation_unit_data_initialized
= true;
/* Create the containers that hsa_deinit_compilation_unit_data deletes. */
110 hsa_global_variable_symbols
= new hash_table
<hsa_noop_symbol_hasher
> (8);
111 hsa_failed_functions
= new hash_set
<tree
> ();
112 hsa_emitted_internal_decls
= new hash_table
<hsa_internal_fn_hasher
> (2);
115 /* Free data structures that are used when dealing with different
   functions (the compilation unit-wide data). */
119 hsa_deinit_compilation_unit_data (void)
/* The data must have been set up by hsa_init_compilation_unit_data. */
121 gcc_assert (compilation_unit_data_initialized
);
123 delete hsa_failed_functions
;
124 delete hsa_emitted_internal_decls
;
/* Visit every global variable symbol before the table itself is deleted.
   NOTE(review): only part of the loop is visible in this listing. */
126 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
127 = hsa_global_variable_symbols
->begin ();
128 it
!= hsa_global_variable_symbols
->end ();
131 hsa_symbol
*sym
= *it
;
135 delete hsa_global_variable_symbols
;
139 delete hsa_num_threads
;
140 hsa_num_threads
= NULL
;
/* Let a later hsa_init_compilation_unit_data allocate the data again. */
143 compilation_unit_data_initialized
= false;
146 /* Return true if we are generating the large HSA machine model, which is
   decided by Pmode being 64 bits wide. */
149 hsa_machine_large_p (void)
151 /* FIXME: I suppose this is technically wrong but should work for me now. */
152 return (GET_MODE_BITSIZE (Pmode
) == 64);
155 /* Return the HSA profile we are using.
   NOTE(review): the body is not visible in this listing; the name suggests
   the result tells whether the full profile is used -- confirm. */
158 hsa_full_profile_p (void)
163 /* Return true if a register in operand number OPNUM of instruction
   is an output. False if it is an input. For BRIG_OPCODE_EXPAND every
   operand except the last one is an output. */
167 hsa_insn_basic::op_output_p (unsigned opnum
)
172 case BRIG_OPCODE_CBR
:
173 case BRIG_OPCODE_SBR
:
175 case BRIG_OPCODE_SIGNALNORET
:
176 case BRIG_OPCODE_DEBUGTRAP
:
177 /* FIXME: There are probably missing cases here, double check. */
179 case BRIG_OPCODE_EXPAND
:
180 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */
181 return opnum
< operand_count () - 1;
187 /* Return true if OPCODE is a floating-point bit instruction opcode. */
190 hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode
)
194 case BRIG_OPCODE_NEG
:
195 case BRIG_OPCODE_ABS
:
196 case BRIG_OPCODE_CLASS
:
197 case BRIG_OPCODE_COPYSIGN
:
204 /* Return the number of destination operands for this INSN. Despite the
   name, this is the count of outputs: num_used_ops subtracts it from
   operand_count () to obtain the number of source operands. */
207 hsa_insn_basic::input_count ()
214 case BRIG_OPCODE_NOP
:
217 case BRIG_OPCODE_EXPAND
:
221 /* ld_v[234] not yet handled. */
227 case BRIG_OPCODE_ATOMICNORET
:
230 case BRIG_OPCODE_SIGNAL
:
233 case BRIG_OPCODE_SIGNALNORET
:
236 case BRIG_OPCODE_MEMFENCE
:
239 case BRIG_OPCODE_RDIMAGE
:
240 case BRIG_OPCODE_LDIMAGE
:
241 case BRIG_OPCODE_STIMAGE
:
242 case BRIG_OPCODE_QUERYIMAGE
:
243 case BRIG_OPCODE_QUERYSAMPLER
:
/* Image and sampler operations are rejected with a diagnostic. */
244 sorry ("HSA image ops not handled");
247 case BRIG_OPCODE_CBR
:
251 case BRIG_OPCODE_SBR
:
254 case BRIG_OPCODE_WAVEBARRIER
:
257 case BRIG_OPCODE_BARRIER
:
258 case BRIG_OPCODE_ARRIVEFBAR
:
259 case BRIG_OPCODE_INITFBAR
:
260 case BRIG_OPCODE_JOINFBAR
:
261 case BRIG_OPCODE_LEAVEFBAR
:
262 case BRIG_OPCODE_RELEASEFBAR
:
263 case BRIG_OPCODE_WAITFBAR
:
266 case BRIG_OPCODE_LDF
:
269 case BRIG_OPCODE_ACTIVELANECOUNT
:
270 case BRIG_OPCODE_ACTIVELANEID
:
271 case BRIG_OPCODE_ACTIVELANEMASK
:
272 case BRIG_OPCODE_ACTIVELANEPERMUTE
:
275 case BRIG_OPCODE_CALL
:
276 case BRIG_OPCODE_SCALL
:
277 case BRIG_OPCODE_ICALL
:
280 case BRIG_OPCODE_RET
:
283 case BRIG_OPCODE_ALLOCA
:
286 case BRIG_OPCODE_CLEARDETECTEXCEPT
:
289 case BRIG_OPCODE_SETDETECTEXCEPT
:
292 case BRIG_OPCODE_PACKETCOMPLETIONSIG
:
293 case BRIG_OPCODE_PACKETID
:
294 case BRIG_OPCODE_CASQUEUEWRITEINDEX
:
295 case BRIG_OPCODE_LDQUEUEREADINDEX
:
296 case BRIG_OPCODE_LDQUEUEWRITEINDEX
:
297 case BRIG_OPCODE_STQUEUEREADINDEX
:
298 case BRIG_OPCODE_STQUEUEWRITEINDEX
:
301 case BRIG_OPCODE_ADDQUEUEWRITEINDEX
:
304 case BRIG_OPCODE_DEBUGTRAP
:
307 case BRIG_OPCODE_GROUPBASEPTR
:
308 case BRIG_OPCODE_KERNARGBASEPTR
:
311 case HSA_OPCODE_ARG_BLOCK
:
314 case BRIG_KIND_DIRECTIVE_COMMENT
:
319 /* Return the number of source operands for this INSN, i.e. operand_count ()
   minus the number of destination operands reported by input_count (). */
322 hsa_insn_basic::num_used_ops ()
/* The destination operands cannot outnumber all operands. */
324 gcc_checking_assert (input_count () <= operand_count ());
326 return operand_count () - input_count ();
329 /* Set alignment to VALUE. When detailed dumping is enabled and VALUE is
   lower than the current m_align, the decrease and the instruction are
   written to the dump file. */
332 hsa_insn_mem::set_align (BrigAlignment8_t value
)
334 /* TODO: Perhaps remove this dump later on: */
335 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && value
< m_align
)
337 fprintf (dump_file
, "Decreasing alignment to %u in instruction ", value
);
338 dump_hsa_insn (dump_file
, this);
343 /* Return size of HSA type T in bits. */
346 hsa_type_bit_size (BrigType16_t t
)
369 case BRIG_TYPE_U16X2
:
371 case BRIG_TYPE_S16X2
:
372 case BRIG_TYPE_F16X2
:
380 case BRIG_TYPE_U16X4
:
381 case BRIG_TYPE_U32X2
:
383 case BRIG_TYPE_S16X4
:
384 case BRIG_TYPE_S32X2
:
385 case BRIG_TYPE_F16X4
:
386 case BRIG_TYPE_F32X2
:
391 case BRIG_TYPE_U8X16
:
392 case BRIG_TYPE_U16X8
:
393 case BRIG_TYPE_U32X4
:
394 case BRIG_TYPE_U64X2
:
395 case BRIG_TYPE_S8X16
:
396 case BRIG_TYPE_S16X8
:
397 case BRIG_TYPE_S32X4
:
398 case BRIG_TYPE_S64X2
:
399 case BRIG_TYPE_F16X8
:
400 case BRIG_TYPE_F32X4
:
401 case BRIG_TYPE_F64X2
:
/* NOTE(review): presumably the fallback for types not listed above; it is
   only acceptable when an error has already been seen for the current
   function -- confirm against the full source. */
405 gcc_assert (hsa_seen_error ());
410 /* Return BRIG bit-type with BITSIZE length; the visible results are
   BRIG_TYPE_B16, BRIG_TYPE_B32, BRIG_TYPE_B64 and BRIG_TYPE_B128. */
413 hsa_bittype_for_bitsize (unsigned bitsize
)
422 return BRIG_TYPE_B16
;
424 return BRIG_TYPE_B32
;
426 return BRIG_TYPE_B64
;
428 return BRIG_TYPE_B128
;
434 /* Return BRIG unsigned int type with BITSIZE length; the visible results
   are BRIG_TYPE_U16, BRIG_TYPE_U32 and BRIG_TYPE_U64. */
437 hsa_uint_for_bitsize (unsigned bitsize
)
444 return BRIG_TYPE_U16
;
446 return BRIG_TYPE_U32
;
448 return BRIG_TYPE_U64
;
454 /* Return BRIG float type with BITSIZE length; the visible results are
   BRIG_TYPE_F16, BRIG_TYPE_F32 and BRIG_TYPE_F64. */
457 hsa_float_for_bitsize (unsigned bitsize
)
462 return BRIG_TYPE_F16
;
464 return BRIG_TYPE_F32
;
466 return BRIG_TYPE_F64
;
472 /* Return HSA bit-type with the same size as the type T, i.e. the result
   of hsa_bittype_for_bitsize for hsa_type_bit_size (T). */
475 hsa_bittype_for_type (BrigType16_t t
)
477 return hsa_bittype_for_bitsize (hsa_type_bit_size (t
));
480 /* Return HSA unsigned integer type with the same size as the type T, i.e.
   the result of hsa_uint_for_bitsize for hsa_type_bit_size (T). */
483 hsa_unsigned_type_for_type (BrigType16_t t
)
485 return hsa_uint_for_bitsize (hsa_type_bit_size (t
));
488 /* Return true if TYPE is a packed HSA type, i.e. its bits selected by
   BRIG_TYPE_PACK_MASK differ from BRIG_TYPE_PACK_NONE. */
491 hsa_type_packed_p (BrigType16_t type
)
493 return (type
& BRIG_TYPE_PACK_MASK
) != BRIG_TYPE_PACK_NONE
;
496 /* Return true if and only if TYPE is a floating point number type. Only
   the bits of TYPE selected by BRIG_TYPE_BASE_MASK are examined. */
499 hsa_type_float_p (BrigType16_t type
)
501 switch (type
& BRIG_TYPE_BASE_MASK
)
512 /* Return true if and only if TYPE is an integer number type. Only the
   bits of TYPE selected by BRIG_TYPE_BASE_MASK are examined. */
515 hsa_type_integer_p (BrigType16_t type
)
517 switch (type
& BRIG_TYPE_BASE_MASK
)
533 /* Return true if and only if TYPE is a bit-type. Only the bits of TYPE
   selected by BRIG_TYPE_BASE_MASK are examined. */
536 hsa_btype_p (BrigType16_t type
)
538 switch (type
& BRIG_TYPE_BASE_MASK
)
552 /* Return the HSA encoding of an alignment of N bits. N must be a power
   of two that is at least 8. */
555 hsa_alignment_encoding (unsigned n
)
/* N & (N - 1) is zero only when a single bit is set in N. */
557 gcc_assert (n
>= 8 && !(n
& (n
- 1)));
559 return BRIG_ALIGNMENT_32
;
564 return BRIG_ALIGNMENT_1
;
566 return BRIG_ALIGNMENT_2
;
568 return BRIG_ALIGNMENT_4
;
570 return BRIG_ALIGNMENT_8
;
572 return BRIG_ALIGNMENT_16
;
578 /* Return the HSA alignment encoding of the alignment of T as obtained
   from get_object_alignment. */
582 hsa_object_alignment (tree t
)
584 return hsa_alignment_encoding (get_object_alignment (t
));
587 /* Return byte alignment for given BrigAlignment8_t value. ALIGNMENT must
   not be BRIG_ALIGNMENT_NONE; the result is 1 << (ALIGNMENT - 1). */
590 hsa_byte_alignment (BrigAlignment8_t alignment
)
592 gcc_assert (alignment
!= BRIG_ALIGNMENT_NONE
);
594 return 1 << (alignment
- 1);
597 /* Return natural alignment of HSA TYPE: the alignment encoding of the bit
   size of TYPE with the BRIG_TYPE_ARRAY bits masked off. */
600 hsa_natural_alignment (BrigType16_t type
)
602 return hsa_alignment_encoding (hsa_type_bit_size (type
& ~BRIG_TYPE_ARRAY
));
605 /* Call the correct destructor of a HSA instruction. INSN is tested with
   dyn_cast against the instruction classes in the order of the chain below
   and the destructor of the matching class is invoked explicitly.
   NOTE(review): some lines of the chain are not visible in this listing. */
608 hsa_destroy_insn (hsa_insn_basic
*insn
)
610 if (hsa_insn_phi
*phi
= dyn_cast
<hsa_insn_phi
*> (insn
))
611 phi
->~hsa_insn_phi ();
612 else if (hsa_insn_cbr
*br
= dyn_cast
<hsa_insn_cbr
*> (insn
))
613 br
->~hsa_insn_cbr ();
614 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
615 cmp
->~hsa_insn_cmp ();
616 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
617 mem
->~hsa_insn_mem ();
618 else if (hsa_insn_atomic
*atomic
= dyn_cast
<hsa_insn_atomic
*> (insn
))
619 atomic
->~hsa_insn_atomic ();
620 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
621 seg
->~hsa_insn_seg ();
622 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
623 call
->~hsa_insn_call ();
624 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
625 block
->~hsa_insn_arg_block ();
626 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
627 sbr
->~hsa_insn_sbr ();
628 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
630 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
631 comment
->~hsa_insn_comment ();
/* The base class destructor closes the chain. */
633 insn
->~hsa_insn_basic ();
636 /* Call the correct destructor of a HSA operand. OP is tested with
   dyn_cast against the operand classes in the order of the chain below and
   the destructor of the matching class is invoked explicitly.
   NOTE(review): some lines of the chain are not visible in this listing. */
639 hsa_destroy_operand (hsa_op_base
*op
)
641 if (hsa_op_code_list
*list
= dyn_cast
<hsa_op_code_list
*> (op
))
642 list
->~hsa_op_code_list ();
643 else if (hsa_op_operand_list
*list
= dyn_cast
<hsa_op_operand_list
*> (op
))
644 list
->~hsa_op_operand_list ();
645 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
647 else if (hsa_op_immed
*immed
= dyn_cast
<hsa_op_immed
*> (op
))
648 immed
->~hsa_op_immed ();
653 /* Create a mapping between the original function DECL and kernel name
   NAME. OMP_DATA_SIZE and GRIDIFIED_KERNEL_P are stored in the new element,
   which is appended to hsa_decl_kernel_mapping. */
656 hsa_add_kern_decl_mapping (tree decl
, char *name
, unsigned omp_data_size
,
657 bool gridified_kernel_p
)
659 hsa_decl_kernel_map_element dkm
;
662 dkm
.omp_data_size
= omp_data_size
;
663 dkm
.gridified_kernel_p
= gridified_kernel_p
;
664 vec_safe_push (hsa_decl_kernel_mapping
, dkm
);
667 /* Return the number of kernel decl name mappings, i.e. the length of
   hsa_decl_kernel_mapping as reported by vec_safe_length. */
670 hsa_get_number_decl_kernel_mappings (void)
672 return vec_safe_length (hsa_decl_kernel_mapping
);
675 /* Return the decl in the Ith kernel decl name mapping. The mapping
   vector is dereferenced unconditionally. */
678 hsa_get_decl_kernel_mapping_decl (unsigned i
)
680 return (*hsa_decl_kernel_mapping
)[i
].decl
;
683 /* Return the name in the Ith kernel decl name mapping. The mapping
   vector is dereferenced unconditionally. */
686 hsa_get_decl_kernel_mapping_name (unsigned i
)
688 return (*hsa_decl_kernel_mapping
)[i
].name
;
691 /* Return the OMP data size stored in the Ith kernel decl name mapping.
   The mapping vector is dereferenced unconditionally. */
694 hsa_get_decl_kernel_mapping_omp_size (unsigned i
)
696 return (*hsa_decl_kernel_mapping
)[i
].omp_data_size
;
699 /* Return true if the function in the Ith kernel decl name mapping is a
   gridified kernel. The mapping vector is dereferenced unconditionally. */
702 hsa_get_decl_kernel_mapping_gridified (unsigned i
)
704 return (*hsa_decl_kernel_mapping
)[i
].gridified_kernel_p
;
707 /* Free the mapping between original decls and kernel names: every kernel
   name is released with free () and the vector itself with ggc_free. */
710 hsa_free_decl_kernel_mapping (void)
/* NOTE(review): the statement guarded by this NULL test is not visible in
   this listing. */
712 if (hsa_decl_kernel_mapping
== NULL
)
715 for (unsigned i
= 0; i
< hsa_decl_kernel_mapping
->length (); ++i
)
716 free ((*hsa_decl_kernel_mapping
)[i
].name
);
717 ggc_free (hsa_decl_kernel_mapping
);
720 /* Add new kernel dependency: record CALLED_FUNCTION in the vector of
   names that hsa_decl_kernel_dependencies keeps for CALLER. */
723 hsa_add_kernel_dependency (tree caller
, const char *called_function
)
/* The map is allocated on first use. */
725 if (hsa_decl_kernel_dependencies
== NULL
)
726 hsa_decl_kernel_dependencies
= new hash_map
<tree
, vec
<const char *> *> ();
728 vec
<const char *> *s
= NULL
;
729 vec
<const char *> **slot
= hsa_decl_kernel_dependencies
->get (caller
);
/* A new vector is created and registered for CALLER here.
   NOTE(review): the condition selecting between this and an existing SLOT
   is not visible in this listing. */
732 s
= new vec
<const char *> ();
733 hsa_decl_kernel_dependencies
->put (caller
, s
);
738 s
->safe_push (called_function
);
741 /* Expansion to HSA needs a few gc roots to hold types, constructors etc. In
   order to minimize the number of GTY roots, we'll root them all in the
   following array. The individual elements should only be accessed by the
   very simple getters (of a pointer-to-tree) below: element 0 through
   hsa_get_ctor_statements, element 1 through hsa_get_dtor_statements and
   element 2 through hsa_get_kernel_dispatch_type. */
746 static GTY(()) tree hsa_tree_gt_roots
[3];
/* Return the address of element 0 of hsa_tree_gt_roots (by the name of this
   getter, the slot for constructor statements). */
749 hsa_get_ctor_statements (void)
751 return &hsa_tree_gt_roots
[0];
/* Return the address of element 1 of hsa_tree_gt_roots (by the name of this
   getter, the slot for destructor statements). */
755 hsa_get_dtor_statements (void)
757 return &hsa_tree_gt_roots
[1];
/* Return the address of element 2 of hsa_tree_gt_roots (by the name of this
   getter, the slot for the kernel dispatch type). */
761 hsa_get_kernel_dispatch_type (void)
763 return &hsa_tree_gt_roots
[2];
766 /* Modify the name P in-place so that it is a valid HSA identifier. The
   characters tested for are '.' and '-'.
   NOTE(review): the loop and the replacement statement are not visible in
   this listing. */
769 hsa_sanitize_name (char *p
)
772 if (*p
== '.' || *p
== '-')
776 /* Clone the name P into a newly allocated buffer and sanitize the copy.
   The buffer has room for two characters more than strlen (P) and P is
   copied to offset 1, leaving the first character free.
   NOTE(review): this comment used to say "set trailing ampersand", which
   does not match the copy to BUF + 1; presumably the ampersand is a prefix
   stored by a statement not visible in this listing -- confirm. */
779 hsa_brig_function_name (const char *p
)
781 unsigned len
= strlen (p
);
782 char *buf
= XNEWVEC (char, len
+ 2);
786 memcpy (buf
+ 1, p
, len
);
788 hsa_sanitize_name (buf
);
792 /* Add a flatten attribute and disable vectorization for gpu implementation
   function decl GDECL. */
795 void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl
)
/* Prepend "flatten" to the attribute list of GDECL. */
797 DECL_ATTRIBUTES (gdecl
)
798 = tree_cons (get_identifier ("flatten"), NULL_TREE
,
799 DECL_ATTRIBUTES (gdecl
));
/* Start from the function specific optimization node of GDECL, or from the
   default one when there is none, and modify a copy of it so that the
   original node stays untouched. */
801 tree fn_opts
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
);
802 if (fn_opts
== NULL_TREE
)
803 fn_opts
= optimization_default_node
;
804 fn_opts
= copy_node (fn_opts
);
805 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_loop_vectorize
= false;
806 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_slp_vectorize
= false;
807 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
) = fn_opts
;
/* Record in the summaries of GPU and HOST that they belong together: both
   get KIND and GRIDIFIED_KERNEL_P, only GPU is flagged as the gpu
   implementation, and each summary is bound to the other node. */
811 hsa_summary_t::link_functions (cgraph_node
*gpu
, cgraph_node
*host
,
812 hsa_function_kind kind
, bool gridified_kernel_p
)
814 hsa_function_summary
*gpu_summary
= get (gpu
);
815 hsa_function_summary
*host_summary
= get (host
);
817 gpu_summary
->m_kind
= kind
;
818 host_summary
->m_kind
= kind
;
820 gpu_summary
->m_gpu_implementation_p
= true;
821 host_summary
->m_gpu_implementation_p
= false;
823 gpu_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
824 host_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
826 gpu_summary
->m_bound_function
= host
;
827 host_summary
->m_bound_function
= gpu
;
/* Add the flatten attribute and turn vectorization off for the gpu decl. */
829 process_gpu_implementation_attributes (gpu
->decl
);
831 /* Create reference between a kernel and a corresponding host implementation
   to guarantee LTO streaming to the same LTRANS. */
833 if (kind
== HSA_KERNEL
)
834 gpu
->create_reference (host
, IPA_REF_ADDR
);
837 /* Add a HOST function to HSA summaries. The summary container is created
   on first use and the kind in the summary of HOST is set to HSA_KERNEL. */
840 hsa_register_kernel (cgraph_node
*host
)
842 if (hsa_summaries
== NULL
)
843 hsa_summaries
= new hsa_summary_t (symtab
);
844 hsa_function_summary
*s
= hsa_summaries
->get (host
);
845 s
->m_kind
= HSA_KERNEL
;
848 /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
   the HOST function. */
852 hsa_register_kernel (cgraph_node
*gpu
, cgraph_node
*host
)
/* Create the summary container on first use, then link GPU and HOST as an
   HSA_KERNEL pair with the gridified flag set. */
854 if (hsa_summaries
== NULL
)
855 hsa_summaries
= new hsa_summary_t (symtab
);
856 hsa_summaries
->link_functions (gpu
, host
, HSA_KERNEL
, true);
859 /* Return true if expansion of the current HSA function has already
   failed, as recorded in the m_seen_error flag of hsa_cfun. */
862 hsa_seen_error (void)
864 return hsa_cfun
->m_seen_error
;
867 /* Mark current HSA function as failed: its decl is added to
   hsa_failed_functions and the m_seen_error flag of hsa_cfun is set.
   NOTE(review): the line carrying the function name is not visible in this
   listing. */
872 hsa_failed_functions
->add (hsa_cfun
->m_decl
);
873 hsa_cfun
->m_seen_error
= true;
/* Build the name of the internal function M_FN: a lower-case copy of
   internal_fn_name (M_FN), possibly extended with a suffix, passed through
   hsa_sanitize_name. The string is allocated here (xstrdup / concat).
   NOTE(review): the suffix values, the handling of NAME2 and the return
   statement are not visible in this listing. */
877 hsa_internal_fn::name ()
879 char *name
= xstrdup (internal_fn_name (m_fn
));
/* Convert the copy to lower case in place. */
880 for (char *ptr
= name
; *ptr
; ptr
++)
881 *ptr
= TOLOWER (*ptr
);
883 const char *suffix
= NULL
;
884 if (m_type_bit_size
== 32)
889 char *name2
= concat (name
, suffix
, NULL
);
894 hsa_sanitize_name (name
);
/* NOTE(review): most of this function is not visible in this listing; by
   its name it reports the number of arguments of the internal function --
   confirm against the full source. */
899 hsa_internal_fn::get_arity ()
916 case IFN_SIGNIFICAND
:
942 /* As we produce a sorry message for unknown internal functions,
   reaching this label is definitely a bug. */
949 hsa_internal_fn::get_argument_type (int n
)
966 case IFN_SIGNIFICAND
:
982 return hsa_float_for_bitsize (m_type_bit_size
);
985 if (n
== -1 || n
== 0)
986 return hsa_float_for_bitsize (m_type_bit_size
);
988 return BRIG_TYPE_S32
;
991 /* As we produce sorry message for unknown internal functions,
992 reaching this label is definitely a bug. */