1 /* Implementation of commonly needed HSAIL related functions and methods.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
32 #include "gimple-pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "alloc-pool.h"
36 #include "print-tree.h"
37 #include "stringpool.h"
38 #include "symbol-summary.h"
40 #include "internal-fn.h"
43 /* Structure containing intermediate HSA representation of the generated function. */
/* Pointer to the HSA representation of the function currently being
   processed.  Other code in this file reads and writes its m_seen_error
   and m_decl members through this pointer.  */
45 class hsa_function_representation
*hsa_cfun
;
47 /* Element of the mapping vector between a host decl and an HSA kernel.
   Elements are stored in hsa_decl_kernel_mapping.  */
49 struct GTY(()) hsa_decl_kernel_map_element
51 /* The decl of the host function. */
53 /* Name of the HSA kernel in BRIG.  The field carries a GTY((skip))
   marker; hsa_free_decl_kernel_mapping releases the string with free ().  */
54 char * GTY((skip
)) name
;
55 /* Size of OMP data, if the kernel contains a kernel dispatch. */
56 unsigned omp_data_size
;
57 /* True if the function is a gridified kernel. */
58 bool gridified_kernel_p
;
61 /* Mapping between decls and corresponding HSA kernels in this compilation unit. */
/* Vector of hsa_decl_kernel_map_element records.  Appended to by
   hsa_add_kern_decl_mapping and released by hsa_free_decl_kernel_mapping.  */
64 static GTY (()) vec
<hsa_decl_kernel_map_element
, va_gc
>
65 *hsa_decl_kernel_mapping
;
67 /* Map from a caller decl to the vector of names of the HSA kernels called
   by that function.  Allocated on first use in hsa_add_kernel_dependency.  */
69 hash_map
<tree
, vec
<const char *> *> *hsa_decl_kernel_dependencies
;
71 /* Hash table used to look up a symbol for a decl.  Created in
   hsa_init_compilation_unit_data and deleted in
   hsa_deinit_compilation_unit_data.  */
72 hash_table
<hsa_noop_symbol_hasher
> *hsa_global_variable_symbols
;
/* Function summaries used for HSA; allocated on first use by the
   hsa_register_kernel overloads.  */
75 hsa_summary_t
*hsa_summaries
= NULL
;
77 /* HSA number of threads.  Deleted and reset to NULL in
   hsa_deinit_compilation_unit_data.  */
78 hsa_symbol
*hsa_num_threads
= NULL
;
80 /* Set of decls of functions that cannot be expanded to HSAIL.  The decl of
   the current function is added when it is marked as failed.  */
81 hash_set
<tree
> *hsa_failed_functions
= NULL
;
83 /* True if compilation unit-wide data are already allocated and
   initialized.  Set by hsa_init_compilation_unit_data and cleared by
   hsa_deinit_compilation_unit_data.  */
84 static bool compilation_unit_data_initialized
;
86 /* Return true if FNDECL represents an HSA-callable function, i.e. its
   DECL_ATTRIBUTES contain "omp declare target" but do not contain
   "oacc function".  */
89 hsa_callable_function_p (tree fndecl
)
91 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl
))
92 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl
)));
95 /* Allocate HSA structures that are used when dealing with different functions. */
99 hsa_init_compilation_unit_data (void)
/* The flag below guards against initializing the data twice.  */
101 if (compilation_unit_data_initialized
)
104 compilation_unit_data_initialized
= true;
/* Allocate the compilation-unit-wide tables; 8 and 2 are the initial sizes
   handed to the hash_table constructors.  */
106 hsa_global_variable_symbols
= new hash_table
<hsa_noop_symbol_hasher
> (8);
107 hsa_failed_functions
= new hash_set
<tree
> ();
108 hsa_emitted_internal_decls
= new hash_table
<hsa_internal_fn_hasher
> (2);
111 /* Free data structures that are used when dealing with different functions. */
115 hsa_deinit_compilation_unit_data (void)
/* Tearing down is only valid after hsa_init_compilation_unit_data ran.  */
117 gcc_assert (compilation_unit_data_initialized
);
119 delete hsa_failed_functions
;
120 delete hsa_emitted_internal_decls
;
/* Visit every symbol stored in the table before the table itself is
   deleted.  */
122 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
123 = hsa_global_variable_symbols
->begin ();
124 it
!= hsa_global_variable_symbols
->end ();
127 hsa_symbol
*sym
= *it
;
131 delete hsa_global_variable_symbols
;
135 delete hsa_num_threads
;
136 hsa_num_threads
= NULL
;
/* Clear the flag so that the data can be initialized again later.  */
139 compilation_unit_data_initialized
= false;
142 /* Return true if we are generating large HSA machine model.  The decision
   is made by testing whether Pmode is 64 bits wide.  */
145 hsa_machine_large_p (void)
147 /* FIXME: I suppose this is technically wrong but should work for me now. */
148 return (GET_MODE_BITSIZE (Pmode
) == 64);
151 /* Return the HSA profile we are using.
   NOTE(review): the _p suffix suggests this returns true when the full
   profile is in use -- confirm against the function body.  */
154 hsa_full_profile_p (void)
159 /* Return true if the register in operand number OPNUM of this instruction
   is an output, false if it is an input.  */
163 hsa_insn_basic::op_output_p (unsigned opnum
)
168 case BRIG_OPCODE_CBR
:
169 case BRIG_OPCODE_SBR
:
171 case BRIG_OPCODE_SIGNALNORET
:
172 /* FIXME: There are probably missing cases here, double check. */
174 case BRIG_OPCODE_EXPAND
:
175 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0.
   Every operand except the last one is therefore an output.  */
176 return opnum
< operand_count () - 1;
182 /* Return true if OPCODE is a floating-point bit instruction opcode.  The
   opcodes enumerated below include NEG, ABS, CLASS and COPYSIGN.  */
185 hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode
)
189 case BRIG_OPCODE_NEG
:
190 case BRIG_OPCODE_ABS
:
191 case BRIG_OPCODE_CLASS
:
192 case BRIG_OPCODE_COPYSIGN
:
199 /* Return the number of destination operands for this INSN.
   NOTE(review): despite the name input_count, num_used_ops subtracts this
   value from operand_count () to obtain the number of source operands, so
   the value is the count of non-source operands.  */
202 hsa_insn_basic::input_count ()
209 case BRIG_OPCODE_NOP
:
212 case BRIG_OPCODE_EXPAND
:
216 /* ld_v[234] not yet handled. */
222 case BRIG_OPCODE_ATOMICNORET
:
225 case BRIG_OPCODE_SIGNAL
:
228 case BRIG_OPCODE_SIGNALNORET
:
231 case BRIG_OPCODE_MEMFENCE
:
/* Image operations are not supported; they are reported through sorry.  */
234 case BRIG_OPCODE_RDIMAGE
:
235 case BRIG_OPCODE_LDIMAGE
:
236 case BRIG_OPCODE_STIMAGE
:
237 case BRIG_OPCODE_QUERYIMAGE
:
238 case BRIG_OPCODE_QUERYSAMPLER
:
239 sorry ("HSA image ops not handled");
242 case BRIG_OPCODE_CBR
:
246 case BRIG_OPCODE_SBR
:
249 case BRIG_OPCODE_WAVEBARRIER
:
252 case BRIG_OPCODE_BARRIER
:
253 case BRIG_OPCODE_ARRIVEFBAR
:
254 case BRIG_OPCODE_INITFBAR
:
255 case BRIG_OPCODE_JOINFBAR
:
256 case BRIG_OPCODE_LEAVEFBAR
:
257 case BRIG_OPCODE_RELEASEFBAR
:
258 case BRIG_OPCODE_WAITFBAR
:
261 case BRIG_OPCODE_LDF
:
264 case BRIG_OPCODE_ACTIVELANECOUNT
:
265 case BRIG_OPCODE_ACTIVELANEID
:
266 case BRIG_OPCODE_ACTIVELANEMASK
:
267 case BRIG_OPCODE_ACTIVELANEPERMUTE
:
270 case BRIG_OPCODE_CALL
:
271 case BRIG_OPCODE_SCALL
:
272 case BRIG_OPCODE_ICALL
:
275 case BRIG_OPCODE_RET
:
278 case BRIG_OPCODE_ALLOCA
:
281 case BRIG_OPCODE_CLEARDETECTEXCEPT
:
284 case BRIG_OPCODE_SETDETECTEXCEPT
:
287 case BRIG_OPCODE_PACKETCOMPLETIONSIG
:
288 case BRIG_OPCODE_PACKETID
:
289 case BRIG_OPCODE_CASQUEUEWRITEINDEX
:
290 case BRIG_OPCODE_LDQUEUEREADINDEX
:
291 case BRIG_OPCODE_LDQUEUEWRITEINDEX
:
292 case BRIG_OPCODE_STQUEUEREADINDEX
:
293 case BRIG_OPCODE_STQUEUEWRITEINDEX
:
296 case BRIG_OPCODE_ADDQUEUEWRITEINDEX
:
299 case BRIG_OPCODE_DEBUGTRAP
:
302 case BRIG_OPCODE_GROUPBASEPTR
:
303 case BRIG_OPCODE_KERNARGBASEPTR
:
306 case HSA_OPCODE_ARG_BLOCK
:
/* NOTE(review): the label below is a BRIG_KIND_ constant among opcode
   labels; presumably comment instructions use it as their opcode --
   confirm.  */
309 case BRIG_KIND_DIRECTIVE_COMMENT
:
314 /* Return the number of source operands for this INSN, computed as the
   total operand count minus the value of input_count ().  */
317 hsa_insn_basic::num_used_ops ()
/* Sanity check so that the subtraction below cannot wrap around.  */
319 gcc_checking_assert (input_count () <= operand_count ());
321 return operand_count () - input_count ();
324 /* Set alignment to VALUE.  When a dump file with TDF_DETAILS is active
   and VALUE is lower than the current m_align, the decrease is logged to
   the dump file together with a dump of this instruction.  */
327 hsa_insn_mem::set_align (BrigAlignment8_t value
)
329 /* TODO: Perhaps remove this dump later on: */
330 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && value
< m_align
)
332 fprintf (dump_file
, "Decreasing alignment to %u in instruction ", value
);
333 dump_hsa_insn (dump_file
, this);
338 /* Return size of HSA type T in bits.  The case labels below are grouped;
   each group of packed types shares one result.  */
341 hsa_type_bit_size (BrigType16_t t
)
364 case BRIG_TYPE_U16X2
:
366 case BRIG_TYPE_S16X2
:
367 case BRIG_TYPE_F16X2
:
375 case BRIG_TYPE_U16X4
:
376 case BRIG_TYPE_U32X2
:
378 case BRIG_TYPE_S16X4
:
379 case BRIG_TYPE_S32X2
:
380 case BRIG_TYPE_F16X4
:
381 case BRIG_TYPE_F32X2
:
386 case BRIG_TYPE_U8X16
:
387 case BRIG_TYPE_U16X8
:
388 case BRIG_TYPE_U32X4
:
389 case BRIG_TYPE_U64X2
:
390 case BRIG_TYPE_S8X16
:
391 case BRIG_TYPE_S16X8
:
392 case BRIG_TYPE_S32X4
:
393 case BRIG_TYPE_S64X2
:
394 case BRIG_TYPE_F16X8
:
395 case BRIG_TYPE_F32X4
:
396 case BRIG_TYPE_F64X2
:
/* Reaching this assertion is only valid once an error has already been
   recorded for the current HSA function.  */
400 gcc_assert (hsa_seen_error ());
405 /* Return BRIG bit-type with BITSIZE length.  The results visible below
   are BRIG_TYPE_B16, BRIG_TYPE_B32, BRIG_TYPE_B64 and BRIG_TYPE_B128.  */
408 hsa_bittype_for_bitsize (unsigned bitsize
)
417 return BRIG_TYPE_B16
;
419 return BRIG_TYPE_B32
;
421 return BRIG_TYPE_B64
;
423 return BRIG_TYPE_B128
;
429 /* Return BRIG unsigned int type with BITSIZE length.  The results
   visible below are BRIG_TYPE_U16, BRIG_TYPE_U32 and BRIG_TYPE_U64.  */
432 hsa_uint_for_bitsize (unsigned bitsize
)
439 return BRIG_TYPE_U16
;
441 return BRIG_TYPE_U32
;
443 return BRIG_TYPE_U64
;
449 /* Return BRIG float type with BITSIZE length.  The results visible below
   are BRIG_TYPE_F16, BRIG_TYPE_F32 and BRIG_TYPE_F64.  */
452 hsa_float_for_bitsize (unsigned bitsize
)
457 return BRIG_TYPE_F16
;
459 return BRIG_TYPE_F32
;
461 return BRIG_TYPE_F64
;
467 /* Return HSA bit-type with the same size as the type T.  This is
   hsa_bittype_for_bitsize applied to hsa_type_bit_size (T).  */
470 hsa_bittype_for_type (BrigType16_t t
)
472 return hsa_bittype_for_bitsize (hsa_type_bit_size (t
));
475 /* Return HSA unsigned integer type with the same size as the type T.
   This is hsa_uint_for_bitsize applied to hsa_type_bit_size (T).  */
478 hsa_unsigned_type_for_type (BrigType16_t t
)
480 return hsa_uint_for_bitsize (hsa_type_bit_size (t
));
483 /* Return true if TYPE is a packed HSA type, i.e. the bits of TYPE
   selected by BRIG_TYPE_PACK_MASK differ from BRIG_TYPE_PACK_NONE.  */
486 hsa_type_packed_p (BrigType16_t type
)
488 return (type
& BRIG_TYPE_PACK_MASK
) != BRIG_TYPE_PACK_NONE
;
491 /* Return true if and only if TYPE is a floating point number type.  The
   decision is made on TYPE masked with BRIG_TYPE_BASE_MASK.  */
494 hsa_type_float_p (BrigType16_t type
)
496 switch (type
& BRIG_TYPE_BASE_MASK
)
507 /* Return true if and only if TYPE is an integer number type.  The
   decision is made on TYPE masked with BRIG_TYPE_BASE_MASK.  */
510 hsa_type_integer_p (BrigType16_t type
)
512 switch (type
& BRIG_TYPE_BASE_MASK
)
528 /* Return true if and only if TYPE is a bit-type.  The decision is made
   on TYPE masked with BRIG_TYPE_BASE_MASK.  */
531 hsa_btype_p (BrigType16_t type
)
533 switch (type
& BRIG_TYPE_BASE_MASK
)
547 /* Return the HSA encoding of an alignment of N bits.  N must be at least
   8 and a power of two; this is asserted.  */
550 hsa_alignment_encoding (unsigned n
)
/* N & (N - 1) is zero exactly when a non-zero N has a single bit set.  */
552 gcc_assert (n
>= 8 && !(n
& (n
- 1)));
554 return BRIG_ALIGNMENT_32
;
559 return BRIG_ALIGNMENT_1
;
561 return BRIG_ALIGNMENT_2
;
563 return BRIG_ALIGNMENT_4
;
565 return BRIG_ALIGNMENT_8
;
567 return BRIG_ALIGNMENT_16
;
573 /* Return natural alignment of HSA TYPE: the alignment encoding of the bit
   size of TYPE, with the BRIG_TYPE_ARRAY flag masked off first.  */
576 hsa_natural_alignment (BrigType16_t type
)
578 return hsa_alignment_encoding (hsa_type_bit_size (type
& ~BRIG_TYPE_ARRAY
));
581 /* Call the correct destructor of a HSA instruction.  INSN is tested with
   dyn_cast against each instruction class in turn and the destructor of
   the matching class is invoked explicitly; the hsa_insn_basic destructor
   comes last in the chain.  */
584 hsa_destroy_insn (hsa_insn_basic
*insn
)
586 if (hsa_insn_phi
*phi
= dyn_cast
<hsa_insn_phi
*> (insn
))
587 phi
->~hsa_insn_phi ();
/* NOTE(review): no destructor call is visible for the hsa_insn_br branch
   here, unlike every other branch -- confirm that ~hsa_insn_br is
   invoked.  */
588 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
590 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
591 cmp
->~hsa_insn_cmp ();
592 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
593 mem
->~hsa_insn_mem ();
594 else if (hsa_insn_atomic
*atomic
= dyn_cast
<hsa_insn_atomic
*> (insn
))
595 atomic
->~hsa_insn_atomic ();
596 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
597 seg
->~hsa_insn_seg ();
598 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
599 call
->~hsa_insn_call ();
600 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
601 block
->~hsa_insn_arg_block ();
602 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
603 sbr
->~hsa_insn_sbr ();
604 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
605 comment
->~hsa_insn_comment ();
/* Destructor of the base instruction class.  */
607 insn
->~hsa_insn_basic ();
610 /* Call the correct destructor of a HSA operand.  OP is tested with
   dyn_cast against each operand class in turn and the destructor of the
   matching class is invoked explicitly.  */
613 hsa_destroy_operand (hsa_op_base
*op
)
615 if (hsa_op_code_list
*list
= dyn_cast
<hsa_op_code_list
*> (op
))
616 list
->~hsa_op_code_list ();
617 else if (hsa_op_operand_list
*list
= dyn_cast
<hsa_op_operand_list
*> (op
))
618 list
->~hsa_op_operand_list ();
/* NOTE(review): no destructor call is visible for the hsa_op_reg branch
   here, unlike the other branches -- confirm that ~hsa_op_reg is
   invoked.  */
619 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
621 else if (hsa_op_immed
*immed
= dyn_cast
<hsa_op_immed
*> (op
))
622 immed
->~hsa_op_immed ();
627 /* Create a mapping between the original function DECL and kernel name
   NAME.  OMP_DATA_SIZE and GRIDIFIED_KERNEL_P are recorded in the new
   element, which is appended to hsa_decl_kernel_mapping.
   NOTE(review): hsa_free_decl_kernel_mapping later calls free () on each
   element's name, so NAME presumably has to be heap-allocated -- confirm
   against callers.  */
630 hsa_add_kern_decl_mapping (tree decl
, char *name
, unsigned omp_data_size
,
631 bool gridified_kernel_p
)
633 hsa_decl_kernel_map_element dkm
;
636 dkm
.omp_data_size
= omp_data_size
;
637 dkm
.gridified_kernel_p
= gridified_kernel_p
;
638 vec_safe_push (hsa_decl_kernel_mapping
, dkm
);
641 /* Return the number of kernel decl name mappings, i.e. the length of
   hsa_decl_kernel_mapping as reported by vec_safe_length.  */
644 hsa_get_number_decl_kernel_mappings (void)
646 return vec_safe_length (hsa_decl_kernel_mapping
);
649 /* Return the decl in the Ith kernel decl name mapping.  The mapping
   vector is dereferenced directly, without a NULL check in this
   function.  */
652 hsa_get_decl_kernel_mapping_decl (unsigned i
)
654 return (*hsa_decl_kernel_mapping
)[i
].decl
;
657 /* Return the name in the Ith kernel decl name mapping.  The mapping
   vector is dereferenced directly, without a NULL check in this
   function.  */
660 hsa_get_decl_kernel_mapping_name (unsigned i
)
662 return (*hsa_decl_kernel_mapping
)[i
].name
;
665 /* Return the OMP data size (omp_data_size) recorded in the Ith kernel
   decl name mapping.  The mapping vector is dereferenced directly, without
   a NULL check in this function.  */
668 hsa_get_decl_kernel_mapping_omp_size (unsigned i
)
670 return (*hsa_decl_kernel_mapping
)[i
].omp_data_size
;
673 /* Return true if the function in the Ith kernel decl name mapping is a
   gridified kernel.  The mapping vector is dereferenced directly, without
   a NULL check in this function.  */
676 hsa_get_decl_kernel_mapping_gridified (unsigned i
)
678 return (*hsa_decl_kernel_mapping
)[i
].gridified_kernel_p
;
681 /* Free the mapping between original decls and kernel names.  Each
   element's name is released with free () and the vector itself with
   ggc_free.  */
684 hsa_free_decl_kernel_mapping (void)
/* A mapping that was never created is tested for first.  */
686 if (hsa_decl_kernel_mapping
== NULL
)
689 for (unsigned i
= 0; i
< hsa_decl_kernel_mapping
->length (); ++i
)
690 free ((*hsa_decl_kernel_mapping
)[i
].name
);
691 ggc_free (hsa_decl_kernel_mapping
);
694 /* Add new kernel dependency: record that CALLER calls the kernel named
   CALLED_FUNCTION by appending the name to CALLER's vector in
   hsa_decl_kernel_dependencies.  */
697 hsa_add_kernel_dependency (tree caller
, const char *called_function
)
/* The map is allocated on first use.  */
699 if (hsa_decl_kernel_dependencies
== NULL
)
700 hsa_decl_kernel_dependencies
= new hash_map
<tree
, vec
<const char *> *> ();
702 vec
<const char *> *s
= NULL
;
703 vec
<const char *> **slot
= hsa_decl_kernel_dependencies
->get (caller
);
/* A fresh vector is created and registered under CALLER.  */
706 s
= new vec
<const char *> ();
707 hsa_decl_kernel_dependencies
->put (caller
, s
);
712 s
->safe_push (called_function
);
715 /* Expansion to HSA needs a few gc roots to hold types, constructors etc.
   In order to minimize the number of GTY roots, we'll root them all in the
   following array.  The individual elements should only be accessed by the
   very simple getters (of a pointer-to-tree) below: element 0 through
   hsa_get_ctor_statements, element 1 through hsa_get_dtor_statements and
   element 2 through hsa_get_kernel_dispatch_type.  */
720 static GTY(()) tree hsa_tree_gt_roots
[3];
/* Return the address of element 0 of hsa_tree_gt_roots, the slot used for
   ctor statements.  */
723 hsa_get_ctor_statements (void)
725 return &hsa_tree_gt_roots
[0];
/* Return the address of element 1 of hsa_tree_gt_roots, the slot used for
   dtor statements.  */
729 hsa_get_dtor_statements (void)
731 return &hsa_tree_gt_roots
[1];
/* Return the address of element 2 of hsa_tree_gt_roots, the slot used for
   the kernel dispatch type.  */
735 hsa_get_kernel_dispatch_type (void)
737 return &hsa_tree_gt_roots
[2];
740 /* Modify the name P in-place so that it is a valid HSA identifier.  The
   characters tested for are '.' and '-'.  */
743 hsa_sanitize_name (char *p
)
746 if (*p
== '.' || *p
== '-')
750 /* Clone the name P into a newly allocated buffer, prefixed with an
   ampersand, and sanitize the result.  P is copied starting at offset 1,
   so the extra character is a leading one, not a trailing one.  */
753 hsa_brig_function_name (const char *p
)
755 unsigned len
= strlen (p
);
/* LEN + 2 bytes leave room for one character in front of the copy and one
   after it.  */
756 char *buf
= XNEWVEC (char, len
+ 2);
760 memcpy (buf
+ 1, p
, len
);
762 hsa_sanitize_name (buf
);
766 /* Return declaration name if exists.  For a DECL without DECL_NAME a
   name of the form __hsa_anonymous_<DECL_UID> is generated and copied with
   ggc_strdup.  Otherwise functions and global variables use
   DECL_ASSEMBLER_NAME, while the remaining assignment below uses
   DECL_NAME.  */
769 hsa_get_declaration_name (tree decl
)
771 if (!DECL_NAME (decl
))
774 snprintf (buf
, 64, "__hsa_anonymous_%i", DECL_UID (decl
));
775 const char *ggc_str
= ggc_strdup (buf
);
780 if (TREE_CODE (decl
) == FUNCTION_DECL
781 || (TREE_CODE (decl
) == VAR_DECL
&& is_global_var (decl
)))
782 name_tree
= DECL_ASSEMBLER_NAME (decl
);
784 name_tree
= DECL_NAME (decl
);
786 const char *name
= IDENTIFIER_POINTER (name_tree
);
787 /* User-defined assembly names have prepended asterisk symbol. */
/* Link the GPU and HOST nodes in the HSA summaries.  Both summaries get
   KIND and GRIDIFIED_KERNEL_P; the GPU summary is marked as the GPU
   implementation and each summary's m_binded_function points at the other
   node.  The GPU decl additionally receives the "flatten" attribute and a
   copy of its optimization node with loop and SLP vectorization switched
   off.  */
795 hsa_summary_t::link_functions (cgraph_node
*gpu
, cgraph_node
*host
,
796 hsa_function_kind kind
, bool gridified_kernel_p
)
798 hsa_function_summary
*gpu_summary
= get (gpu
);
799 hsa_function_summary
*host_summary
= get (host
);
801 gpu_summary
->m_kind
= kind
;
802 host_summary
->m_kind
= kind
;
804 gpu_summary
->m_gpu_implementation_p
= true;
805 host_summary
->m_gpu_implementation_p
= false;
807 gpu_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
808 host_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
/* Cross-link the two nodes.  */
810 gpu_summary
->m_binded_function
= host
;
811 host_summary
->m_binded_function
= gpu
;
/* Prepend the "flatten" attribute to the attributes of the GPU decl.  */
813 tree gdecl
= gpu
->decl
;
814 DECL_ATTRIBUTES (gdecl
)
815 = tree_cons (get_identifier ("flatten"), NULL_TREE
,
816 DECL_ATTRIBUTES (gdecl
));
/* Work on a copy of the function-specific optimization node (or of the
   default node when there is none) so that only the GPU decl is
   affected.  */
818 tree fn_opts
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
);
819 if (fn_opts
== NULL_TREE
)
820 fn_opts
= optimization_default_node
;
821 fn_opts
= copy_node (fn_opts
);
822 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_loop_vectorize
= false;
823 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_slp_vectorize
= false;
824 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
) = fn_opts
;
826 /* Create reference between a kernel and a corresponding host implementation
   to guarantee LTO streaming to the same LTRANS. */
828 if (kind
== HSA_KERNEL
)
829 gpu
->create_reference (host
, IPA_REF_ADDR
);
832 /* Add a HOST function to HSA summaries and mark its summary as
   HSA_KERNEL.  hsa_summaries is allocated on first use.  */
835 hsa_register_kernel (cgraph_node
*host
)
837 if (hsa_summaries
== NULL
)
838 hsa_summaries
= new hsa_summary_t (symtab
);
839 hsa_function_summary
*s
= hsa_summaries
->get (host
);
840 s
->m_kind
= HSA_KERNEL
;
843 /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of a HOST function. */
847 hsa_register_kernel (cgraph_node
*gpu
, cgraph_node
*host
)
/* hsa_summaries is allocated on first use.  */
849 if (hsa_summaries
== NULL
)
850 hsa_summaries
= new hsa_summary_t (symtab
);
/* Pairs registered through this overload are always linked as HSA_KERNEL
   with the gridified flag set (the last argument).  */
851 hsa_summaries
->link_functions (gpu
, host
, HSA_KERNEL
, true);
854 /* Return true if expansion of the current HSA function has already
   failed, i.e. the m_seen_error flag of hsa_cfun.  */
857 hsa_seen_error (void)
859 return hsa_cfun
->m_seen_error
;
862 /* Mark current HSA function as failed: its decl is added to
   hsa_failed_functions and the m_seen_error flag of hsa_cfun is set.  */
867 hsa_failed_functions
->add (hsa_cfun
->m_decl
);
868 hsa_cfun
->m_seen_error
= true;
/* Build the name of this internal function: a lower-cased copy of
   internal_fn_name (m_fn), combined with a suffix that depends on
   m_type_bit_size, and passed through hsa_sanitize_name.  */
872 hsa_internal_fn::name ()
/* xstrdup gives a private copy that can be modified in place.  */
874 char *name
= xstrdup (internal_fn_name (m_fn
));
875 for (char *ptr
= name
; *ptr
; ptr
++)
876 *ptr
= TOLOWER (*ptr
);
878 const char *suffix
= NULL
;
879 if (m_type_bit_size
== 32)
884 char *name2
= concat (name
, suffix
, NULL
);
889 hsa_sanitize_name (name
);
/* NOTE(review): presumably returns the number of arguments of the internal
   function, selected by a switch over IFN_ codes -- confirm against the
   full body.  */
894 hsa_internal_fn::get_arity ()
911 case IFN_SIGNIFICAND
:
938 /* As we produce sorry message for unknown internal functions,
   reaching this label is definitely a bug. */
945 hsa_internal_fn::get_argument_type (int n
)
962 case IFN_SIGNIFICAND
:
978 return hsa_float_for_bitsize (m_type_bit_size
);
981 if (n
== -1 || n
== 0)
982 return hsa_float_for_bitsize (m_type_bit_size
);
984 return BRIG_TYPE_S32
;
987 /* As we produce sorry message for unknown internal functions,
988 reaching this label is definitely a bug. */