1 /* Implementation of commonly needed HSAIL related functions and methods.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
32 #include "gimple-pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "alloc-pool.h"
36 #include "print-tree.h"
37 #include "stringpool.h"
38 #include "symbol-summary.h"
40 #include "internal-fn.h"
44 /* Structure containing intermediate HSA representation of the generated
46 class hsa_function_representation
*hsa_cfun
;
48 /* Element of the mapping vector between a host decl and an HSA kernel. */
50 struct GTY(()) hsa_decl_kernel_map_element
52 /* The decl of the host function. */
54 /* Name of the HSA kernel in BRIG. */
55 char * GTY((skip
)) name
;
56 /* Size of OMP data, if the kernel contains a kernel dispatch. */
57 unsigned omp_data_size
;
58 /* True if the function is a gridified kernel. */
59 bool gridified_kernel_p
;
62 /* Mapping between decls and corresponding HSA kernels in this compilation
65 static GTY (()) vec
<hsa_decl_kernel_map_element
, va_gc
>
66 *hsa_decl_kernel_mapping
;
68 /* Mapping between decls and corresponding HSA kernels
69 called by the function. */
70 hash_map
<tree
, vec
<const char *> *> *hsa_decl_kernel_dependencies
;
72 /* Hash function to lookup a symbol for a decl. */
73 hash_table
<hsa_noop_symbol_hasher
> *hsa_global_variable_symbols
;
76 hsa_summary_t
*hsa_summaries
= NULL
;
78 /* HSA number of threads. */
79 hsa_symbol
*hsa_num_threads
= NULL
;
81 /* HSA function that cannot be expanded to HSAIL. */
82 hash_set
<tree
> *hsa_failed_functions
= NULL
;
84 /* True if compilation unit-wide data are already allocated and initialized. */
85 static bool compilation_unit_data_initialized
;
87 /* Return true if FNDECL represents an HSA-callable function. */
90 hsa_callable_function_p (tree fndecl
)
92 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl
))
93 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl
)));
96 /* Allocate HSA structures that are used when dealing with different
100 hsa_init_compilation_unit_data (void)
102 if (compilation_unit_data_initialized
)
105 compilation_unit_data_initialized
= true;
107 hsa_global_variable_symbols
= new hash_table
<hsa_noop_symbol_hasher
> (8);
108 hsa_failed_functions
= new hash_set
<tree
> ();
109 hsa_emitted_internal_decls
= new hash_table
<hsa_internal_fn_hasher
> (2);
112 /* Free data structures that are used when dealing with different
116 hsa_deinit_compilation_unit_data (void)
118 gcc_assert (compilation_unit_data_initialized
);
120 delete hsa_failed_functions
;
121 delete hsa_emitted_internal_decls
;
123 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
124 = hsa_global_variable_symbols
->begin ();
125 it
!= hsa_global_variable_symbols
->end ();
128 hsa_symbol
*sym
= *it
;
132 delete hsa_global_variable_symbols
;
136 delete hsa_num_threads
;
137 hsa_num_threads
= NULL
;
140 compilation_unit_data_initialized
= false;
143 /* Return true if we are generating large HSA machine model. */
146 hsa_machine_large_p (void)
148 /* FIXME: I suppose this is technically wrong but should work for me now. */
149 return (GET_MODE_BITSIZE (Pmode
) == 64);
152 /* Return the HSA profile we are using. */
155 hsa_full_profile_p (void)
160 /* Return true if a register in operand number OPNUM of instruction
161 is an output. False if it is an input. */
164 hsa_insn_basic::op_output_p (unsigned opnum
)
169 case BRIG_OPCODE_CBR
:
170 case BRIG_OPCODE_SBR
:
172 case BRIG_OPCODE_SIGNALNORET
:
173 case BRIG_OPCODE_DEBUGTRAP
:
174 /* FIXME: There are probably missing cases here, double check. */
176 case BRIG_OPCODE_EXPAND
:
177 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */
178 return opnum
< operand_count () - 1;
184 /* Return true if OPCODE is a floating-point bit instruction opcode. */
187 hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode
)
191 case BRIG_OPCODE_NEG
:
192 case BRIG_OPCODE_ABS
:
193 case BRIG_OPCODE_CLASS
:
194 case BRIG_OPCODE_COPYSIGN
:
201 /* Return the number of destination operands for this INSN. */
204 hsa_insn_basic::input_count ()
211 case BRIG_OPCODE_NOP
:
214 case BRIG_OPCODE_EXPAND
:
218 /* ld_v[234] not yet handled. */
224 case BRIG_OPCODE_ATOMICNORET
:
227 case BRIG_OPCODE_SIGNAL
:
230 case BRIG_OPCODE_SIGNALNORET
:
233 case BRIG_OPCODE_MEMFENCE
:
236 case BRIG_OPCODE_RDIMAGE
:
237 case BRIG_OPCODE_LDIMAGE
:
238 case BRIG_OPCODE_STIMAGE
:
239 case BRIG_OPCODE_QUERYIMAGE
:
240 case BRIG_OPCODE_QUERYSAMPLER
:
241 sorry ("HSA image ops not handled");
244 case BRIG_OPCODE_CBR
:
248 case BRIG_OPCODE_SBR
:
251 case BRIG_OPCODE_WAVEBARRIER
:
254 case BRIG_OPCODE_BARRIER
:
255 case BRIG_OPCODE_ARRIVEFBAR
:
256 case BRIG_OPCODE_INITFBAR
:
257 case BRIG_OPCODE_JOINFBAR
:
258 case BRIG_OPCODE_LEAVEFBAR
:
259 case BRIG_OPCODE_RELEASEFBAR
:
260 case BRIG_OPCODE_WAITFBAR
:
263 case BRIG_OPCODE_LDF
:
266 case BRIG_OPCODE_ACTIVELANECOUNT
:
267 case BRIG_OPCODE_ACTIVELANEID
:
268 case BRIG_OPCODE_ACTIVELANEMASK
:
269 case BRIG_OPCODE_ACTIVELANEPERMUTE
:
272 case BRIG_OPCODE_CALL
:
273 case BRIG_OPCODE_SCALL
:
274 case BRIG_OPCODE_ICALL
:
277 case BRIG_OPCODE_RET
:
280 case BRIG_OPCODE_ALLOCA
:
283 case BRIG_OPCODE_CLEARDETECTEXCEPT
:
286 case BRIG_OPCODE_SETDETECTEXCEPT
:
289 case BRIG_OPCODE_PACKETCOMPLETIONSIG
:
290 case BRIG_OPCODE_PACKETID
:
291 case BRIG_OPCODE_CASQUEUEWRITEINDEX
:
292 case BRIG_OPCODE_LDQUEUEREADINDEX
:
293 case BRIG_OPCODE_LDQUEUEWRITEINDEX
:
294 case BRIG_OPCODE_STQUEUEREADINDEX
:
295 case BRIG_OPCODE_STQUEUEWRITEINDEX
:
298 case BRIG_OPCODE_ADDQUEUEWRITEINDEX
:
301 case BRIG_OPCODE_DEBUGTRAP
:
304 case BRIG_OPCODE_GROUPBASEPTR
:
305 case BRIG_OPCODE_KERNARGBASEPTR
:
308 case HSA_OPCODE_ARG_BLOCK
:
311 case BRIG_KIND_DIRECTIVE_COMMENT
:
316 /* Return the number of source operands for this INSN. */
319 hsa_insn_basic::num_used_ops ()
321 gcc_checking_assert (input_count () <= operand_count ());
323 return operand_count () - input_count ();
326 /* Set alignment to VALUE. */
329 hsa_insn_mem::set_align (BrigAlignment8_t value
)
331 /* TODO: Perhaps remove this dump later on: */
332 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && value
< m_align
)
334 fprintf (dump_file
, "Decreasing alignment to %u in instruction ", value
);
335 dump_hsa_insn (dump_file
, this);
340 /* Return size of HSA type T in bits. */
343 hsa_type_bit_size (BrigType16_t t
)
366 case BRIG_TYPE_U16X2
:
368 case BRIG_TYPE_S16X2
:
369 case BRIG_TYPE_F16X2
:
377 case BRIG_TYPE_U16X4
:
378 case BRIG_TYPE_U32X2
:
380 case BRIG_TYPE_S16X4
:
381 case BRIG_TYPE_S32X2
:
382 case BRIG_TYPE_F16X4
:
383 case BRIG_TYPE_F32X2
:
388 case BRIG_TYPE_U8X16
:
389 case BRIG_TYPE_U16X8
:
390 case BRIG_TYPE_U32X4
:
391 case BRIG_TYPE_U64X2
:
392 case BRIG_TYPE_S8X16
:
393 case BRIG_TYPE_S16X8
:
394 case BRIG_TYPE_S32X4
:
395 case BRIG_TYPE_S64X2
:
396 case BRIG_TYPE_F16X8
:
397 case BRIG_TYPE_F32X4
:
398 case BRIG_TYPE_F64X2
:
402 gcc_assert (hsa_seen_error ());
407 /* Return BRIG bit-type with BITSIZE length. */
410 hsa_bittype_for_bitsize (unsigned bitsize
)
419 return BRIG_TYPE_B16
;
421 return BRIG_TYPE_B32
;
423 return BRIG_TYPE_B64
;
425 return BRIG_TYPE_B128
;
431 /* Return BRIG unsigned int type with BITSIZE length. */
434 hsa_uint_for_bitsize (unsigned bitsize
)
441 return BRIG_TYPE_U16
;
443 return BRIG_TYPE_U32
;
445 return BRIG_TYPE_U64
;
451 /* Return BRIG float type with BITSIZE length. */
454 hsa_float_for_bitsize (unsigned bitsize
)
459 return BRIG_TYPE_F16
;
461 return BRIG_TYPE_F32
;
463 return BRIG_TYPE_F64
;
469 /* Return HSA bit-type with the same size as the type T. */
472 hsa_bittype_for_type (BrigType16_t t
)
474 return hsa_bittype_for_bitsize (hsa_type_bit_size (t
));
477 /* Return HSA unsigned integer type with the same size as the type T. */
480 hsa_unsigned_type_for_type (BrigType16_t t
)
482 return hsa_uint_for_bitsize (hsa_type_bit_size (t
));
485 /* Return true if TYPE is a packed HSA type. */
488 hsa_type_packed_p (BrigType16_t type
)
490 return (type
& BRIG_TYPE_PACK_MASK
) != BRIG_TYPE_PACK_NONE
;
493 /* Return true if and only if TYPE is a floating point number type. */
496 hsa_type_float_p (BrigType16_t type
)
498 switch (type
& BRIG_TYPE_BASE_MASK
)
509 /* Return true if and only if TYPE is an integer number type. */
512 hsa_type_integer_p (BrigType16_t type
)
514 switch (type
& BRIG_TYPE_BASE_MASK
)
530 /* Return true if and only if TYPE is a bit-type. */
533 hsa_btype_p (BrigType16_t type
)
535 switch (type
& BRIG_TYPE_BASE_MASK
)
549 /* Return the HSA alignment encoding for an alignment of N bits. */
552 hsa_alignment_encoding (unsigned n
)
554 gcc_assert (n
>= 8 && !(n
& (n
- 1)));
556 return BRIG_ALIGNMENT_32
;
561 return BRIG_ALIGNMENT_1
;
563 return BRIG_ALIGNMENT_2
;
565 return BRIG_ALIGNMENT_4
;
567 return BRIG_ALIGNMENT_8
;
569 return BRIG_ALIGNMENT_16
;
575 /* Return the HSA alignment encoding of the alignment of T as computed
576 by get_object_alignment. */
579 hsa_object_alignment (tree t
)
581 return hsa_alignment_encoding (get_object_alignment (t
));
584 /* Return byte alignment for given BrigAlignment8_t value. */
587 hsa_byte_alignment (BrigAlignment8_t alignment
)
589 gcc_assert (alignment
!= BRIG_ALIGNMENT_NONE
);
591 return 1 << (alignment
- 1);
594 /* Return natural alignment of HSA TYPE. */
597 hsa_natural_alignment (BrigType16_t type
)
599 return hsa_alignment_encoding (hsa_type_bit_size (type
& ~BRIG_TYPE_ARRAY
));
602 /* Call the correct destructor of a HSA instruction. */
605 hsa_destroy_insn (hsa_insn_basic
*insn
)
607 if (hsa_insn_phi
*phi
= dyn_cast
<hsa_insn_phi
*> (insn
))
608 phi
->~hsa_insn_phi ();
609 else if (hsa_insn_cbr
*br
= dyn_cast
<hsa_insn_cbr
*> (insn
))
610 br
->~hsa_insn_cbr ();
611 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
612 cmp
->~hsa_insn_cmp ();
613 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
614 mem
->~hsa_insn_mem ();
615 else if (hsa_insn_atomic
*atomic
= dyn_cast
<hsa_insn_atomic
*> (insn
))
616 atomic
->~hsa_insn_atomic ();
617 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
618 seg
->~hsa_insn_seg ();
619 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
620 call
->~hsa_insn_call ();
621 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
622 block
->~hsa_insn_arg_block ();
623 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
624 sbr
->~hsa_insn_sbr ();
625 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
627 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
628 comment
->~hsa_insn_comment ();
630 insn
->~hsa_insn_basic ();
633 /* Call the correct destructor of a HSA operand. */
636 hsa_destroy_operand (hsa_op_base
*op
)
638 if (hsa_op_code_list
*list
= dyn_cast
<hsa_op_code_list
*> (op
))
639 list
->~hsa_op_code_list ();
640 else if (hsa_op_operand_list
*list
= dyn_cast
<hsa_op_operand_list
*> (op
))
641 list
->~hsa_op_operand_list ();
642 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
644 else if (hsa_op_immed
*immed
= dyn_cast
<hsa_op_immed
*> (op
))
645 immed
->~hsa_op_immed ();
650 /* Create a mapping between the original function DECL and kernel name NAME. */
653 hsa_add_kern_decl_mapping (tree decl
, char *name
, unsigned omp_data_size
,
654 bool gridified_kernel_p
)
656 hsa_decl_kernel_map_element dkm
;
659 dkm
.omp_data_size
= omp_data_size
;
660 dkm
.gridified_kernel_p
= gridified_kernel_p
;
661 vec_safe_push (hsa_decl_kernel_mapping
, dkm
);
664 /* Return the number of kernel decl name mappings. */
667 hsa_get_number_decl_kernel_mappings (void)
669 return vec_safe_length (hsa_decl_kernel_mapping
);
672 /* Return the decl in the Ith kernel decl name mapping. */
675 hsa_get_decl_kernel_mapping_decl (unsigned i
)
677 return (*hsa_decl_kernel_mapping
)[i
].decl
;
680 /* Return the name in the Ith kernel decl name mapping. */
683 hsa_get_decl_kernel_mapping_name (unsigned i
)
685 return (*hsa_decl_kernel_mapping
)[i
].name
;
688 /* Return maximum OMP size for kernel decl name mapping. */
691 hsa_get_decl_kernel_mapping_omp_size (unsigned i
)
693 return (*hsa_decl_kernel_mapping
)[i
].omp_data_size
;
696 /* Return true if the function in the Ith kernel decl name mapping is a gridified kernel. */
699 hsa_get_decl_kernel_mapping_gridified (unsigned i
)
701 return (*hsa_decl_kernel_mapping
)[i
].gridified_kernel_p
;
704 /* Free the mapping between original decls and kernel names. */
707 hsa_free_decl_kernel_mapping (void)
709 if (hsa_decl_kernel_mapping
== NULL
)
712 for (unsigned i
= 0; i
< hsa_decl_kernel_mapping
->length (); ++i
)
713 free ((*hsa_decl_kernel_mapping
)[i
].name
);
714 ggc_free (hsa_decl_kernel_mapping
);
717 /* Add new kernel dependency. */
720 hsa_add_kernel_dependency (tree caller
, const char *called_function
)
722 if (hsa_decl_kernel_dependencies
== NULL
)
723 hsa_decl_kernel_dependencies
= new hash_map
<tree
, vec
<const char *> *> ();
725 vec
<const char *> *s
= NULL
;
726 vec
<const char *> **slot
= hsa_decl_kernel_dependencies
->get (caller
);
729 s
= new vec
<const char *> ();
730 hsa_decl_kernel_dependencies
->put (caller
, s
);
735 s
->safe_push (called_function
);
738 /* Expansion to HSA needs a few gc roots to hold types, constructors etc. In
739 order to minimize the number of GTY roots, we'll root them all in the
740 following array. The individual elements should only be accessed by the
741 very simple getters (of a pointer-to-tree) below. */
743 static GTY(()) tree hsa_tree_gt_roots
[3];
746 hsa_get_ctor_statements (void)
748 return &hsa_tree_gt_roots
[0];
752 hsa_get_dtor_statements (void)
754 return &hsa_tree_gt_roots
[1];
758 hsa_get_kernel_dispatch_type (void)
760 return &hsa_tree_gt_roots
[2];
763 /* Modify the name P in-place so that it is a valid HSA identifier. */
766 hsa_sanitize_name (char *p
)
769 if (*p
== '.' || *p
== '-')
773 /* Clone the name P, set trailing ampersand and sanitize the name. */
776 hsa_brig_function_name (const char *p
)
778 unsigned len
= strlen (p
);
779 char *buf
= XNEWVEC (char, len
+ 2);
783 memcpy (buf
+ 1, p
, len
);
785 hsa_sanitize_name (buf
);
789 /* Add a flatten attribute and disable vectorization for gpu implementation
790 function decl GDECL. */
792 void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl
)
794 DECL_ATTRIBUTES (gdecl
)
795 = tree_cons (get_identifier ("flatten"), NULL_TREE
,
796 DECL_ATTRIBUTES (gdecl
));
798 tree fn_opts
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
);
799 if (fn_opts
== NULL_TREE
)
800 fn_opts
= optimization_default_node
;
801 fn_opts
= copy_node (fn_opts
);
802 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_loop_vectorize
= false;
803 TREE_OPTIMIZATION (fn_opts
)->x_flag_tree_slp_vectorize
= false;
804 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl
) = fn_opts
;
808 hsa_summary_t::link_functions (cgraph_node
*gpu
, cgraph_node
*host
,
809 hsa_function_kind kind
, bool gridified_kernel_p
)
811 hsa_function_summary
*gpu_summary
= get (gpu
);
812 hsa_function_summary
*host_summary
= get (host
);
814 gpu_summary
->m_kind
= kind
;
815 host_summary
->m_kind
= kind
;
817 gpu_summary
->m_gpu_implementation_p
= true;
818 host_summary
->m_gpu_implementation_p
= false;
820 gpu_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
821 host_summary
->m_gridified_kernel_p
= gridified_kernel_p
;
823 gpu_summary
->m_bound_function
= host
;
824 host_summary
->m_bound_function
= gpu
;
826 process_gpu_implementation_attributes (gpu
->decl
);
828 /* Create reference between a kernel and a corresponding host implementation
829 to guarantee LTO streaming to the same LTRANS. */
830 if (kind
== HSA_KERNEL
)
831 gpu
->create_reference (host
, IPA_REF_ADDR
);
834 /* Add a HOST function to HSA summaries. */
837 hsa_register_kernel (cgraph_node
*host
)
839 if (hsa_summaries
== NULL
)
840 hsa_summaries
= new hsa_summary_t (symtab
);
841 hsa_function_summary
*s
= hsa_summaries
->get (host
);
842 s
->m_kind
= HSA_KERNEL
;
845 /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
849 hsa_register_kernel (cgraph_node
*gpu
, cgraph_node
*host
)
851 if (hsa_summaries
== NULL
)
852 hsa_summaries
= new hsa_summary_t (symtab
);
853 hsa_summaries
->link_functions (gpu
, host
, HSA_KERNEL
, true);
856 /* Return true if expansion of the current HSA function has already failed. */
859 hsa_seen_error (void)
861 return hsa_cfun
->m_seen_error
;
864 /* Mark current HSA function as failed. */
869 hsa_failed_functions
->add (hsa_cfun
->m_decl
);
870 hsa_cfun
->m_seen_error
= true;
874 hsa_internal_fn::name ()
876 char *name
= xstrdup (internal_fn_name (m_fn
));
877 for (char *ptr
= name
; *ptr
; ptr
++)
878 *ptr
= TOLOWER (*ptr
);
880 const char *suffix
= NULL
;
881 if (m_type_bit_size
== 32)
886 char *name2
= concat (name
, suffix
, NULL
);
891 hsa_sanitize_name (name
);
896 hsa_internal_fn::get_arity ()
913 case IFN_SIGNIFICAND
:
939 /* As we produce sorry message for unknown internal functions,
940 reaching this label is definitely a bug. */
946 hsa_internal_fn::get_argument_type (int n
)
963 case IFN_SIGNIFICAND
:
979 return hsa_float_for_bitsize (m_type_bit_size
);
982 if (n
== -1 || n
== 0)
983 return hsa_float_for_bitsize (m_type_bit_size
);
985 return BRIG_TYPE_S32
;
988 /* As we produce sorry message for unknown internal functions,
989 reaching this label is definitely a bug. */