/* brig-function.cc -- declaration of brig_function class.
   Copyright (C) 2016-2018 Free Software Foundation, Inc.
   Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
   for General Processor Tech.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
25 #include "brig-function.h"
26 #include "stringpool.h"
27 #include "tree-iterator.h"
30 #include "gimple-expr.h"
31 #include "print-tree.h"
32 #include "hsa-brig-format.h"
33 #include "stor-layout.h"
34 #include "diagnostic-core.h"
35 #include "brig-code-entry-handler.h"
36 #include "brig-machine.h"
37 #include "brig-util.h"
39 #include "tree-pretty-print.h"
41 #include "profile-count.h"
45 #include "brig-to-generic.h"
46 #include "brig-builtins.h"
48 #include "fold-const.h"
52 brig_function::builtin_map
brig_function::s_custom_builtins
;
54 brig_function::brig_function (const BrigDirectiveExecutable
*exec
,
55 brig_to_generic
*parent
)
56 : m_brig_def (exec
), m_is_kernel (false), m_is_finished (false), m_name (""),
57 m_current_bind_expr (NULL_TREE
), m_func_decl (NULL_TREE
),
58 m_context_arg (NULL_TREE
), m_group_base_arg (NULL_TREE
),
59 m_private_base_arg (NULL_TREE
), m_ret_value (NULL_TREE
),
60 m_next_kernarg_offset (0), m_kernarg_max_align (0),
61 m_ret_value_brig_var (NULL
), m_has_barriers (false), m_has_allocas (false),
62 m_has_function_calls_with_barriers (false), m_calls_analyzed (false),
63 m_is_wg_function (false), m_has_unexpanded_dp_builtins (false),
64 m_generating_arg_block (false), m_parent (parent
)
67 BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT
* sizeof (BrigOperandRegister
*));
68 memset (&m_descriptor
, 0, sizeof (phsa_descriptor
));
70 if (s_custom_builtins
.size () > 0) return;
72 /* Populate the builtin index. */
73 #undef DEF_HSAIL_ATOMIC_BUILTIN
74 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
75 #undef DEF_HSAIL_INTR_BUILTIN
76 #undef DEF_HSAIL_SAT_BUILTIN
77 #undef DEF_HSAIL_BUILTIN
78 #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
79 s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \
80 = builtin_decl_explicit (ENUM);
82 #include "brig-builtins.def"
/* Destructor.  Visits each of the BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT slots of
   m_regs and tests it against NULL.
   NOTE(review): the statement guarded by that test is not present in this
   copy of the file.  Presumably it releases the reg_decl_index_entry objects
   that get_m_var_declfor_reg allocates with `new' -- confirm against the
   upstream source.  */
85 brig_function::~brig_function ()
87 for (size_t i
= 0; i
< BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT
; ++i
)
89 if (m_regs
[i
] != NULL
)
97 /* Returns a GENERIC label with the given name in the given function.
98 Creates it, if not yet found. */
101 brig_function::label (const std::string
&name
)
103 label_index::const_iterator i
= m_label_index
.find (name
);
104 if (i
== m_label_index
.end ())
107 = get_identifier_with_length (name
.c_str (), name
.size ());
109 tree label_decl
= build_decl (UNKNOWN_LOCATION
, LABEL_DECL
,
110 name_identifier
, void_type_node
);
112 DECL_CONTEXT (label_decl
) = m_func_decl
;
113 DECL_ARTIFICIAL (label_decl
) = 0;
115 m_label_index
[name
] = label_decl
;
122 /* Record an argument variable for later use. This includes both local
123 variables inside arg blocks and incoming function arguments. */
126 brig_function::add_arg_variable (const BrigDirectiveVariable
*brigVar
,
129 m_arg_variables
[brigVar
] = treeDecl
;
/* Looks VAR up in m_arg_variables, the index filled by add_arg_variable.
   NOTE(review): the return type and the statements taken on a hit and on a
   miss are not present in this copy of the file.  Presumably the stored
   declaration is returned on a hit -- confirm the miss behaviour against
   the upstream source before relying on it.  */
133 brig_function::arg_variable (const BrigDirectiveVariable
*var
) const
135 variable_index::const_iterator i
= m_arg_variables
.find (var
);
136 if (i
== m_arg_variables
.end ())
142 /* Appends a new kernel argument descriptor for the current kernel's
146 brig_function::append_kernel_arg (const BrigDirectiveVariable
*var
, size_t size
,
149 gcc_assert (m_func_decl
!= NULL_TREE
);
150 gcc_assert (m_is_kernel
);
152 size_t align_padding
= m_next_kernarg_offset
% alignment
== 0 ?
153 0 : (alignment
- m_next_kernarg_offset
% alignment
);
154 m_next_kernarg_offset
+= align_padding
;
155 m_kernarg_offsets
[var
] = m_next_kernarg_offset
;
156 m_next_kernarg_offset
+= size
;
159 = m_kernarg_max_align
< alignment
? alignment
: m_kernarg_max_align
;
163 brig_function::kernel_arg_offset (const BrigDirectiveVariable
*var
) const
165 var_offset_table::const_iterator i
= m_kernarg_offsets
.find (var
);
166 gcc_assert (i
!= m_kernarg_offsets
.end ());
170 /* Add work-item ID variables to the beginning of the kernel function
171 which can be used for address computation as kernel dispatch packet
172 instructions can be expanded to GENERIC nodes referring to them. */
/* For each of the three dimensions this creates the function-local
   variables __local_*, __cur_wg_size_*, __workgroupid_*, __workgroupsize_*,
   __gridsize_*, __abs_id_base_* and __abs_id_* (suffix x, y or z) and appends
   the statements that initialise them to the current bind expression.
   m_kernel_entry is set to the start of the statement list before any of
   these statements are appended.
   NOTE(review): several assignment targets and type arguments are missing
   from this copy of the file (the lines that start with a bare `='); restore
   them from the upstream source before building.  */
175 brig_function::add_id_variables ()
177 tree bind_expr
= m_current_bind_expr
;
178 tree stmts
= BIND_EXPR_BODY (bind_expr
);
180 /* Initialize the WG limits and local ids. */
181 m_kernel_entry
= tsi_start (stmts
);
/* One pass per dimension; dim_char becomes 'x', 'y' and 'z' in turn and is
   used as the suffix of every variable name created below.  */
183 for (int i
= 0; i
< 3; ++i
)
185 char dim_char
= (char) ((int) 'x' + i
);
187 /* The local sizes are limited to 16b values, but let's still use 32b
188 to avoid unnecessary casts (the ID functions are 32b). */
/* NOTE(review): the comment above says 32b, but the type passed here is
   long_long_integer_type_node -- confirm which one is intended.  */
190 = add_local_variable (std::string ("__local_") + dim_char
,
191 long_long_integer_type_node
);
/* Initial local id: result of the WORKITEMID builtin for dimension i,
   converted to the variable's type.  */
194 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID
), 2,
195 uint32_type_node
, uint32_type_node
,
196 build_int_cst (uint32_type_node
, i
), ptr_type_node
,
199 tree id_init
= build2 (MODIFY_EXPR
, TREE_TYPE (m_local_id_vars
[i
]),
201 convert (TREE_TYPE (m_local_id_vars
[i
]),
204 append_statement (id_init
);
206 m_cur_wg_size_vars
[i
]
207 = add_local_variable (std::string ("__cur_wg_size_") + dim_char
,
208 long_long_integer_type_node
);
/* With flag_assume_phsa the value is obtained through
   expand_or_call_builtin; the other visible form calls the builtin directly
   and passes m_context_arg.  The same two-way pattern repeats below for the
   work-group id and the work-group size.  */
211 if (flag_assume_phsa
)
213 tree_stl_vec operands
214 = tree_stl_vec (1, build_int_cst (uint32_type_node
, i
));
216 = expand_or_call_builtin (BRIG_OPCODE_CURRENTWORKGROUPSIZE
,
217 BRIG_TYPE_U32
, uint32_type_node
,
221 cwgz_call
= call_builtin
222 (builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE
),
223 2, uint32_type_node
, uint32_type_node
,
224 build_int_cst (uint32_type_node
, i
), ptr_type_node
, m_context_arg
);
226 tree limit_init
= build2 (MODIFY_EXPR
, TREE_TYPE (m_cur_wg_size_vars
[i
]),
227 m_cur_wg_size_vars
[i
],
228 convert (TREE_TYPE (m_cur_wg_size_vars
[i
]),
231 append_statement (limit_init
);
/* Work-group id for dimension i.  */
234 = add_local_variable (std::string ("__workgroupid_") + dim_char
,
238 if (flag_assume_phsa
)
240 tree_stl_vec operands
241 = tree_stl_vec (1, build_int_cst (uint32_type_node
, i
));
243 = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPID
, BRIG_TYPE_U32
,
244 uint32_type_node
, operands
);
248 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID
),
249 2, uint32_type_node
, uint32_type_node
,
250 build_int_cst (uint32_type_node
, i
), ptr_type_node
,
253 tree wgid_init
= build2 (MODIFY_EXPR
, TREE_TYPE (m_wg_id_vars
[i
]),
254 m_wg_id_vars
[i
], wgid_call
);
256 append_statement (wgid_init
);
/* Work-group size for dimension i.  */
259 = add_local_variable (std::string ("__workgroupsize_") + dim_char
,
263 if (flag_assume_phsa
)
265 tree_stl_vec operands
266 = tree_stl_vec (1, build_int_cst (uint32_type_node
, i
));
268 = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE
, BRIG_TYPE_U32
,
269 uint32_type_node
, operands
);
273 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE
),
274 2, uint32_type_node
, uint32_type_node
,
275 build_int_cst (uint32_type_node
, i
), ptr_type_node
,
278 tree wgsize_init
= build2 (MODIFY_EXPR
, TREE_TYPE (m_wg_size_vars
[i
]),
279 m_wg_size_vars
[i
], wgsize_call
);
281 append_statement (wgsize_init
);
/* Grid size for dimension i; only the direct builtin call is visible.  */
284 = add_local_variable (std::string ("__gridsize_") + dim_char
,
288 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE
), 2,
289 uint32_type_node
, uint32_type_node
,
290 build_int_cst (uint32_type_node
, i
), ptr_type_node
,
293 tree gridsize_init
= build2 (MODIFY_EXPR
, TREE_TYPE (m_grid_size_vars
[i
]),
294 m_grid_size_vars
[i
], gridsize_call
);
296 append_statement (gridsize_init
);
/* Absolute id bookkeeping, computed as long long: a product of the
   work-group id and the work-group size, and that product plus the local
   id.  Presumably the product is abs_id_base and the sum is abs_id (their
   declarators are missing here); both are then stored in the matching
   m_abs_id_base_vars / m_abs_id_vars slot.  */
298 m_abs_id_base_vars
[i
]
299 = add_local_variable (std::string ("__abs_id_base_") + dim_char
,
300 long_long_integer_type_node
);
303 = add_local_variable (std::string ("__abs_id_") + dim_char
,
304 long_long_integer_type_node
);
307 = build2 (MULT_EXPR
, long_long_integer_type_node
,
308 convert (long_long_integer_type_node
, m_wg_id_vars
[i
]),
309 convert (long_long_integer_type_node
, m_wg_size_vars
[i
]));
311 = build2 (PLUS_EXPR
, long_long_integer_type_node
, abs_id_base
,
312 convert (long_long_integer_type_node
, m_local_id_vars
[i
]));
314 tree abs_id_base_init
315 = build2 (MODIFY_EXPR
, TREE_TYPE (m_abs_id_base_vars
[i
]),
316 m_abs_id_base_vars
[i
], abs_id_base
);
317 append_statement (abs_id_base_init
);
319 tree abs_id_init
= build2 (MODIFY_EXPR
,
320 TREE_TYPE (m_abs_id_vars
[i
]),
321 m_abs_id_vars
[i
], abs_id
);
322 append_statement (abs_id_init
);
326 /* Creates a new local variable with the given NAME and given GENERIC
330 brig_function::add_local_variable (std::string name
, tree type
)
333 = get_identifier_with_length (name
.c_str (), name
.size ());
335 = build_decl (UNKNOWN_LOCATION
, VAR_DECL
, name_identifier
, type
);
337 DECL_NONLOCAL (variable
) = 0;
338 TREE_ADDRESSABLE (variable
) = 0;
339 TREE_STATIC (variable
) = 0;
340 TREE_USED (variable
) = 1;
341 DECL_ARTIFICIAL (variable
) = 0;
343 tree bind_expr
= DECL_SAVED_TREE (m_func_decl
);
345 DECL_CONTEXT (variable
) = m_func_decl
;
347 DECL_CHAIN (variable
) = BIND_EXPR_VARS (bind_expr
);
348 BIND_EXPR_VARS (bind_expr
) = variable
;
352 /* Return tree type for an HSA register.
354 The tree type can be anything (scalar, vector, int, float, etc.)
355 but its size is guaranteed to match the HSA register size.
357 HSA registers are untyped but we select a type based on their use
358 to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems
359 to occur when use or def reaches over current BB). */
362 brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister
*reg
) const
364 size_t reg_size
= gccbrig_reg_size (reg
);
366 /* The default type. */
367 tree type
= build_nonstandard_integer_type (reg_size
, true);
369 if (m_parent
->m_fn_regs_use_index
.count (m_name
) == 0)
372 const regs_use_index
&index
= m_parent
->m_fn_regs_use_index
[m_name
];
373 size_t reg_id
= gccbrig_hsa_reg_id (*reg
);
374 if (index
.count (reg_id
) == 0)
377 const reg_use_info
&info
= index
.find (reg_id
)->second
;
378 std::vector
<std::pair
<tree
, size_t> >::const_iterator it
379 = info
.m_type_refs
.begin ();
380 std::vector
<std::pair
<tree
, size_t> >::const_iterator it_end
381 = info
.m_type_refs
.end ();
382 size_t max_refs_as_type_count
= 0;
383 for (; it
!= it_end
; it
++)
385 size_t type_bit_size
= int_size_in_bytes (it
->first
) * BITS_PER_UNIT
;
386 if (type_bit_size
!= reg_size
) continue;
387 if (it
->second
> max_refs_as_type_count
)
390 max_refs_as_type_count
= it
->second
;
397 /* Returns a DECL_VAR for the given HSAIL operand register.
398 If it has not been created yet for the function being generated,
399 creates it as a type determined by analysis phase. */
/* The register id from gccbrig_hsa_reg_id indexes m_regs.  An empty slot is
   filled with a heap-allocated reg_decl_index_entry, and a local variable
   named by gccbrig_reg_name is created for it through add_local_variable.
   NOTE(review): the declaration of `type', the condition that selects
   between get_tree_type_for_hsa_reg and boolean_type_node, and the target of
   the add_local_variable assignment are missing from this copy of the file.
   Presumably the boolean type is used for one-bit registers and the target
   is regEntry->m_var_decl -- confirm against the upstream source.  */
402 brig_function::get_m_var_declfor_reg (const BrigOperandRegister
*reg
)
404 size_t offset
= gccbrig_hsa_reg_id (*reg
);
406 reg_decl_index_entry
*regEntry
= m_regs
[offset
];
407 if (regEntry
== NULL
)
409 size_t reg_size
= gccbrig_reg_size (reg
);
412 type
= get_tree_type_for_hsa_reg (reg
);
414 type
= boolean_type_node
;
416 /* Drop the const qualifier so we do not end up with a read only
417 register variable which cannot be written to later. */
418 tree nonconst_type
= build_type_variant (type
, false, false);
420 regEntry
= new reg_decl_index_entry
;
423 = add_local_variable (gccbrig_reg_name (reg
), nonconst_type
);
424 m_regs
[offset
] = regEntry
;
426 return regEntry
->m_var_decl
;
429 /* Builds a work-item do..while loop for a single DIM. HEADER_ENTRY is
430 a statement after which the iteration variables should be initialized and
431 the loop body starts. BRANCH_AFTER is the statement after which the loop
432 predicate check and the back edge goto will be appended. */
435 brig_function::add_wi_loop (int dim
, tree_stmt_iterator
*header_entry
,
436 tree_stmt_iterator
*branch_after
)
438 tree ivar
= m_local_id_vars
[dim
];
439 tree abs_id_base_var
= m_abs_id_base_vars
[dim
];
440 tree abs_id_var
= m_abs_id_vars
[dim
];
441 tree ivar_max
= m_cur_wg_size_vars
[dim
];
442 tree_stmt_iterator entry
= *header_entry
;
444 /* TODO: this is not a parallel loop as we share the "register variables"
445 across work-items. Should create a copy of them per WI instance. That
446 is, declare temporaries for new definitions inside the loop body, not at
449 tree ivar_init
= build2 (MODIFY_EXPR
, TREE_TYPE (ivar
), ivar
,
450 build_zero_cst (TREE_TYPE (ivar
)));
451 tsi_link_after (&entry
, ivar_init
, TSI_NEW_STMT
);
453 tree abs_id_var_init
= build2 (MODIFY_EXPR
, TREE_TYPE (abs_id_var
),
455 convert (TREE_TYPE (abs_id_var
),
457 tsi_link_after (&entry
, abs_id_var_init
, TSI_NEW_STMT
);
460 = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim
));
461 tree loop_body_label_stmt
= build_stmt (LABEL_EXPR
, loop_body_label
);
463 tsi_link_after (&entry
, loop_body_label_stmt
, TSI_NEW_STMT
);
465 if (m_has_unexpanded_dp_builtins
)
467 if (!flag_assume_phsa
)
470 = builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID
);
471 /* Set the local ID to the current wi-loop iteration variable value
472 to ensure the builtins see the correct values. */
474 = call_builtin (id_set_builtin
, 3,
475 void_type_node
, uint32_type_node
,
476 build_int_cst (uint32_type_node
, dim
),
477 uint32_type_node
, convert (uint32_type_node
, ivar
),
478 ptr_type_node
, m_context_arg
);
479 tsi_link_after (&entry
, id_set_call
, TSI_NEW_STMT
);
483 tree ptr_type
= build_pointer_type (uint32_type_node
);
484 tree ctx
= build2 (MEM_REF
, uint32_type_node
, m_context_arg
,
485 build_int_cst (ptr_type
, dim
* 4));
486 tree assign
= build2 (MODIFY_EXPR
, uint32_type_node
, ctx
,
487 convert (uint32_type_node
, ivar
));
489 tsi_link_after (&entry
, assign
, TSI_NEW_STMT
);
493 /* Increment the WI iteration variable. */
494 tree incr
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (ivar
), ivar
,
495 build_one_cst (TREE_TYPE (ivar
)));
497 tsi_link_after (branch_after
, incr
, TSI_NEW_STMT
);
499 /* ...and the abs id variable. */
500 tree abs_id_incr
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (abs_id_var
),
502 build_one_cst (TREE_TYPE (abs_id_var
)));
504 tsi_link_after (branch_after
, abs_id_incr
, TSI_NEW_STMT
);
506 /* Append the predicate check with the back edge goto. */
507 tree condition
= build2 (LT_EXPR
, TREE_TYPE (ivar
), ivar
, ivar_max
);
508 tree target_goto
= build1 (GOTO_EXPR
, void_type_node
, loop_body_label
);
510 = build3 (COND_EXPR
, void_type_node
, condition
, target_goto
, NULL_TREE
);
511 tsi_link_after (branch_after
, if_stmt
, TSI_NEW_STMT
);
514 /* Recursively analyzes the function and its callees for barrier usage. */
517 brig_function::analyze_calls ()
519 if (m_calls_analyzed
)
522 /* Set this early to not get stuck in case of recursive call graphs.
523 This is safe because if the function calls itself, either the function
524 has barrier calls which implies a call to a function with barrier calls,
525 or it doesn't in which case the result depends on the later called
527 m_calls_analyzed
= true;
529 for (size_t i
= 0; i
< m_called_functions
.size (); ++i
)
531 tree f
= m_called_functions
[i
];
532 brig_function
*called_f
= m_parent
->get_finished_function (f
);
533 if (called_f
== NULL
)
535 /* Unfinished function (only declaration within the set of BRIGs)
536 found. Cannot finish the CG analysis. Have to assume it does have
537 a barrier for safety. */
538 m_has_function_calls_with_barriers
= true;
539 m_has_unexpanded_dp_builtins
= true;
542 called_f
->analyze_calls ();
543 /* We can assume m_has_barriers has been correctly set during the
544 construction of the function decl. No need to reanalyze it. */
545 m_has_function_calls_with_barriers
|= called_f
->m_has_barriers
;
547 /* If the function or any of its called functions has dispatch
548 packet builtin calls that require the local id, we need to
549 set the local id to the context in the work item loop before
550 the functions are called. If we analyze the opposite, these
551 function calls can be omitted. */
552 m_has_unexpanded_dp_builtins
|= called_f
->m_has_unexpanded_dp_builtins
;
556 /* Tries to convert the current kernel to a work-group function that executes
557 all work-items using loops. Returns true in case the conversion was
561 brig_function::convert_to_wg_function ()
563 if (!m_calls_analyzed
)
566 if (m_has_barriers
|| m_has_function_calls_with_barriers
)
569 /* The most trivial case: No barriers at all in the kernel.
570 We can create one big work-item loop around the whole kernel. */
571 tree bind_expr
= m_current_bind_expr
;
572 tree stmts
= BIND_EXPR_BODY (bind_expr
);
574 for (int i
= 0; i
< 3; ++i
)
576 /* The previous loop has added a new label to the end of the function,
577 the next level loop should wrap around it also. */
578 tree_stmt_iterator function_exit
= tsi_last (stmts
);
579 add_wi_loop (i
, &m_kernel_entry
, &function_exit
);
582 m_is_wg_function
= true;
586 /* Emits a kernel description to a special ELF section so it can be
587 utilized by an HSA runtime implementation. The assembly block
588 must be emitted to a statement list of a function, which is given
589 as an argument. Returns the assembly block used to emit the section. */
/* The assembly text is assembled in a string stream: a .pushsection
   directive named PHSA_DESC_SECTION_PREFIX followed by the kernel name, the
   raw bytes of m_descriptor printed one by one as two-digit hex values with
   a "0x" prefix, and a closing .popsection.  The text is wrapped in an
   ASM_EXPR that is appended to STMT_LIST.
   NOTE(review): parts of the stream expression (including string literals),
   the separator emitted between bytes, the declarator of metadata_asm and
   the return statement are missing from this copy of the file; restore them
   from the upstream source rather than guessing the literals.  */
592 brig_function::emit_metadata (tree stmt_list
)
594 /* Emit an ELF section via an assembly directive that generates a special
595 ELF section for each kernel that contains raw bytes of a descriptor
596 object. This is pretty disgusting, but life is never perfect ;) */
598 /* Use the original kernel name without the '_' prefix in the section name. */
599 std::string kern_name
= m_is_kernel
? m_name
.substr (1) : m_name
;
601 std::ostringstream strstr
;
603 << ".pushsection " << PHSA_DESC_SECTION_PREFIX
<< kern_name
605 << "\t.p2align 1, 1, 1" << std::endl
/* Dump the descriptor object byte by byte.  */
608 for (size_t i
= 0; i
< sizeof (phsa_descriptor
); ++i
)
610 strstr
<< "0x" << std::setw (2) << std::setfill ('0') << std::hex
611 << (unsigned) *((unsigned char *) &m_descriptor
+ i
);
/* True for every byte except the last one.  */
612 if (i
+ 1 < sizeof (phsa_descriptor
))
616 strstr
<< std::endl
<< ".popsection" << std::endl
<< std::endl
;
619 = build_stmt (ASM_EXPR
,
620 build_string (strstr
.str ().size (), strstr
.str ().c_str ()),
621 NULL_TREE
, NULL_TREE
, NULL_TREE
, NULL_TREE
);
623 append_to_statement_list_force (metadata_asm
, &stmt_list
);
/* Emits the kernel launcher function.  Also emits the metadata section
   creation statements in it.

   The launcher function calls the device-side runtime
   that runs the kernel for all work-items.  In C:

   void KernelName (void* context, void* group_base_addr)
   {
     __hsail_launch_kernel (_KernelName, context, group_base_addr);
   }

   or, in case of a successful conversion to a work-group function:

   void KernelName (void* context, void* group_base_addr)
   {
     __hsail_launch_wg_function (_KernelName, context, group_base_addr);
   }

   The user/host sees this function as the kernel to call from the
   outside.  The actual kernel generated from HSAIL was named _KernelName.  */
/* NOTE(review): this copy of the function is incomplete.  The declarators
   of name_identifier, launcher and resdecl, the qualifier argument of both
   build_qualified_type calls, the type argument of the __context PARM_DECL,
   the leading arguments of both call_builtin calls, the `else' between them
   and everything after set_externally_visible are missing.  The comments
   below describe only what is visible.  */
650 brig_function::emit_launcher_and_metadata ()
652 /* The original kernel name without the '_' prefix. */
653 std::string kern_name
= m_name
.substr (1);
656 = get_identifier_with_length (kern_name
.c_str (), kern_name
.size ());
/* Qualified pointer types used for the two launcher parameters.  */
658 tree restrict_void_ptr
659 = build_qualified_type (build_pointer_type (void_type_node
),
661 tree restrict_char_ptr
662 = build_qualified_type (build_pointer_type (char_type_node
),
/* The launcher declaration: returns void and takes (restrict_void_ptr,
   restrict_char_ptr).  */
665 = build_decl (UNKNOWN_LOCATION
, FUNCTION_DECL
, name_identifier
,
666 build_function_type_list (void_type_node
, restrict_void_ptr
,
667 restrict_char_ptr
, NULL_TREE
));
669 TREE_USED (launcher
) = 1;
670 DECL_ARTIFICIAL (launcher
) = 1;
/* First parameter: __context.  */
672 tree context_arg
= build_decl (UNKNOWN_LOCATION
, PARM_DECL
,
673 get_identifier ("__context"),
676 DECL_ARGUMENTS (launcher
) = context_arg
;
677 DECL_ARG_TYPE (context_arg
) = restrict_void_ptr
;
678 DECL_CONTEXT (context_arg
) = launcher
;
679 TREE_USED (context_arg
) = 1;
680 DECL_ARTIFICIAL (context_arg
) = 1;
/* Second parameter: __group_base_addr, chained after the first.  */
682 tree group_base_addr_arg
683 = build_decl (UNKNOWN_LOCATION
, PARM_DECL
,
684 get_identifier ("__group_base_addr"), restrict_char_ptr
);
686 chainon (DECL_ARGUMENTS (launcher
), group_base_addr_arg
);
687 DECL_ARG_TYPE (group_base_addr_arg
) = restrict_char_ptr
;
688 DECL_CONTEXT (group_base_addr_arg
) = launcher
;
689 TREE_USED (group_base_addr_arg
) = 1;
690 DECL_ARTIFICIAL (group_base_addr_arg
) = 1;
/* Void result declaration.  */
693 = build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, void_type_node
);
695 DECL_RESULT (launcher
) = resdecl
;
696 DECL_CONTEXT (resdecl
) = launcher
;
698 DECL_INITIAL (launcher
) = make_node (BLOCK
);
699 TREE_USED (DECL_INITIAL (launcher
)) = 1;
/* Function body: a BIND_EXPR around a fresh statement list.  */
701 tree stmt_list
= alloc_stmt_list ();
703 tree bind_expr
= build3 (BIND_EXPR
, void_type_node
, NULL
, stmt_list
, NULL
);
705 TREE_STATIC (launcher
) = 1;
706 TREE_PUBLIC (launcher
) = 1;
708 DECL_SAVED_TREE (launcher
) = bind_expr
;
/* Make the launcher the current function.  NOTE(review): no matching
   pop_cfun is visible in this copy -- confirm against upstream.  */
710 if (DECL_STRUCT_FUNCTION (launcher
) == NULL
)
711 push_struct_function (launcher
);
713 push_cfun (DECL_STRUCT_FUNCTION (launcher
));
/* Address of the generated kernel function, passed to the runtime.  */
715 tree kernel_func_ptr
= build1 (ADDR_EXPR
, ptr_type_node
, m_func_decl
);
717 tree phsail_launch_kernel_call
;
719 /* Compute the local group segment frame start pointer. */
/* The temporary is initialised from m_parent->m_module_group_variables.size().  */
720 tree group_local_offset_temp
721 = create_tmp_var (uint32_type_node
, "group_local_offset");
722 tree group_local_offset_arg
723 = build2 (MODIFY_EXPR
, uint32_type_node
,
724 group_local_offset_temp
,
725 build_int_cst (uint32_type_node
,
726 m_parent
->m_module_group_variables
.size()));
728 /* Emit a launcher depending whether we converted the kernel function to
729 a work group function or not. */
730 if (m_is_wg_function
)
731 phsail_launch_kernel_call
732 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC
),
734 ptr_type_node
, kernel_func_ptr
, restrict_void_ptr
,
735 context_arg
, restrict_char_ptr
, group_base_addr_arg
,
736 uint32_type_node
, group_local_offset_arg
);
738 phsail_launch_kernel_call
739 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL
),
741 ptr_type_node
, kernel_func_ptr
, restrict_void_ptr
,
742 context_arg
, restrict_char_ptr
, group_base_addr_arg
,
743 uint32_type_node
, group_local_offset_arg
);
/* The launch call is the first statement of the body; the metadata
   assembly block follows it.  */
745 append_to_statement_list_force (phsail_launch_kernel_call
, &stmt_list
);
747 emit_metadata (stmt_list
);
749 set_externally_visible (launcher
);
/* Appends STMT to the end of the body of the current bind expression
   (m_current_bind_expr).  The function declaration must exist already.
   NOTE(review): the return type and any statement after the append are
   missing from this copy of the file -- confirm against upstream.  */
755 brig_function::append_statement (tree stmt
)
757 gcc_assert (m_func_decl
!= NULL
);
759 tree bind_expr
= m_current_bind_expr
;
760 tree stmts
= BIND_EXPR_BODY (bind_expr
);
762 append_to_statement_list_force (stmt
, &stmts
);
766 /* Creates a new "alloca frame" for the current function by
767 injecting an alloca frame push in the beginning of the function
768 and an alloca frame pop before all function exit points. */
/* The push is a call to BUILT_IN_HSAIL_PUSH_FRAME linked before the entry
   iterator.  The statements are then walked from that point, and a call to
   BUILT_IN_HSAIL_POP_FRAME taking m_context_arg is linked in front of every
   RETURN_EXPR found.
   NOTE(review): the condition that chooses between m_kernel_entry and the
   start of the statement list, the final argument of the push call, the
   declarators of push_frame_call and pop_frame_call, the head of the
   do-while loop and the statement that advances `entry' are missing from
   this copy of the file -- restore them from upstream.  */
771 brig_function::create_alloca_frame ()
773 tree_stmt_iterator entry
;
775 /* Adds the alloca push only after the ids have been initialized
776 in case of a kernel function. */
778 entry
= m_kernel_entry
;
/* Otherwise start from the first statement of the current bind expression.  */
781 tree bind_expr
= m_current_bind_expr
;
782 tree stmts
= BIND_EXPR_BODY (bind_expr
);
783 entry
= tsi_start (stmts
);
786 tree push_frame_builtin
= builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME
);
788 = call_builtin (push_frame_builtin
, 1, void_type_node
, ptr_type_node
,
791 tsi_link_before (&entry
, push_frame_call
, TSI_NEW_STMT
);
793 tree pop_frame_builtin
= builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME
);
797 tree stmt
= tsi_stmt (entry
);
798 if (TREE_CODE (stmt
) == RETURN_EXPR
)
801 = call_builtin (pop_frame_builtin
, 1, void_type_node
,
802 ptr_type_node
, m_context_arg
);
/* TSI_SAME_STMT keeps the iterator on the return statement.  */
804 tsi_link_before (&entry
, pop_frame_call
, TSI_SAME_STMT
);
808 while (!tsi_end_p (entry
));
811 /* Finishes the currently built function. After calling this, no new
812 statements should be appended to the function. */
/* Appends the final return statement and then sets up the alloca frame.
   NOTE(review): a line is missing just before the create_alloca_frame call
   in this copy of the file; presumably it makes the call conditional --
   confirm against the upstream source.  */
814 brig_function::finish ()
816 append_return_stmt ();
818 /* Currently assume single alloca frame per WG. */
820 create_alloca_frame ();
/* Kernel counterpart of finish: appends the exit label, attempts the
   work-group function conversion (its result is not examined here), appends
   the return statement and then sets up the alloca frame.
   NOTE(review): a line is missing just before the create_alloca_frame call
   in this copy of the file; presumably it makes the call conditional --
   confirm against the upstream source.  */
824 brig_function::finish_kernel ()
826 /* Kernel functions should have a single exit point.
827 Let's create one. The return instructions should have
828 been converted to branches to this label. */
829 append_statement (build_stmt (LABEL_EXPR
, m_exit_label
));
830 /* Attempt to convert the kernel to a work-group function that
831 executes all work-items of the WG using a loop. */
832 convert_to_wg_function ();
834 append_return_stmt ();
836 /* Currently assume single alloca frame per WG. */
838 create_alloca_frame ();
/* Appends a RETURN_EXPR to the body of the current bind expression.
   Nothing is done for an empty statement list.  When m_ret_value is set,
   the return wraps an assignment to m_ret_value; the other visible form is
   a plain return with no operand.
   NOTE(review): the statement taken when the last statement already is a
   RETURN_EXPR, the right-hand side of the result assignment, the
   declarators of result_assign and return_expr and the `else' between the
   two forms are missing from this copy of the file -- restore them from
   upstream.  */
842 brig_function::append_return_stmt ()
844 gcc_assert (m_current_bind_expr
!= NULL_TREE
);
845 tree stmts
= BIND_EXPR_BODY (m_current_bind_expr
);
847 if (STATEMENT_LIST_TAIL (stmts
) == NULL
)
848 return; /* Empty function. */
850 tree last_stmt
= tsi_stmt (tsi_last (stmts
));
852 if (TREE_CODE (last_stmt
) == RETURN_EXPR
)
855 if (m_ret_value
!= NULL_TREE
)
858 = build2 (MODIFY_EXPR
, TREE_TYPE (m_ret_value
), m_ret_value
,
862 = build1 (RETURN_EXPR
, TREE_TYPE (result_assign
), result_assign
);
863 append_to_statement_list_force (return_expr
, &stmts
);
867 tree return_stmt
= build_stmt (RETURN_EXPR
, NULL
);
868 append_to_statement_list_force (return_stmt
, &stmts
);
873 brig_function::has_function_scope_var (const BrigBase
* var
) const
875 return m_function_scope_vars
.find (var
) != m_function_scope_vars
.end ();
879 brig_function::group_variable_segment_offset (const std::string
&name
) const
881 if (m_local_group_variables
.has_variable (name
))
882 return m_local_group_variables
.segment_offset (name
);
884 gcc_assert (m_parent
->m_module_group_variables
.has_variable (name
));
885 return m_parent
->m_module_group_variables
.segment_offset (name
);
888 /* Try to expand the given builtin call to reuse a previously generated
889 variable, if possible. If not, just call the given builtin.
890 BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
891 ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
895 brig_function::expand_or_call_builtin (BrigOpcode16_t brig_opcode
,
896 BrigType16_t brig_type
,
898 tree_stl_vec
&operands
)
900 if (needs_workitem_context_data (brig_opcode
))
901 m_has_unexpanded_dp_builtins
= true;
903 if (can_expand_builtin (brig_opcode
))
904 return expand_builtin (brig_opcode
, operands
);
907 = get_builtin_for_hsa_opcode (arith_type
, brig_opcode
, brig_type
);
909 if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in
)))
910 && arith_type
!= NULL_TREE
&& VECTOR_TYPE_P (arith_type
)
911 && brig_opcode
!= BRIG_OPCODE_LERP
912 && brig_opcode
!= BRIG_OPCODE_PACKCVT
913 && brig_opcode
!= BRIG_OPCODE_SAD
914 && brig_opcode
!= BRIG_OPCODE_SADHI
)
916 /* Call the scalar built-in for all elements in the vector. */
917 tree_stl_vec operand0_elements
;
918 if (operands
.size () > 0)
919 unpack (operands
[0], operand0_elements
);
921 tree_stl_vec operand1_elements
;
922 if (operands
.size () > 1)
923 unpack (operands
[1], operand1_elements
);
925 tree_stl_vec result_elements
;
927 size_t element_count
= gccbrig_type_vector_subparts (arith_type
);
928 for (size_t i
= 0; i
< element_count
; ++i
)
930 tree_stl_vec call_operands
;
931 if (operand0_elements
.size () > 0)
932 call_operands
.push_back (operand0_elements
.at (i
));
934 if (operand1_elements
.size () > 0)
935 call_operands
.push_back (operand1_elements
.at (i
));
937 result_elements
.push_back
938 (expand_or_call_builtin (brig_opcode
, brig_type
,
939 TREE_TYPE (arith_type
),
942 return pack (result_elements
);
945 tree_stl_vec call_operands
;
946 tree_stl_vec operand_types
;
948 tree arg_type_chain
= TYPE_ARG_TYPES (TREE_TYPE (built_in
));
950 for (size_t i
= 0; i
< operands
.size (); ++i
)
952 tree operand_type
= TREE_VALUE (arg_type_chain
);
953 call_operands
.push_back (convert (operand_type
, operands
[i
]));
954 operand_types
.push_back (operand_type
);
955 arg_type_chain
= TREE_CHAIN (arg_type_chain
);
958 if (needs_workitem_context_data (brig_opcode
))
960 call_operands
.push_back (m_context_arg
);
961 operand_types
.push_back (ptr_type_node
);
964 size_t operand_count
= call_operands
.size ();
966 call_operands
.resize (4, NULL_TREE
);
967 operand_types
.resize (4, NULL_TREE
);
968 for (size_t i
= 0; i
< operand_count
; ++i
)
969 call_operands
.at (i
) = build_resize_convert_view (operand_types
.at (i
),
970 call_operands
.at (i
));
972 tree fnptr
= build_fold_addr_expr (built_in
);
973 return build_call_array (TREE_TYPE (TREE_TYPE (built_in
)), fnptr
,
974 operand_count
, &call_operands
[0]);
977 /* Instead of calling a built-in function, use a more efficient mechanism
978 such as reuse a previously returned value known to be still valid, or
979 access the work-item context struct directly. This is beneficial especially
980 for the work-item identification related builtins as not having them as
981 unanalyzable black box calls can lead to more easily vectorizable parallel
982 loops for multi work-item work-groups. BRIG_OPCODE identifies the builtin
983 and OPERANDS store the operands. */
/* Opcodes handled in the visible code: WORKITEMFLATABSID, WORKITEMABSID,
   WORKITEMFLATID, WORKGROUPSIZE, WORKITEMID, WORKGROUPID and
   CURRENTWORKGROUPSIZE.  For the per-dimension opcodes OPERANDS[0] must be
   an integer constant naming the dimension.
   NOTE(review): this copy of the function is incomplete.  Several
   declarators, the byte offset expressions of the MEM_REF accesses, a
   number of branch conditions and the fallback paths are missing, and the
   comment that starts with "For kernels without phsa" has lost its closing
   text.  Restore all of these from the upstream source before building.  */
986 brig_function::expand_builtin (BrigOpcode16_t brig_opcode
,
987 tree_stl_vec
&operands
)
/* Single-element operand vectors holding the dimension constants 0, 1
   and 2, used for the recursive calls below.  */
989 tree_stl_vec uint32_0
= tree_stl_vec (1, build_int_cst (uint32_type_node
, 0));
991 tree_stl_vec uint32_1
= tree_stl_vec (1, build_int_cst (uint32_type_node
, 1));
993 tree_stl_vec uint32_2
= tree_stl_vec (1, build_int_cst (uint32_type_node
, 2));
/* Flattened absolute id, in 64 bits:
   id0 + id1 * gridsize0 + id2 * gridsize0 * gridsize1.  */
995 if (brig_opcode
== BRIG_OPCODE_WORKITEMFLATABSID
)
997 tree id0
= expand_builtin (BRIG_OPCODE_WORKITEMABSID
, uint32_0
);
998 id0
= convert (uint64_type_node
, id0
);
1000 tree id1
= expand_builtin (BRIG_OPCODE_WORKITEMABSID
, uint32_1
);
1001 id1
= convert (uint64_type_node
, id1
);
1003 tree id2
= expand_builtin (BRIG_OPCODE_WORKITEMABSID
, uint32_2
);
1004 id2
= convert (uint64_type_node
, id2
);
1006 tree max0
= convert (uint64_type_node
, m_grid_size_vars
[0]);
1007 tree max1
= convert (uint64_type_node
, m_grid_size_vars
[1]);
1009 tree id2_x_max0_x_max1
= build2 (MULT_EXPR
, uint64_type_node
, id2
, max0
);
1011 = build2 (MULT_EXPR
, uint64_type_node
, id2_x_max0_x_max1
, max1
);
1013 tree id1_x_max0
= build2 (MULT_EXPR
, uint64_type_node
, id1
, max0
);
1015 tree sum
= build2 (PLUS_EXPR
, uint64_type_node
, id0
, id1_x_max0
);
1016 sum
= build2 (PLUS_EXPR
, uint64_type_node
, sum
, id2_x_max0_x_max1
);
1018 return add_temp_var ("workitemflatabsid", sum
);
/* Absolute id of one dimension: reuse the variable kept in m_abs_id_vars.  */
1020 else if (brig_opcode
== BRIG_OPCODE_WORKITEMABSID
)
1022 HOST_WIDE_INT dim
= int_constant_value (operands
[0]);
1023 return m_abs_id_vars
[dim
];
/* Flattened local id, in 32 bits, built from the three work-item ids and
   the x and y work-group sizes.  */
1025 else if (brig_opcode
== BRIG_OPCODE_WORKITEMFLATID
)
1028 tree wg_size_x
= expand_builtin (BRIG_OPCODE_WORKGROUPSIZE
, uint32_0
);
1029 tree wg_size_y
= expand_builtin (BRIG_OPCODE_WORKGROUPSIZE
, uint32_1
);
1031 = build2 (MULT_EXPR
, uint32_type_node
,
1032 convert (uint32_type_node
,
1033 expand_builtin (BRIG_OPCODE_WORKITEMID
, uint32_2
)),
1035 z_x_wgsx_wgsy
= build2 (MULT_EXPR
, uint32_type_node
, z_x_wgsx_wgsy
,
1039 = build2 (MULT_EXPR
, uint32_type_node
,
1040 convert (uint32_type_node
,
1041 expand_builtin (BRIG_OPCODE_WORKITEMID
, uint32_1
)),
1044 tree sum
= build2 (PLUS_EXPR
, uint32_type_node
, y_x_wgsx
, z_x_wgsx_wgsy
);
1045 sum
= build2 (PLUS_EXPR
, uint32_type_node
,
1046 convert (uint32_type_node
,
1047 expand_builtin (BRIG_OPCODE_WORKITEMID
, uint32_0
)),
1049 return add_temp_var ("workitemflatid", sum
);
/* Work-group size: with flag_assume_phsa it is read from the context
   through a MEM_REF based at PHSA_CONTEXT_WG_SIZES.  */
1051 else if (brig_opcode
== BRIG_OPCODE_WORKGROUPSIZE
)
1053 HOST_WIDE_INT dim
= int_constant_value (operands
[0]);
1054 if (flag_assume_phsa
)
1056 tree ptr_type
= build_pointer_type (uint32_type_node
);
1057 tree ctx
= build2 (MEM_REF
, uint32_type_node
, m_context_arg
,
1058 build_int_cst (ptr_type
,
1059 PHSA_CONTEXT_WG_SIZES
/* Adding dim to the trailing 'x' turns the temp name suffix into x, y or z.  */
1061 std::string
name ("wgsize_x");
1062 name
[name
.length() - 1] += dim
;
1063 return add_temp_var (name
.c_str(), ctx
);
1065 else if (m_is_kernel
)
1067 /* For kernels without phsa we generate certain temps before
1068 the WI loop, which means we don't need to rely on LICM to get
1070 return m_wg_size_vars
[dim
];
1075 else if (brig_opcode
== BRIG_OPCODE_WORKITEMID
)
1077 HOST_WIDE_INT dim
= int_constant_value (operands
[0]);
1080 return m_local_id_vars
[dim
];
1082 else if (flag_assume_phsa
)
1084 tree ptr_type
= build_pointer_type (uint32_type_node
);
1085 tree ctx
= build2 (MEM_REF
, uint32_type_node
, m_context_arg
,
1086 build_int_cst (ptr_type
,
1087 PHSA_CONTEXT_OFFS_WI_IDS
1089 std::string
name ("wiid_x");
1090 name
[name
.length() - 1] += dim
;
1091 return add_temp_var (name
.c_str(), ctx
);
1096 else if (brig_opcode
== BRIG_OPCODE_WORKGROUPID
)
1098 HOST_WIDE_INT dim
= int_constant_value (operands
[0]);
1099 if (flag_assume_phsa
)
1101 tree ptr_type
= build_pointer_type (uint32_type_node
);
1102 tree ctx
= build2 (MEM_REF
, uint32_type_node
, m_context_arg
,
1103 build_int_cst (ptr_type
,
1104 PHSA_CONTEXT_OFFS_WG_IDS
1106 std::string
name ("wgid_x");
1107 name
[name
.length() - 1] += dim
;
1108 return add_temp_var (name
.c_str(), ctx
);
1109 } else if (m_is_kernel
)
1110 return m_wg_id_vars
[dim
];
1114 else if (brig_opcode
== BRIG_OPCODE_CURRENTWORKGROUPSIZE
)
1116 HOST_WIDE_INT dim
= int_constant_value (operands
[0]);
1117 if (flag_assume_phsa
)
1119 tree ptr_type
= build_pointer_type (uint32_type_node
);
1120 tree ctx
= build2 (MEM_REF
, uint32_type_node
, m_context_arg
,
1121 build_int_cst (ptr_type
,
1122 PHSA_CONTEXT_CURRENT_WG_SIZES
1124 std::string
name ("curwgsize_x");
1125 name
[name
.length() - 1] += dim
;
1126 return add_temp_var (name
.c_str(), ctx
);
1127 } else if (m_is_kernel
)
1128 return m_cur_wg_size_vars
[dim
];
1138 /* Returns true in case the given opcode that would normally be generated
1139 as a builtin call can be expanded to tree nodes. */
1142 brig_function::can_expand_builtin (BrigOpcode16_t brig_opcode
) const
1144 switch (brig_opcode
)
1146 case BRIG_OPCODE_CURRENTWORKGROUPSIZE
:
1147 case BRIG_OPCODE_WORKITEMFLATID
:
1148 case BRIG_OPCODE_WORKITEMID
:
1149 case BRIG_OPCODE_WORKGROUPID
:
1150 case BRIG_OPCODE_WORKGROUPSIZE
:
1151 return m_is_kernel
|| flag_assume_phsa
;
1152 case BRIG_OPCODE_WORKITEMFLATABSID
:
1153 case BRIG_OPCODE_WORKITEMABSID
:
1160 /* In case the HSA instruction must be implemented using a builtin,
1161 this function is called to get the correct builtin function.
1162 TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
1163 brig instruction and BRIG_TYPE the brig instruction's type. */
1166 brig_function::get_builtin_for_hsa_opcode
1167 (tree type
, BrigOpcode16_t brig_opcode
, BrigType16_t brig_type
) const
1169 tree builtin
= NULL_TREE
;
1170 tree builtin_type
= type
;
1172 /* For vector types, first find the scalar version of the builtin. */
1173 if (type
!= NULL_TREE
&& VECTOR_TYPE_P (type
))
1174 builtin_type
= TREE_TYPE (type
);
1175 BrigType16_t brig_inner_type
= brig_type
& BRIG_TYPE_BASE_MASK
;
1177 /* Some BRIG opcodes can use the same builtins for unsigned and
1178 signed types. Force these cases to unsigned types. */
1180 if (brig_opcode
== BRIG_OPCODE_BORROW
1181 || brig_opcode
== BRIG_OPCODE_CARRY
1182 || brig_opcode
== BRIG_OPCODE_LASTBIT
1183 || brig_opcode
== BRIG_OPCODE_BITINSERT
)
1185 if (brig_type
== BRIG_TYPE_S32
)
1186 brig_type
= BRIG_TYPE_U32
;
1187 else if (brig_type
== BRIG_TYPE_S64
)
1188 brig_type
= BRIG_TYPE_U64
;
1191 switch (brig_opcode
)
1193 case BRIG_OPCODE_FLOOR
:
1194 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_FLOOR
);
1196 case BRIG_OPCODE_CEIL
:
1197 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_CEIL
);
1199 case BRIG_OPCODE_SQRT
:
1200 case BRIG_OPCODE_NSQRT
:
1201 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_SQRT
);
1203 case BRIG_OPCODE_RINT
:
1204 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_RINT
);
1206 case BRIG_OPCODE_TRUNC
:
1207 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_TRUNC
);
1209 case BRIG_OPCODE_COPYSIGN
:
1210 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_COPYSIGN
);
1212 case BRIG_OPCODE_NSIN
:
1213 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_SIN
);
1215 case BRIG_OPCODE_NLOG2
:
1216 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_LOG2
);
1218 case BRIG_OPCODE_NEXP2
:
1219 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_EXP2
);
1221 case BRIG_OPCODE_FMA
:
1222 case BRIG_OPCODE_NFMA
:
1223 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_FMA
);
1225 case BRIG_OPCODE_NCOS
:
1226 builtin
= mathfn_built_in (builtin_type
, BUILT_IN_COS
);
1228 case BRIG_OPCODE_POPCOUNT
:
1229 /* Popcount should be typed by its argument type (the return value
1230 is always u32). Let's use a b64 version for also for b32 for now. */
1231 return builtin_decl_explicit (BUILT_IN_POPCOUNTL
);
1232 case BRIG_OPCODE_BORROW
:
1233 /* Borrow uses the same builtin for unsigned and signed types. */
1234 if (brig_type
== BRIG_TYPE_S32
|| brig_type
== BRIG_TYPE_U32
)
1235 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32
);
1237 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64
);
1238 case BRIG_OPCODE_CARRY
:
1239 /* Carry also uses the same builtin for unsigned and signed types. */
1240 if (brig_type
== BRIG_TYPE_S32
|| brig_type
== BRIG_TYPE_U32
)
1241 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32
);
1243 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64
);
1246 /* Use our builtin index for finding a proper builtin for the BRIG
1247 opcode and BRIG type. This takes care most of the builtin cases,
1248 the special cases are handled in the separate 'case' statements
1250 builtin_map::const_iterator i
1251 = s_custom_builtins
.find (std::make_pair (brig_opcode
, brig_type
));
1252 if (i
!= s_custom_builtins
.end ())
1255 if (brig_inner_type
!= brig_type
)
1257 /* Try to find a scalar built-in we could use. */
1258 i
= s_custom_builtins
.find
1259 (std::make_pair (brig_opcode
, brig_inner_type
));
1260 if (i
!= s_custom_builtins
.end ())
1264 /* In case this is an fp16 operation that is promoted to fp32,
1265 try to find a fp32 scalar built-in. */
1266 if (brig_inner_type
== BRIG_TYPE_F16
)
1268 i
= s_custom_builtins
.find
1269 (std::make_pair (brig_opcode
, BRIG_TYPE_F32
));
1270 if (i
!= s_custom_builtins
.end ())
1276 if (VECTOR_TYPE_P (type
) && builtin
!= NULL_TREE
)
1278 /* Try to find a vectorized version of the built-in.
1279 TODO: properly assert that builtin is a mathfn builtin? */
1281 = targetm
.vectorize
.builtin_vectorized_function
1282 (builtin_mathfn_code (builtin
), type
, type
);
1283 if (vec_builtin
!= NULL_TREE
)
1288 if (builtin
== NULL_TREE
)
1293 /* Unpacks the elements of the vector in VALUE to scalars (bit field
1294 references) in ELEMENTS. */
1297 brig_function::unpack (tree value
, tree_stl_vec
&elements
)
1299 size_t vec_size
= int_size_in_bytes (TREE_TYPE (value
));
1301 = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value
))) * BITS_PER_UNIT
;
1302 size_t element_count
1303 = vec_size
* BITS_PER_UNIT
/ element_size
;
1305 tree input_element_type
= TREE_TYPE (TREE_TYPE (value
));
1307 value
= add_temp_var ("unpack_input", value
);
1309 for (size_t i
= 0; i
< element_count
; ++i
)
1312 = build3 (BIT_FIELD_REF
, input_element_type
, value
,
1313 TYPE_SIZE (input_element_type
),
1314 bitsize_int(i
* element_size
));
1316 element
= add_temp_var ("scalar", element
);
1317 elements
.push_back (element
);
1321 /* Pack the elements of the scalars in ELEMENTS to the returned vector. */
1324 brig_function::pack (tree_stl_vec
&elements
)
1326 size_t element_count
= elements
.size ();
1328 gcc_assert (element_count
> 1);
1330 tree output_element_type
= TREE_TYPE (elements
.at (0));
1332 vec
<constructor_elt
, va_gc
> *constructor_vals
= NULL
;
1333 for (size_t i
= 0; i
< element_count
; ++i
)
1334 CONSTRUCTOR_APPEND_ELT (constructor_vals
, NULL_TREE
, elements
.at (i
));
1336 tree vec_type
= build_vector_type (output_element_type
, element_count
);
1338 /* build_constructor creates a vector type which is not a vector_cst
1339 that requires compile time constant elements. */
1340 tree vec
= build_constructor (vec_type
, constructor_vals
);
1342 /* Add a temp variable for readability. */
1343 tree tmp_var
= create_tmp_var (vec_type
, "vec_out");
1344 tree vec_tmp_assign
= build2 (MODIFY_EXPR
, TREE_TYPE (tmp_var
), tmp_var
, vec
);
1345 append_statement (vec_tmp_assign
);
1349 /* Returns true in case the given opcode needs to know about work-item context
1350 data. In such case the context data is passed as a pointer to a work-item
1351 context object, as the last argument in the builtin call. */
1354 brig_function::needs_workitem_context_data
1355 (BrigOpcode16_t brig_opcode
)
1357 switch (brig_opcode
)
1359 case BRIG_OPCODE_WORKITEMABSID
:
1360 case BRIG_OPCODE_WORKITEMFLATABSID
:
1361 case BRIG_OPCODE_WORKITEMFLATID
:
1362 case BRIG_OPCODE_CURRENTWORKITEMFLATID
:
1363 case BRIG_OPCODE_WORKITEMID
:
1364 case BRIG_OPCODE_WORKGROUPID
:
1365 case BRIG_OPCODE_WORKGROUPSIZE
:
1366 case BRIG_OPCODE_CURRENTWORKGROUPSIZE
:
1367 case BRIG_OPCODE_GRIDGROUPS
:
1368 case BRIG_OPCODE_GRIDSIZE
:
1369 case BRIG_OPCODE_DIM
:
1370 case BRIG_OPCODE_PACKETID
:
1371 case BRIG_OPCODE_PACKETCOMPLETIONSIG
:
1372 case BRIG_OPCODE_BARRIER
:
1373 case BRIG_OPCODE_WAVEBARRIER
:
1374 case BRIG_OPCODE_ARRIVEFBAR
:
1375 case BRIG_OPCODE_INITFBAR
:
1376 case BRIG_OPCODE_JOINFBAR
:
1377 case BRIG_OPCODE_LEAVEFBAR
:
1378 case BRIG_OPCODE_RELEASEFBAR
:
1379 case BRIG_OPCODE_WAITFBAR
:
1380 case BRIG_OPCODE_CUID
:
1381 case BRIG_OPCODE_MAXCUID
:
1382 case BRIG_OPCODE_DEBUGTRAP
:
1383 case BRIG_OPCODE_GROUPBASEPTR
:
1384 case BRIG_OPCODE_KERNARGBASEPTR
:
1385 case BRIG_OPCODE_ALLOCA
:
1392 /* Appends and returns a new temp variable and an accompanying assignment
1393 statement that stores the value of the given EXPR and has the given NAME. */
1396 brig_function::add_temp_var (std::string name
, tree expr
)
1398 tree temp_var
= create_tmp_var (TREE_TYPE (expr
), name
.c_str ());
1399 tree assign
= build2 (MODIFY_EXPR
, TREE_TYPE (temp_var
), temp_var
, expr
);
1400 append_statement (assign
);
1404 /* Returns the integer constant value of the given node.
1405 If it's a cast, looks into the source of the cast. */
1408 brig_function::int_constant_value (tree node
)
1411 if (TREE_CODE (n
) == VIEW_CONVERT_EXPR
)
1412 n
= TREE_OPERAND (n
, 0);
1413 return int_cst_value (n
);
1416 /* Returns the tree code that should be used to implement the given
1417 HSA instruction opcode (BRIG_OPCODE) for the given type of instruction
1418 (BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly,
1419 returns TREE_LIST (if it can be emulated with a simple chain of tree
1420 nodes) or CALL_EXPR if the opcode should be implemented using a builtin
1424 brig_function::get_tree_code_for_hsa_opcode
1425 (BrigOpcode16_t brig_opcode
, BrigType16_t brig_type
)
1427 BrigType16_t brig_inner_type
= brig_type
& BRIG_TYPE_BASE_MASK
;
1428 switch (brig_opcode
)
1430 case BRIG_OPCODE_NOP
:
1432 case BRIG_OPCODE_ADD
:
1434 case BRIG_OPCODE_CMOV
:
1435 if (brig_inner_type
== brig_type
)
1438 return VEC_COND_EXPR
;
1439 case BRIG_OPCODE_SUB
:
1441 case BRIG_OPCODE_MUL
:
1442 case BRIG_OPCODE_MUL24
:
1444 case BRIG_OPCODE_MULHI
:
1445 case BRIG_OPCODE_MUL24HI
:
1446 return MULT_HIGHPART_EXPR
;
1447 case BRIG_OPCODE_DIV
:
1448 if (gccbrig_is_float_type (brig_inner_type
))
1451 return TRUNC_DIV_EXPR
;
1452 case BRIG_OPCODE_NEG
:
1454 case BRIG_OPCODE_MIN
:
1455 if (gccbrig_is_float_type (brig_inner_type
))
1459 case BRIG_OPCODE_MAX
:
1460 if (gccbrig_is_float_type (brig_inner_type
))
1464 case BRIG_OPCODE_ABS
:
1466 case BRIG_OPCODE_SHL
:
1468 case BRIG_OPCODE_SHR
:
1470 case BRIG_OPCODE_OR
:
1471 return BIT_IOR_EXPR
;
1472 case BRIG_OPCODE_XOR
:
1473 return BIT_XOR_EXPR
;
1474 case BRIG_OPCODE_AND
:
1475 return BIT_AND_EXPR
;
1476 case BRIG_OPCODE_NOT
:
1477 return BIT_NOT_EXPR
;
1478 case BRIG_OPCODE_RET
:
1480 case BRIG_OPCODE_MOV
:
1481 case BRIG_OPCODE_LDF
:
1483 case BRIG_OPCODE_LD
:
1484 case BRIG_OPCODE_ST
:
1486 case BRIG_OPCODE_BR
:
1488 case BRIG_OPCODE_REM
:
1489 if (brig_type
== BRIG_TYPE_U64
|| brig_type
== BRIG_TYPE_U32
)
1490 return TRUNC_MOD_EXPR
;
1493 case BRIG_OPCODE_NRCP
:
1494 case BRIG_OPCODE_NRSQRT
:
1495 /* Implement as 1/f (x). gcc should pattern detect that and
1496 use a native instruction, if available, for it. */
1498 case BRIG_OPCODE_FMA
:
1499 case BRIG_OPCODE_FLOOR
:
1500 case BRIG_OPCODE_CEIL
:
1501 case BRIG_OPCODE_SQRT
:
1502 case BRIG_OPCODE_NSQRT
:
1503 case BRIG_OPCODE_RINT
:
1504 case BRIG_OPCODE_TRUNC
:
1505 case BRIG_OPCODE_POPCOUNT
:
1506 case BRIG_OPCODE_COPYSIGN
:
1507 case BRIG_OPCODE_NCOS
:
1508 case BRIG_OPCODE_NSIN
:
1509 case BRIG_OPCODE_NLOG2
:
1510 case BRIG_OPCODE_NEXP2
:
1511 case BRIG_OPCODE_NFMA
:
1512 /* Class has type B1 regardless of the float type, thus
1513 the below builtin map search cannot find it. */
1514 case BRIG_OPCODE_CLASS
:
1515 case BRIG_OPCODE_WORKITEMABSID
:
1519 /* Some BRIG opcodes can use the same builtins for unsigned and
1520 signed types. Force these cases to unsigned types.
1523 if (brig_opcode
== BRIG_OPCODE_BORROW
1524 || brig_opcode
== BRIG_OPCODE_CARRY
1525 || brig_opcode
== BRIG_OPCODE_LASTBIT
1526 || brig_opcode
== BRIG_OPCODE_BITINSERT
)
1528 if (brig_type
== BRIG_TYPE_S32
)
1529 brig_type
= BRIG_TYPE_U32
;
1530 else if (brig_type
== BRIG_TYPE_S64
)
1531 brig_type
= BRIG_TYPE_U64
;
1535 builtin_map::const_iterator i
1536 = s_custom_builtins
.find (std::make_pair (brig_opcode
, brig_type
));
1537 if (i
!= s_custom_builtins
.end ())
1539 else if (s_custom_builtins
.find
1540 (std::make_pair (brig_opcode
, brig_inner_type
))
1541 != s_custom_builtins
.end ())
1543 if (brig_inner_type
== BRIG_TYPE_F16
1544 && s_custom_builtins
.find
1545 (std::make_pair (brig_opcode
, BRIG_TYPE_F32
))
1546 != s_custom_builtins
.end ())
1550 return TREE_LIST
; /* Emulate using a chain of nodes. */
1553 /* Inform of an update to the REG_VAR. */
1556 brig_function::add_reg_var_update (tree reg_var
, tree var
)
1558 if (var
== m_abs_id_vars
[0] || var
== m_abs_id_vars
[1]
1559 || var
== m_abs_id_vars
[2] || var
== m_local_id_vars
[0]
1560 || var
== m_local_id_vars
[1] || var
== m_local_id_vars
[2])
1561 m_id_val_defs
[reg_var
] = var
;
1564 /* Possible overwrite of an ID value. */
1566 id_val_map::iterator i
= m_id_val_defs
.find (reg_var
);
1567 if (i
!= m_id_val_defs
.end())
1568 m_id_val_defs
.erase (i
);
1572 /* If the REG_VAR is known to contain an ID value at this point in
1573 the basic block, return true. */
1576 brig_function::is_id_val (tree reg_var
)
1578 id_val_map::iterator i
= m_id_val_defs
.find (reg_var
);
1579 return i
!= m_id_val_defs
.end();
1582 /* Return an ID value for the given REG_VAR if its known to contain
1583 one at this point in the BB, NULL_TREE otherwise. */
1586 brig_function::id_val (tree reg_var
)
1588 id_val_map::iterator i
= m_id_val_defs
.find (reg_var
);
1589 if (i
!= m_id_val_defs
.end())
1595 /* Informs of starting a new basic block. Called when generating
1596 a label, a call, a jump, or a return. */
1599 brig_function::start_new_bb ()
1601 m_id_val_defs
.clear ();