1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2019 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "hash-table.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
37 #include "basic-block.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "gimple-pretty-print.h"
43 #include "diagnostic-core.h"
46 #include "print-tree.h"
47 #include "symbol-summary.h"
48 #include "hsa-common.h"
49 #include "gomp-constants.h"
/* Convert the 16-bit value VAL to little endian form, if necessary.

   When the host compiler is a sufficiently recent GCC, the byte order is
   taken from __BYTE_ORDER__ and the swap is done with a builtin.  Otherwise
   the decision is based on WORDS_BIGENDIAN and the swap is done with shifts.
   Returns VAL unchanged on little endian hosts.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap16 (val);
#else   /* __ORDER_PDP_ENDIAN__ */
  return val;
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* VAL is promoted to int by the shifts; narrow the result explicitly.  */
  return (uint16_t) ((val >> 8) | (val << 8));
#endif
#endif
}
/* Convert the 32-bit value VAL to little endian form, if necessary.

   The selection of the conversion mirrors lendian16: compiler-provided byte
   order macros and builtins when available, WORDS_BIGENDIAN and explicit
   shifts otherwise.  Returns VAL unchanged on little endian hosts.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
#else  /* __ORDER_PDP_ENDIAN__ */
  /* PDP order only needs the two 16-bit halves exchanged.  */
  return (val >> 16) | (val << 16);
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* Swap bytes within each half, then swap the halves.  */
  val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  return (val >> 16) | (val << 16);
#endif
#endif
}
/* Convert the 64-bit value VAL to little endian form, if necessary.

   The selection of the conversion mirrors lendian16 and lendian32.  Returns
   VAL unchanged on little endian hosts.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
#else  /* __ORDER_PDP_ENDIAN__ */
  /* Reverse the order of the four 16-bit words.  */
  return (((val & 0xffffull) << 48)
          | ((val & 0xffff0000ull) << 16)
          | ((val & 0xffff00000000ull) >> 16)
          | ((val & 0xffff000000000000ull) >> 48));
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* Swap neighbouring bytes, then 16-bit words, then 32-bit halves.  */
  val = (((val & 0xff00ff00ff00ff00ull) >> 8)
         | ((val & 0x00ff00ff00ff00ffull) << 8));
  val = (((val & 0xffff0000ffff0000ull) >> 16)
         | ((val & 0x0000ffff0000ffffull) << 16));
  return (val >> 32) | (val << 32);
#endif
#endif
}
/* Name of the ELF section and of the label that hold the BRIG module, and
   the names of the individual BRIG sections.  */
#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME    "hsa_data"
#define BRIG_SECTION_CODE_NAME    "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Size in bytes of one buffer of section data (see hsa_brig_data_chunk).  */
#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
/* Chunks of BRIG binary data.  */

struct hsa_brig_data_chunk
{
  /* Size of the data already stored into a chunk.  */
  unsigned size;

  /* Pointer to the data.  */
  char *data;
};
151 /* Structure representing a BRIG section, holding and writing its data. */
153 class hsa_brig_section
156 /* Section name that will be output to the BRIG. */
157 const char *section_name
;
158 /* Size in bytes of all data stored in the section. */
160 /* The size of the header of the section including padding. */
161 unsigned header_byte_count
;
162 /* The size of the header of the section without any padding. */
163 unsigned header_byte_delta
;
165 void init (const char *name
);
168 unsigned add (const void *data
, unsigned len
, void **output
= NULL
);
169 void round_size_up (int factor
);
170 void *get_ptr_by_offset (unsigned int offset
);
173 void allocate_new_chunk ();
175 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
176 vec
<struct hsa_brig_data_chunk
> chunks
;
178 /* More convenient access to the last chunk from the vector above. */
179 struct hsa_brig_data_chunk
*cur_chunk
;
182 static struct hsa_brig_section brig_data
, brig_code
, brig_operand
;
183 static uint32_t brig_insn_count
;
184 static bool brig_initialized
= false;
186 /* Mapping between emitted HSA functions and their offset in code segment. */
187 static hash_map
<tree
, BrigCodeOffset32_t
> *function_offsets
;
189 /* Hash map of emitted function declarations. */
190 static hash_map
<tree
, BrigDirectiveExecutable
*> *emitted_declarations
;
192 /* Hash table of emitted internal function declaration offsets. */
193 hash_table
<hsa_internal_fn_hasher
> *hsa_emitted_internal_decls
;
195 /* List of sbr instructions. */
196 static vec
<hsa_insn_sbr
*> *switch_instructions
;
198 struct function_linkage_pair
200 function_linkage_pair (tree decl
, unsigned int off
)
201 : function_decl (decl
), offset (off
) {}
203 /* Declaration of called function. */
206 /* Offset in operand section. */
210 /* Vector of function calls where we need to resolve function offsets. */
211 static auto_vec
<function_linkage_pair
> function_call_linkage
;
213 /* Add a new chunk, allocate data for it and initialize it. */
216 hsa_brig_section::allocate_new_chunk ()
218 struct hsa_brig_data_chunk new_chunk
;
220 new_chunk
.data
= XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE
);
222 cur_chunk
= chunks
.safe_push (new_chunk
);
225 /* Initialize the brig section. */
228 hsa_brig_section::init (const char *name
)
231 /* While the following computation is basically wrong, because the intent
232 certainly wasn't to have the first character of name and padding, which
233 are a part of sizeof (BrigSectionHeader), included in the first addend,
234 this is what the disassembler expects. */
235 total_size
= sizeof (BrigSectionHeader
) + strlen (section_name
);
237 allocate_new_chunk ();
238 header_byte_delta
= total_size
;
240 header_byte_count
= total_size
;
243 /* Free all data in the section. */
246 hsa_brig_section::release ()
248 for (unsigned i
= 0; i
< chunks
.length (); i
++)
249 free (chunks
[i
].data
);
254 /* Write the section to the output file to a section with the name given at
255 initialization. Switches the output section and does not restore it. */
258 hsa_brig_section::output ()
260 struct BrigSectionHeader section_header
;
263 section_header
.byteCount
= lendian64 (total_size
);
264 section_header
.headerByteCount
= lendian32 (header_byte_count
);
265 section_header
.nameLength
= lendian32 (strlen (section_name
));
266 assemble_string ((const char *) §ion_header
, 16);
267 assemble_string (section_name
, (section_header
.nameLength
));
268 memset (&padding
, 0, sizeof (padding
));
269 /* This is also a consequence of the wrong header size computation described
270 in a comment in hsa_brig_section::init. */
271 assemble_string (padding
, 8);
272 for (unsigned i
= 0; i
< chunks
.length (); i
++)
273 assemble_string (chunks
[i
].data
, chunks
[i
].size
);
276 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
277 which it was stored. If OUTPUT is not NULL, store into it the pointer to
278 the place where DATA was actually stored. */
281 hsa_brig_section::add (const void *data
, unsigned len
, void **output
)
283 unsigned offset
= total_size
;
285 gcc_assert (len
<= BRIG_CHUNK_MAX_SIZE
);
286 if (cur_chunk
->size
> (BRIG_CHUNK_MAX_SIZE
- len
))
287 allocate_new_chunk ();
289 char *dst
= cur_chunk
->data
+ cur_chunk
->size
;
290 memcpy (dst
, data
, len
);
293 cur_chunk
->size
+= len
;
299 /* Add padding to section so that its size is divisible by FACTOR. */
302 hsa_brig_section::round_size_up (int factor
)
304 unsigned padding
, res
= total_size
% factor
;
309 padding
= factor
- res
;
310 total_size
+= padding
;
311 if (cur_chunk
->size
> (BRIG_CHUNK_MAX_SIZE
- padding
))
313 padding
-= BRIG_CHUNK_MAX_SIZE
- cur_chunk
->size
;
314 cur_chunk
->size
= BRIG_CHUNK_MAX_SIZE
;
315 allocate_new_chunk ();
318 cur_chunk
->size
+= padding
;
321 /* Return pointer to data by global OFFSET in the section. */
324 hsa_brig_section::get_ptr_by_offset (unsigned int offset
)
326 gcc_assert (offset
< total_size
);
327 offset
-= header_byte_delta
;
330 for (i
= 0; offset
>= chunks
[i
].size
; i
++)
331 offset
-= chunks
[i
].size
;
333 return chunks
[i
].data
+ offset
;
/* BRIG string data hashing.  */

struct brig_string_slot
{
  /* The string itself; LEN characters, owned by the slot.  */
  const char *s;
  /* Character emitted just before the string, or zero for none.  */
  char prefix;
  /* Length of S, not counting PREFIX.  */
  int len;
  /* Offset of the emitted string in the data section.  */
  uint32_t offset;
};
346 /* Hash table helpers. */
348 struct brig_string_slot_hasher
: pointer_hash
<brig_string_slot
>
350 static inline hashval_t
hash (const value_type
);
351 static inline bool equal (const value_type
, const compare_type
);
352 static inline void remove (value_type
);
355 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
356 to support strings that may not end in '\0'. */
359 brig_string_slot_hasher::hash (const value_type ds
)
361 hashval_t r
= ds
->len
;
364 for (i
= 0; i
< ds
->len
; i
++)
365 r
= r
* 67 + (unsigned) ds
->s
[i
] - 113;
366 r
= r
* 67 + (unsigned) ds
->prefix
- 113;
370 /* Returns nonzero if DS1 and DS2 are equal. */
373 brig_string_slot_hasher::equal (const value_type ds1
, const compare_type ds2
)
375 if (ds1
->len
== ds2
->len
)
376 return ds1
->prefix
== ds2
->prefix
377 && memcmp (ds1
->s
, ds2
->s
, ds1
->len
) == 0;
382 /* Deallocate memory for DS upon its removal. */
385 brig_string_slot_hasher::remove (value_type ds
)
387 free (const_cast<char *> (ds
->s
));
391 /* Hash for strings we output in order not to duplicate them needlessly. */
393 static hash_table
<brig_string_slot_hasher
> *brig_string_htab
;
395 /* Emit a null terminated string STR to the data section and return its
396 offset in it. If PREFIX is non-zero, output it just before STR too.
397 Sanitize the string if SANITIZE option is set to true. */
400 brig_emit_string (const char *str
, char prefix
= 0, bool sanitize
= true)
402 unsigned slen
= strlen (str
);
403 unsigned offset
, len
= slen
+ (prefix
? 1 : 0);
404 uint32_t hdr_len
= lendian32 (len
);
405 brig_string_slot s_slot
;
406 brig_string_slot
**slot
;
409 str2
= xstrdup (str
);
412 hsa_sanitize_name (str2
);
415 s_slot
.prefix
= prefix
;
418 slot
= brig_string_htab
->find_slot (&s_slot
, INSERT
);
421 brig_string_slot
*new_slot
= XCNEW (brig_string_slot
);
423 /* In theory we should fill in BrigData but that would mean copying
424 the string to a buffer for no reason, so we just emulate it. */
425 offset
= brig_data
.add (&hdr_len
, sizeof (hdr_len
));
427 brig_data
.add (&prefix
, 1);
429 brig_data
.add (str2
, slen
);
430 brig_data
.round_size_up (4);
432 /* TODO: could use the string we just copied into
433 brig_string->cur_chunk */
435 new_slot
->len
= slen
;
436 new_slot
->prefix
= prefix
;
437 new_slot
->offset
= offset
;
442 offset
= (*slot
)->offset
;
449 /* Linked list of queued operands. */
451 static struct operand_queue
453 /* First from the chain of queued operands. */
454 hsa_op_base
*first_op
, *last_op
;
456 /* The offset at which the next operand will be enqueued. */
457 unsigned projected_size
;
461 /* Unless already initialized, initialize infrastructure to produce BRIG. */
468 if (brig_initialized
)
471 brig_string_htab
= new hash_table
<brig_string_slot_hasher
> (37);
472 brig_data
.init (BRIG_SECTION_DATA_NAME
);
473 brig_code
.init (BRIG_SECTION_CODE_NAME
);
474 brig_operand
.init (BRIG_SECTION_OPERAND_NAME
);
475 brig_initialized
= true;
477 struct BrigDirectiveModule moddir
;
478 memset (&moddir
, 0, sizeof (moddir
));
479 moddir
.base
.byteCount
= lendian16 (sizeof (moddir
));
482 if (main_input_filename
&& *main_input_filename
!= '\0')
484 const char *part
= strrchr (main_input_filename
, '/');
486 part
= main_input_filename
;
489 modname
= concat ("&__hsa_module_", part
, NULL
);
490 char *extension
= strchr (modname
, '.');
494 /* As in LTO mode, we have to emit a different module names. */
497 part
= strrchr (asm_file_name
, '/');
499 part
= asm_file_name
;
503 modname2
= xasprintf ("%s_%s", modname
, part
);
508 hsa_sanitize_name (modname
);
509 moddir
.name
= brig_emit_string (modname
);
513 moddir
.name
= brig_emit_string ("__hsa_module_unnamed", '&');
514 moddir
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_MODULE
);
515 moddir
.hsailMajor
= lendian32 (BRIG_VERSION_HSAIL_MAJOR
);
516 moddir
.hsailMinor
= lendian32 (BRIG_VERSION_HSAIL_MINOR
);
517 moddir
.profile
= hsa_full_profile_p () ? BRIG_PROFILE_FULL
: BRIG_PROFILE_BASE
;
518 if (hsa_machine_large_p ())
519 moddir
.machineModel
= BRIG_MACHINE_LARGE
;
521 moddir
.machineModel
= BRIG_MACHINE_SMALL
;
522 moddir
.defaultFloatRound
= BRIG_ROUND_FLOAT_DEFAULT
;
523 brig_code
.add (&moddir
, sizeof (moddir
));
526 /* Free all BRIG data. */
529 brig_release_data (void)
531 delete brig_string_htab
;
532 brig_data
.release ();
533 brig_code
.release ();
534 brig_operand
.release ();
536 brig_initialized
= 0;
539 /* Enqueue operation OP. Return the offset at which it will be stored. */
542 enqueue_op (hsa_op_base
*op
)
546 if (op
->m_brig_op_offset
)
547 return op
->m_brig_op_offset
;
549 ret
= op_queue
.projected_size
;
550 op
->m_brig_op_offset
= op_queue
.projected_size
;
552 if (!op_queue
.first_op
)
553 op_queue
.first_op
= op
;
555 op_queue
.last_op
->m_next
= op
;
556 op_queue
.last_op
= op
;
558 if (is_a
<hsa_op_immed
*> (op
))
559 op_queue
.projected_size
+= sizeof (struct BrigOperandConstantBytes
);
560 else if (is_a
<hsa_op_reg
*> (op
))
561 op_queue
.projected_size
+= sizeof (struct BrigOperandRegister
);
562 else if (is_a
<hsa_op_address
*> (op
))
563 op_queue
.projected_size
+= sizeof (struct BrigOperandAddress
);
564 else if (is_a
<hsa_op_code_ref
*> (op
))
565 op_queue
.projected_size
+= sizeof (struct BrigOperandCodeRef
);
566 else if (is_a
<hsa_op_code_list
*> (op
))
567 op_queue
.projected_size
+= sizeof (struct BrigOperandCodeList
);
568 else if (is_a
<hsa_op_operand_list
*> (op
))
569 op_queue
.projected_size
+= sizeof (struct BrigOperandOperandList
);
575 static void emit_immediate_operand (hsa_op_immed
*imm
);
577 /* Emit directive describing a symbol if it has not been emitted already.
578 Return the offset of the directive. */
581 emit_directive_variable (struct hsa_symbol
*symbol
)
583 struct BrigDirectiveVariable dirvar
;
584 unsigned name_offset
;
585 static unsigned res_name_offset
;
587 if (symbol
->m_directive_offset
)
588 return symbol
->m_directive_offset
;
590 memset (&dirvar
, 0, sizeof (dirvar
));
591 dirvar
.base
.byteCount
= lendian16 (sizeof (dirvar
));
592 dirvar
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE
);
593 dirvar
.allocation
= symbol
->m_allocation
;
595 char prefix
= symbol
->m_global_scope_p
? '&' : '%';
597 if (symbol
->m_decl
&& TREE_CODE (symbol
->m_decl
) == RESULT_DECL
)
599 if (res_name_offset
== 0)
600 res_name_offset
= brig_emit_string (symbol
->m_name
, '%');
601 name_offset
= res_name_offset
;
603 else if (symbol
->m_name
)
604 name_offset
= brig_emit_string (symbol
->m_name
, prefix
);
608 snprintf (buf
, 64, "__%s_%i", hsa_seg_name (symbol
->m_segment
),
609 symbol
->m_name_number
);
610 name_offset
= brig_emit_string (buf
, prefix
);
613 dirvar
.name
= lendian32 (name_offset
);
615 if (symbol
->m_decl
&& TREE_CODE (symbol
->m_decl
) == CONST_DECL
)
617 hsa_op_immed
*tmp
= new hsa_op_immed (DECL_INITIAL (symbol
->m_decl
));
618 dirvar
.init
= lendian32 (enqueue_op (tmp
));
622 dirvar
.type
= lendian16 (symbol
->m_type
);
623 dirvar
.segment
= symbol
->m_segment
;
624 dirvar
.align
= symbol
->m_align
;
625 dirvar
.linkage
= symbol
->m_linkage
;
626 dirvar
.dim
.lo
= symbol
->m_dim
;
627 dirvar
.dim
.hi
= symbol
->m_dim
>> 32;
629 /* Global variables are just declared and linked via HSA runtime. */
630 if (symbol
->m_linkage
!= BRIG_ALLOCATION_PROGRAM
)
631 dirvar
.modifier
|= BRIG_VARIABLE_DEFINITION
;
634 if (symbol
->m_cst_value
)
636 dirvar
.modifier
|= BRIG_VARIABLE_CONST
;
637 dirvar
.init
= lendian32 (enqueue_op (symbol
->m_cst_value
));
640 symbol
->m_directive_offset
= brig_code
.add (&dirvar
, sizeof (dirvar
));
641 return symbol
->m_directive_offset
;
644 /* Emit directives describing either a function declaration or definition F and
645 return the produced BrigDirectiveExecutable structure. The function does
646 not take into account any instructions when calculating nextModuleEntry
647 field of the produced BrigDirectiveExecutable structure so when emitting
648 actual definitions, this field needs to be updated after all of the function
649 is actually added to the code section. */
651 static BrigDirectiveExecutable
*
652 emit_function_directives (hsa_function_representation
*f
, bool is_declaration
)
654 struct BrigDirectiveExecutable fndir
;
655 unsigned name_offset
, inarg_off
, scoped_off
, next_toplev_off
;
660 if (!f
->m_declaration_p
)
661 for (int i
= 0; f
->m_global_symbols
.iterate (i
, &sym
); i
++)
663 gcc_assert (!sym
->m_emitted_to_brig
);
664 sym
->m_emitted_to_brig
= true;
665 emit_directive_variable (sym
);
669 name_offset
= brig_emit_string (f
->m_name
, '&');
670 inarg_off
= brig_code
.total_size
+ sizeof (fndir
)
671 + (f
->m_output_arg
? sizeof (struct BrigDirectiveVariable
) : 0);
672 scoped_off
= inarg_off
673 + f
->m_input_args
.length () * sizeof (struct BrigDirectiveVariable
);
675 if (!f
->m_declaration_p
)
677 count
+= f
->m_spill_symbols
.length ();
678 count
+= f
->m_private_variables
.length ();
681 next_toplev_off
= scoped_off
+ count
* sizeof (struct BrigDirectiveVariable
);
683 memset (&fndir
, 0, sizeof (fndir
));
684 fndir
.base
.byteCount
= lendian16 (sizeof (fndir
));
685 fndir
.base
.kind
= lendian16 (f
->m_kern_p
? BRIG_KIND_DIRECTIVE_KERNEL
686 : BRIG_KIND_DIRECTIVE_FUNCTION
);
687 fndir
.name
= lendian32 (name_offset
);
688 fndir
.inArgCount
= lendian16 (f
->m_input_args
.length ());
689 fndir
.outArgCount
= lendian16 (f
->m_output_arg
? 1 : 0);
690 fndir
.firstInArg
= lendian32 (inarg_off
);
691 fndir
.firstCodeBlockEntry
= lendian32 (scoped_off
);
692 fndir
.nextModuleEntry
= lendian32 (next_toplev_off
);
693 fndir
.linkage
= f
->get_linkage ();
694 if (!f
->m_declaration_p
)
695 fndir
.modifier
|= BRIG_EXECUTABLE_DEFINITION
;
696 memset (&fndir
.reserved
, 0, sizeof (fndir
.reserved
));
698 /* Once we put a definition of function_offsets, we should not overwrite
699 it with a declaration of the function. */
700 if (f
->m_internal_fn
== NULL
)
702 if (!function_offsets
->get (f
->m_decl
) || !is_declaration
)
703 function_offsets
->put (f
->m_decl
, brig_code
.total_size
);
707 /* Internal function. */
708 hsa_internal_fn
**slot
709 = hsa_emitted_internal_decls
->find_slot (f
->m_internal_fn
, INSERT
);
710 hsa_internal_fn
*int_fn
= new hsa_internal_fn (f
->m_internal_fn
);
711 int_fn
->m_offset
= brig_code
.total_size
;
715 brig_code
.add (&fndir
, sizeof (fndir
), &ptr_to_fndir
);
718 emit_directive_variable (f
->m_output_arg
);
719 for (unsigned i
= 0; i
< f
->m_input_args
.length (); i
++)
720 emit_directive_variable (f
->m_input_args
[i
]);
722 if (!f
->m_declaration_p
)
724 for (int i
= 0; f
->m_spill_symbols
.iterate (i
, &sym
); i
++)
726 emit_directive_variable (sym
);
729 for (unsigned i
= 0; i
< f
->m_private_variables
.length (); i
++)
731 emit_directive_variable (f
->m_private_variables
[i
]);
736 return (BrigDirectiveExecutable
*) ptr_to_fndir
;
739 /* Emit a label directive for the given HBB. We assume it is about to start on
740 the current offset in the code section. */
743 emit_bb_label_directive (hsa_bb
*hbb
)
745 struct BrigDirectiveLabel lbldir
;
747 lbldir
.base
.byteCount
= lendian16 (sizeof (lbldir
));
748 lbldir
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_LABEL
);
750 snprintf (buf
, 32, "BB_%u_%i", DECL_UID (current_function_decl
),
752 lbldir
.name
= lendian32 (brig_emit_string (buf
, '@'));
754 hbb
->m_label_ref
.m_directive_offset
= brig_code
.add (&lbldir
,
759 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
760 holding such, for constants and registers. */
763 regtype_for_type (BrigType16_t t
)
782 case BRIG_TYPE_U16X2
:
784 case BRIG_TYPE_S16X2
:
785 case BRIG_TYPE_F16X2
:
786 return BRIG_TYPE_B32
;
793 case BRIG_TYPE_U16X4
:
794 case BRIG_TYPE_U32X2
:
796 case BRIG_TYPE_S16X4
:
797 case BRIG_TYPE_S32X2
:
798 case BRIG_TYPE_F16X4
:
799 case BRIG_TYPE_F32X2
:
800 return BRIG_TYPE_B64
;
803 case BRIG_TYPE_U8X16
:
804 case BRIG_TYPE_U16X8
:
805 case BRIG_TYPE_U32X4
:
806 case BRIG_TYPE_U64X2
:
807 case BRIG_TYPE_S8X16
:
808 case BRIG_TYPE_S16X8
:
809 case BRIG_TYPE_S32X4
:
810 case BRIG_TYPE_S64X2
:
811 case BRIG_TYPE_F16X8
:
812 case BRIG_TYPE_F32X4
:
813 case BRIG_TYPE_F64X2
:
814 return BRIG_TYPE_B128
;
821 /* Return the length of the BRIG type TYPE that is going to be streamed out as
822 an immediate constant (so it must not be B1). */
825 hsa_get_imm_brig_type_len (BrigType16_t type
)
827 BrigType16_t base_type
= type
& BRIG_TYPE_BASE_MASK
;
828 BrigType16_t pack_type
= type
& BRIG_TYPE_PACK_MASK
;
832 case BRIG_TYPE_PACK_NONE
:
834 case BRIG_TYPE_PACK_32
:
836 case BRIG_TYPE_PACK_64
:
838 case BRIG_TYPE_PACK_128
:
872 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
873 If NEED_LEN is not equal to zero, shrink or extend the value
874 to NEED_LEN bytes. Return how many bytes were written. */
877 emit_immediate_scalar_to_buffer (tree value
, char *data
, unsigned need_len
)
879 union hsa_bytes bytes
;
881 memset (&bytes
, 0, sizeof (bytes
));
882 tree type
= TREE_TYPE (value
);
883 gcc_checking_assert (TREE_CODE (type
) != VECTOR_TYPE
);
885 unsigned data_len
= tree_to_uhwi (TYPE_SIZE (type
)) / BITS_PER_UNIT
;
886 if (INTEGRAL_TYPE_P (type
)
887 || (POINTER_TYPE_P (type
) && TREE_CODE (value
) == INTEGER_CST
))
891 bytes
.b8
= (uint8_t) TREE_INT_CST_LOW (value
);
894 bytes
.b16
= (uint16_t) TREE_INT_CST_LOW (value
);
897 bytes
.b32
= (uint32_t) TREE_INT_CST_LOW (value
);
900 bytes
.b64
= (uint64_t) TREE_INT_CST_LOW (value
);
905 else if (SCALAR_FLOAT_TYPE_P (type
))
909 sorry ("Support for HSA does not implement immediate 16 bit FPU "
913 unsigned int_len
= GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type
));
914 /* There are always 32 bits in each long, no matter the size of
918 real_to_target (tmp
, TREE_REAL_CST_PTR (value
), TYPE_MODE (type
));
921 bytes
.b32
= (uint32_t) tmp
[0];
924 bytes
.b64
= (uint64_t)(uint32_t) tmp
[1];
926 bytes
.b64
|= (uint32_t) tmp
[0];
938 memcpy (data
, &bytes
, len
);
943 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size
)
946 *brig_repr_size
= hsa_get_imm_brig_type_len (m_type
);
948 if (m_tree_value
!= NULL_TREE
)
950 /* Update brig_repr_size for special tree values. */
951 if (TREE_CODE (m_tree_value
) == STRING_CST
)
952 *brig_repr_size
= TREE_STRING_LENGTH (m_tree_value
);
953 else if (TREE_CODE (m_tree_value
) == CONSTRUCTOR
)
955 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value
)));
957 unsigned total_len
= *brig_repr_size
;
959 /* As we can have a constructor with fewer elements, fill the memory
961 brig_repr
= XCNEWVEC (char, total_len
);
964 if (TREE_CODE (m_tree_value
) == VECTOR_CST
)
966 /* Variable-length vectors aren't supported. */
967 int i
, num
= VECTOR_CST_NELTS (m_tree_value
).to_constant ();
968 for (i
= 0; i
< num
; i
++)
970 tree v
= VECTOR_CST_ELT (m_tree_value
, i
);
971 unsigned actual
= emit_immediate_scalar_to_buffer (v
, p
, 0);
975 /* Vectors should have the exact size. */
976 gcc_assert (total_len
== 0);
978 else if (TREE_CODE (m_tree_value
) == STRING_CST
)
979 memcpy (brig_repr
, TREE_STRING_POINTER (m_tree_value
),
980 TREE_STRING_LENGTH (m_tree_value
));
981 else if (TREE_CODE (m_tree_value
) == COMPLEX_CST
)
983 gcc_assert (total_len
% 2 == 0);
986 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value
), p
,
989 gcc_assert (actual
== total_len
/ 2);
993 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value
), p
,
995 gcc_assert (actual
== total_len
/ 2);
997 else if (TREE_CODE (m_tree_value
) == CONSTRUCTOR
)
999 unsigned len
= CONSTRUCTOR_NELTS (m_tree_value
);
1000 for (unsigned i
= 0; i
< len
; i
++)
1002 tree v
= CONSTRUCTOR_ELT (m_tree_value
, i
)->value
;
1003 unsigned actual
= emit_immediate_scalar_to_buffer (v
, p
, 0);
1004 total_len
-= actual
;
1009 emit_immediate_scalar_to_buffer (m_tree_value
, p
, total_len
);
1015 switch (*brig_repr_size
)
1018 bytes
.b8
= (uint8_t) m_int_value
;
1021 bytes
.b16
= (uint16_t) m_int_value
;
1024 bytes
.b32
= (uint32_t) m_int_value
;
1027 bytes
.b64
= (uint64_t) m_int_value
;
1033 brig_repr
= XNEWVEC (char, *brig_repr_size
);
1034 memcpy (brig_repr
, &bytes
, *brig_repr_size
);
1040 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1041 have been massaged to comply with various HSA/BRIG type requirements, so the
1042 only important aspect of that is the length (because HSAIL might expect
1043 smaller constants or become bit-data). The data should be represented
1044 according to what is in the tree representation. */
1047 emit_immediate_operand (hsa_op_immed
*imm
)
1049 unsigned brig_repr_size
;
1050 char *brig_repr
= imm
->emit_to_buffer (&brig_repr_size
);
1051 struct BrigOperandConstantBytes out
;
1053 memset (&out
, 0, sizeof (out
));
1054 out
.base
.byteCount
= lendian16 (sizeof (out
));
1055 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES
);
1056 uint32_t byteCount
= lendian32 (brig_repr_size
);
1057 out
.type
= lendian16 (imm
->m_type
);
1058 out
.bytes
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1059 brig_operand
.add (&out
, sizeof (out
));
1060 brig_data
.add (brig_repr
, brig_repr_size
);
1061 brig_data
.round_size_up (4);
1066 /* Emit a register BRIG operand REG. */
1069 emit_register_operand (hsa_op_reg
*reg
)
1071 struct BrigOperandRegister out
;
1073 out
.base
.byteCount
= lendian16 (sizeof (out
));
1074 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_REGISTER
);
1075 out
.regNum
= lendian32 (reg
->m_hard_num
);
1077 switch (regtype_for_type (reg
->m_type
))
1080 out
.regKind
= BRIG_REGISTER_KIND_SINGLE
;
1083 out
.regKind
= BRIG_REGISTER_KIND_DOUBLE
;
1085 case BRIG_TYPE_B128
:
1086 out
.regKind
= BRIG_REGISTER_KIND_QUAD
;
1089 out
.regKind
= BRIG_REGISTER_KIND_CONTROL
;
1095 brig_operand
.add (&out
, sizeof (out
));
1098 /* Emit an address BRIG operand ADDR. */
1101 emit_address_operand (hsa_op_address
*addr
)
1103 struct BrigOperandAddress out
;
1105 out
.base
.byteCount
= lendian16 (sizeof (out
));
1106 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_ADDRESS
);
1107 out
.symbol
= addr
->m_symbol
1108 ? lendian32 (emit_directive_variable (addr
->m_symbol
)) : 0;
1109 out
.reg
= addr
->m_reg
? lendian32 (enqueue_op (addr
->m_reg
)) : 0;
1111 if (sizeof (addr
->m_imm_offset
) == 8)
1113 out
.offset
.lo
= lendian32 (addr
->m_imm_offset
);
1114 out
.offset
.hi
= lendian32 (addr
->m_imm_offset
>> 32);
1118 gcc_assert (sizeof (addr
->m_imm_offset
) == 4);
1119 out
.offset
.lo
= lendian32 (addr
->m_imm_offset
);
1123 brig_operand
.add (&out
, sizeof (out
));
1126 /* Emit a code reference operand REF. */
1129 emit_code_ref_operand (hsa_op_code_ref
*ref
)
1131 struct BrigOperandCodeRef out
;
1133 out
.base
.byteCount
= lendian16 (sizeof (out
));
1134 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CODE_REF
);
1135 out
.ref
= lendian32 (ref
->m_directive_offset
);
1136 brig_operand
.add (&out
, sizeof (out
));
1139 /* Emit a code list operand CODE_LIST. */
1142 emit_code_list_operand (hsa_op_code_list
*code_list
)
1144 struct BrigOperandCodeList out
;
1145 unsigned args
= code_list
->m_offsets
.length ();
1147 for (unsigned i
= 0; i
< args
; i
++)
1148 gcc_assert (code_list
->m_offsets
[i
]);
1150 out
.base
.byteCount
= lendian16 (sizeof (out
));
1151 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CODE_LIST
);
1153 uint32_t byteCount
= lendian32 (4 * args
);
1155 out
.elements
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1156 brig_data
.add (code_list
->m_offsets
.address (), args
* sizeof (uint32_t));
1157 brig_data
.round_size_up (4);
1158 brig_operand
.add (&out
, sizeof (out
));
1161 /* Emit an operand list operand OPERAND_LIST. */
1164 emit_operand_list_operand (hsa_op_operand_list
*operand_list
)
1166 struct BrigOperandOperandList out
;
1167 unsigned args
= operand_list
->m_offsets
.length ();
1169 for (unsigned i
= 0; i
< args
; i
++)
1170 gcc_assert (operand_list
->m_offsets
[i
]);
1172 out
.base
.byteCount
= lendian16 (sizeof (out
));
1173 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST
);
1175 uint32_t byteCount
= lendian32 (4 * args
);
1177 out
.elements
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1178 brig_data
.add (operand_list
->m_offsets
.address (), args
* sizeof (uint32_t));
1179 brig_data
.round_size_up (4);
1180 brig_operand
.add (&out
, sizeof (out
));
1183 /* Emit all operands queued for writing. */
1186 emit_queued_operands (void)
1188 for (hsa_op_base
*op
= op_queue
.first_op
; op
; op
= op
->m_next
)
1190 gcc_assert (op
->m_brig_op_offset
== brig_operand
.total_size
);
1191 if (hsa_op_immed
*imm
= dyn_cast
<hsa_op_immed
*> (op
))
1192 emit_immediate_operand (imm
);
1193 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
1194 emit_register_operand (reg
);
1195 else if (hsa_op_address
*addr
= dyn_cast
<hsa_op_address
*> (op
))
1196 emit_address_operand (addr
);
1197 else if (hsa_op_code_ref
*ref
= dyn_cast
<hsa_op_code_ref
*> (op
))
1198 emit_code_ref_operand (ref
);
1199 else if (hsa_op_code_list
*code_list
= dyn_cast
<hsa_op_code_list
*> (op
))
1200 emit_code_list_operand (code_list
);
1201 else if (hsa_op_operand_list
*l
= dyn_cast
<hsa_op_operand_list
*> (op
))
1202 emit_operand_list_operand (l
);
1208 /* Emit directives describing the function that is used for
1209 a function declaration. */
1211 static BrigDirectiveExecutable
*
1212 emit_function_declaration (tree decl
)
1214 hsa_function_representation
*f
= hsa_generate_function_declaration (decl
);
1216 BrigDirectiveExecutable
*e
= emit_function_directives (f
, true);
1217 emit_queued_operands ();
1224 /* Emit directives describing the function that is used for
1225 an internal function declaration. */
1227 static BrigDirectiveExecutable
*
1228 emit_internal_fn_decl (hsa_internal_fn
*fn
)
1230 hsa_function_representation
*f
= hsa_generate_internal_fn_decl (fn
);
1232 BrigDirectiveExecutable
*e
= emit_function_directives (f
, true);
1233 emit_queued_operands ();
1240 /* Enqueue all operands of INSN and return offset to BRIG data section
1241 to list of operand offsets. */
1244 emit_insn_operands (hsa_insn_basic
*insn
)
1246 auto_vec
<BrigOperandOffset32_t
, HSA_BRIG_INT_STORAGE_OPERANDS
>
1249 unsigned l
= insn
->operand_count ();
1251 /* We have N operands so use 4 * N for the byte_count. */
1252 uint32_t byte_count
= lendian32 (4 * l
);
1253 unsigned offset
= brig_data
.add (&byte_count
, sizeof (byte_count
));
1256 operand_offsets
.safe_grow (l
);
1257 for (unsigned i
= 0; i
< l
; i
++)
1258 operand_offsets
[i
] = lendian32 (enqueue_op (insn
->get_op (i
)));
1260 brig_data
.add (operand_offsets
.address (),
1261 l
* sizeof (BrigOperandOffset32_t
));
1263 brig_data
.round_size_up (4);
1267 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1268 to BRIG data section to list of operand offsets. */
1271 emit_operands (hsa_op_base
*op0
, hsa_op_base
*op1
= NULL
,
1272 hsa_op_base
*op2
= NULL
)
1274 auto_vec
<BrigOperandOffset32_t
, HSA_BRIG_INT_STORAGE_OPERANDS
>
1277 gcc_checking_assert (op0
!= NULL
);
1278 operand_offsets
.safe_push (enqueue_op (op0
));
1282 operand_offsets
.safe_push (enqueue_op (op1
));
1284 operand_offsets
.safe_push (enqueue_op (op2
));
1287 unsigned l
= operand_offsets
.length ();
1289 /* We have N operands so use 4 * N for the byte_count. */
1290 uint32_t byte_count
= lendian32 (4 * l
);
1292 unsigned offset
= brig_data
.add (&byte_count
, sizeof (byte_count
));
1293 brig_data
.add (operand_offsets
.address (),
1294 l
* sizeof (BrigOperandOffset32_t
));
1296 brig_data
.round_size_up (4);
1301 /* Emit an HSA memory instruction and all necessary directives, schedule
1302 necessary operands for writing. */
1305 emit_memory_insn (hsa_insn_mem
*mem
)
1307 struct BrigInstMem repr
;
1308 gcc_checking_assert (mem
->operand_count () == 2);
1310 hsa_op_address
*addr
= as_a
<hsa_op_address
*> (mem
->get_op (1));
1312 /* This is necessary because of the erroneous typedef of
1313 BrigMemoryModifier8_t which introduces padding which may then contain
1314 random stuff (which we do not want so that we can test things don't
1316 memset (&repr
, 0, sizeof (repr
));
1317 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1318 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MEM
);
1319 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1320 repr
.base
.type
= lendian16 (mem
->m_type
);
1321 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1324 repr
.segment
= addr
->m_symbol
->m_segment
;
1326 repr
.segment
= BRIG_SEGMENT_FLAT
;
1328 repr
.equivClass
= mem
->m_equiv_class
;
1329 repr
.align
= mem
->m_align
;
1330 if (mem
->m_opcode
== BRIG_OPCODE_LD
)
1331 repr
.width
= BRIG_WIDTH_1
;
1333 repr
.width
= BRIG_WIDTH_NONE
;
1334 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1335 brig_code
.add (&repr
, sizeof (repr
));
1339 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1340 necessary operands for writing. */
1343 emit_signal_insn (hsa_insn_signal
*mem
)
1345 struct BrigInstSignal repr
;
1347 memset (&repr
, 0, sizeof (repr
));
1348 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1349 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SIGNAL
);
1350 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1351 repr
.base
.type
= lendian16 (mem
->m_type
);
1352 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1354 repr
.memoryOrder
= mem
->m_memory_order
;
1355 repr
.signalOperation
= mem
->m_signalop
;
1356 repr
.signalType
= hsa_machine_large_p () ? BRIG_TYPE_SIG64
: BRIG_TYPE_SIG32
;
1358 brig_code
.add (&repr
, sizeof (repr
));
1362 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1363 necessary operands for writing. */
1366 emit_atomic_insn (hsa_insn_atomic
*mem
)
1368 struct BrigInstAtomic repr
;
1370 /* Either operand[0] or operand[1] must be an address operand. */
1371 hsa_op_address
*addr
= NULL
;
1372 if (is_a
<hsa_op_address
*> (mem
->get_op (0)))
1373 addr
= as_a
<hsa_op_address
*> (mem
->get_op (0));
1375 addr
= as_a
<hsa_op_address
*> (mem
->get_op (1));
1377 memset (&repr
, 0, sizeof (repr
));
1378 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1379 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_ATOMIC
);
1380 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1381 repr
.base
.type
= lendian16 (mem
->m_type
);
1382 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1385 repr
.segment
= addr
->m_symbol
->m_segment
;
1387 repr
.segment
= BRIG_SEGMENT_FLAT
;
1388 repr
.memoryOrder
= mem
->m_memoryorder
;
1389 repr
.memoryScope
= mem
->m_memoryscope
;
1390 repr
.atomicOperation
= mem
->m_atomicop
;
1392 brig_code
.add (&repr
, sizeof (repr
));
1396 /* Emit an HSA LDA instruction and all necessary directives, schedule
1397 necessary operands for writing. */
1400 emit_addr_insn (hsa_insn_basic
*insn
)
1402 struct BrigInstAddr repr
;
1404 hsa_op_address
*addr
= as_a
<hsa_op_address
*> (insn
->get_op (1));
1406 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1407 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_ADDR
);
1408 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1409 repr
.base
.type
= lendian16 (insn
->m_type
);
1410 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1413 repr
.segment
= addr
->m_symbol
->m_segment
;
1415 repr
.segment
= BRIG_SEGMENT_FLAT
;
1416 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1418 brig_code
.add (&repr
, sizeof (repr
));
1422 /* Emit an HSA segment conversion instruction and all necessary directives,
1423 schedule necessary operands for writing. */
1426 emit_segment_insn (hsa_insn_seg
*seg
)
1428 struct BrigInstSegCvt repr
;
1430 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1431 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SEG_CVT
);
1432 repr
.base
.opcode
= lendian16 (seg
->m_opcode
);
1433 repr
.base
.type
= lendian16 (seg
->m_type
);
1434 repr
.base
.operands
= lendian32 (emit_insn_operands (seg
));
1435 repr
.sourceType
= lendian16 (as_a
<hsa_op_reg
*> (seg
->get_op (1))->m_type
);
1436 repr
.segment
= seg
->m_segment
;
1439 brig_code
.add (&repr
, sizeof (repr
));
1444 /* Emit an HSA alloca instruction and all necessary directives,
1445 schedule necessary operands for writing. */
1448 emit_alloca_insn (hsa_insn_alloca
*alloca
)
1450 struct BrigInstMem repr
;
1451 gcc_checking_assert (alloca
->operand_count () == 2);
1453 memset (&repr
, 0, sizeof (repr
));
1454 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1455 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MEM
);
1456 repr
.base
.opcode
= lendian16 (alloca
->m_opcode
);
1457 repr
.base
.type
= lendian16 (alloca
->m_type
);
1458 repr
.base
.operands
= lendian32 (emit_insn_operands (alloca
));
1459 repr
.segment
= BRIG_SEGMENT_PRIVATE
;
1461 repr
.equivClass
= 0;
1462 repr
.align
= alloca
->m_align
;
1463 repr
.width
= BRIG_WIDTH_NONE
;
1464 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1465 brig_code
.add (&repr
, sizeof (repr
));
1469 /* Emit an HSA comparison instruction and all necessary directives,
1470 schedule necessary operands for writing. */
1473 emit_cmp_insn (hsa_insn_cmp
*cmp
)
1475 struct BrigInstCmp repr
;
1477 memset (&repr
, 0, sizeof (repr
));
1478 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1479 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_CMP
);
1480 repr
.base
.opcode
= lendian16 (cmp
->m_opcode
);
1481 repr
.base
.type
= lendian16 (cmp
->m_type
);
1482 repr
.base
.operands
= lendian32 (emit_insn_operands (cmp
));
1484 if (is_a
<hsa_op_reg
*> (cmp
->get_op (1)))
1486 = lendian16 (as_a
<hsa_op_reg
*> (cmp
->get_op (1))->m_type
);
1489 = lendian16 (as_a
<hsa_op_immed
*> (cmp
->get_op (1))->m_type
);
1491 repr
.compare
= cmp
->m_compare
;
1494 brig_code
.add (&repr
, sizeof (repr
));
1498 /* Emit an HSA generic branching/sycnronization instruction. */
1501 emit_generic_branch_insn (hsa_insn_br
*br
)
1503 struct BrigInstBr repr
;
1504 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1505 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1506 repr
.base
.opcode
= lendian16 (br
->m_opcode
);
1507 repr
.width
= br
->m_width
;
1508 repr
.base
.type
= lendian16 (br
->m_type
);
1509 repr
.base
.operands
= lendian32 (emit_insn_operands (br
));
1510 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1512 brig_code
.add (&repr
, sizeof (repr
));
1516 /* Emit an HSA conditional branching instruction and all necessary directives,
1517 schedule necessary operands for writing. */
1520 emit_cond_branch_insn (hsa_insn_cbr
*br
)
1522 struct BrigInstBr repr
;
1524 basic_block target
= NULL
;
1528 /* At the moment we only handle direct conditional jumps. */
1529 gcc_assert (br
->m_opcode
== BRIG_OPCODE_CBR
);
1530 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1531 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1532 repr
.base
.opcode
= lendian16 (br
->m_opcode
);
1533 repr
.width
= br
->m_width
;
1534 /* For Conditional jumps the type is always B1. */
1535 repr
.base
.type
= lendian16 (BRIG_TYPE_B1
);
1537 FOR_EACH_EDGE (e
, ei
, br
->m_bb
->succs
)
1538 if (e
->flags
& EDGE_TRUE_VALUE
)
1543 gcc_assert (target
);
1546 = lendian32 (emit_operands (br
->get_op (0),
1547 &hsa_bb_for_bb (target
)->m_label_ref
));
1548 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1550 brig_code
.add (&repr
, sizeof (repr
));
1554 /* Emit an HSA unconditional jump branching instruction that points to
1555 a label REFERENCE. */
1558 emit_unconditional_jump (hsa_op_code_ref
*reference
)
1560 struct BrigInstBr repr
;
1562 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1563 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1564 repr
.base
.opcode
= lendian16 (BRIG_OPCODE_BR
);
1565 repr
.base
.type
= lendian16 (BRIG_TYPE_NONE
);
1566 /* Direct branches to labels must be width(all). */
1567 repr
.width
= BRIG_WIDTH_ALL
;
1569 repr
.base
.operands
= lendian32 (emit_operands (reference
));
1570 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1571 brig_code
.add (&repr
, sizeof (repr
));
1575 /* Emit an HSA switch jump instruction that uses a jump table to
1576 jump to a destination label. */
1579 emit_switch_insn (hsa_insn_sbr
*sbr
)
1581 struct BrigInstBr repr
;
1583 gcc_assert (sbr
->m_opcode
== BRIG_OPCODE_SBR
);
1584 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1585 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1586 repr
.base
.opcode
= lendian16 (sbr
->m_opcode
);
1587 repr
.width
= BRIG_WIDTH_1
;
1588 /* For Conditional jumps the type is always B1. */
1589 hsa_op_reg
*index
= as_a
<hsa_op_reg
*> (sbr
->get_op (0));
1590 repr
.base
.type
= lendian16 (index
->m_type
);
1592 = lendian32 (emit_operands (sbr
->get_op (0), sbr
->m_label_code_list
));
1593 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1595 brig_code
.add (&repr
, sizeof (repr
));
1599 /* Emit a HSA convert instruction and all necessary directives, schedule
1600 necessary operands for writing. */
1603 emit_cvt_insn (hsa_insn_cvt
*insn
)
1605 struct BrigInstCvt repr
;
1606 BrigType16_t srctype
;
1608 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1609 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_CVT
);
1610 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1611 repr
.base
.type
= lendian16 (insn
->m_type
);
1612 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1614 if (is_a
<hsa_op_reg
*> (insn
->get_op (1)))
1615 srctype
= as_a
<hsa_op_reg
*> (insn
->get_op (1))->m_type
;
1617 srctype
= as_a
<hsa_op_immed
*> (insn
->get_op (1))->m_type
;
1618 repr
.sourceType
= lendian16 (srctype
);
1620 /* float to smaller float requires a rounding setting (we default
1622 if (hsa_type_float_p (insn
->m_type
)
1623 && (!hsa_type_float_p (srctype
)
1624 || ((insn
->m_type
& BRIG_TYPE_BASE_MASK
)
1625 < (srctype
& BRIG_TYPE_BASE_MASK
))))
1626 repr
.round
= BRIG_ROUND_FLOAT_NEAR_EVEN
;
1627 else if (hsa_type_integer_p (insn
->m_type
) &&
1628 hsa_type_float_p (srctype
))
1629 repr
.round
= BRIG_ROUND_INTEGER_ZERO
;
1631 repr
.round
= BRIG_ROUND_NONE
;
1632 brig_code
.add (&repr
, sizeof (repr
));
1636 /* Emit call instruction INSN, where this instruction must be closed
1637 within a call block instruction. */
1640 emit_call_insn (hsa_insn_call
*call
)
1642 struct BrigInstBr repr
;
1644 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1645 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1646 repr
.base
.opcode
= lendian16 (BRIG_OPCODE_CALL
);
1647 repr
.base
.type
= lendian16 (BRIG_TYPE_NONE
);
1650 = lendian32 (emit_operands (call
->m_result_code_list
, &call
->m_func
,
1651 call
->m_args_code_list
));
1653 /* Internal functions have not set m_called_function. */
1654 if (call
->m_called_function
)
1656 function_linkage_pair
pair (call
->m_called_function
,
1657 call
->m_func
.m_brig_op_offset
);
1658 function_call_linkage
.safe_push (pair
);
1662 hsa_internal_fn
*slot
1663 = hsa_emitted_internal_decls
->find (call
->m_called_internal_fn
);
1665 gcc_assert (slot
->m_offset
> 0);
1666 call
->m_func
.m_directive_offset
= slot
->m_offset
;
1669 repr
.width
= BRIG_WIDTH_ALL
;
1670 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1672 brig_code
.add (&repr
, sizeof (repr
));
1676 /* Emit argument block directive. */
1679 emit_arg_block_insn (hsa_insn_arg_block
*insn
)
1681 switch (insn
->m_kind
)
1683 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START
:
1685 struct BrigDirectiveArgBlock repr
;
1686 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1687 repr
.base
.kind
= lendian16 (insn
->m_kind
);
1688 brig_code
.add (&repr
, sizeof (repr
));
1690 for (unsigned i
= 0; i
< insn
->m_call_insn
->m_input_args
.length (); i
++)
1692 insn
->m_call_insn
->m_args_code_list
->m_offsets
[i
]
1693 = lendian32 (emit_directive_variable
1694 (insn
->m_call_insn
->m_input_args
[i
]));
1698 if (insn
->m_call_insn
->m_output_arg
)
1700 insn
->m_call_insn
->m_result_code_list
->m_offsets
[0]
1701 = lendian32 (emit_directive_variable
1702 (insn
->m_call_insn
->m_output_arg
));
1708 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END
:
1710 struct BrigDirectiveArgBlock repr
;
1711 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1712 repr
.base
.kind
= lendian16 (insn
->m_kind
);
1713 brig_code
.add (&repr
, sizeof (repr
));
1723 /* Emit comment directive. */
1726 emit_comment_insn (hsa_insn_comment
*insn
)
1728 struct BrigDirectiveComment repr
;
1729 memset (&repr
, 0, sizeof (repr
));
1731 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1732 repr
.base
.kind
= lendian16 (insn
->m_opcode
);
1733 repr
.name
= brig_emit_string (insn
->m_comment
, '\0', false);
1734 brig_code
.add (&repr
, sizeof (repr
));
1737 /* Emit queue instruction INSN. */
1740 emit_queue_insn (hsa_insn_queue
*insn
)
1743 memset (&repr
, 0, sizeof (repr
));
1745 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1746 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_QUEUE
);
1747 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1748 repr
.base
.type
= lendian16 (insn
->m_type
);
1749 repr
.segment
= insn
->m_segment
;
1750 repr
.memoryOrder
= insn
->m_memory_order
;
1751 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1752 brig_data
.round_size_up (4);
1753 brig_code
.add (&repr
, sizeof (repr
));
1758 /* Emit source type instruction INSN. */
1761 emit_srctype_insn (hsa_insn_srctype
*insn
)
1763 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1764 struct BrigInstSourceType repr
;
1765 unsigned operand_count
= insn
->operand_count ();
1766 gcc_checking_assert (operand_count
>= 2);
1768 memset (&repr
, 0, sizeof (repr
));
1769 repr
.sourceType
= lendian16 (insn
->m_source_type
);
1770 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1771 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SOURCE_TYPE
);
1772 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1773 repr
.base
.type
= lendian16 (insn
->m_type
);
1775 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1776 brig_code
.add (&repr
, sizeof (struct BrigInstSourceType
));
1780 /* Emit packed instruction INSN. */
1783 emit_packed_insn (hsa_insn_packed
*insn
)
1785 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1786 struct BrigInstSourceType repr
;
1787 unsigned operand_count
= insn
->operand_count ();
1788 gcc_checking_assert (operand_count
>= 2);
1790 memset (&repr
, 0, sizeof (repr
));
1791 repr
.sourceType
= lendian16 (insn
->m_source_type
);
1792 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1793 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SOURCE_TYPE
);
1794 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1795 repr
.base
.type
= lendian16 (insn
->m_type
);
1797 if (insn
->m_opcode
== BRIG_OPCODE_COMBINE
)
1799 /* Create operand list for packed type. */
1800 for (unsigned i
= 1; i
< operand_count
; i
++)
1802 gcc_checking_assert (insn
->get_op (i
));
1803 insn
->m_operand_list
->m_offsets
[i
- 1]
1804 = lendian32 (enqueue_op (insn
->get_op (i
)));
1807 repr
.base
.operands
= lendian32 (emit_operands (insn
->get_op (0),
1808 insn
->m_operand_list
));
1810 else if (insn
->m_opcode
== BRIG_OPCODE_EXPAND
)
1812 /* Create operand list for packed type. */
1813 for (unsigned i
= 0; i
< operand_count
- 1; i
++)
1815 gcc_checking_assert (insn
->get_op (i
));
1816 insn
->m_operand_list
->m_offsets
[i
]
1817 = lendian32 (enqueue_op (insn
->get_op (i
)));
1820 unsigned ops
= emit_operands (insn
->m_operand_list
,
1821 insn
->get_op (insn
->operand_count () - 1));
1822 repr
.base
.operands
= lendian32 (ops
);
1826 brig_code
.add (&repr
, sizeof (struct BrigInstSourceType
));
1830 /* Emit a basic HSA instruction and all necessary directives, schedule
1831 necessary operands for writing. */
1834 emit_basic_insn (hsa_insn_basic
*insn
)
1836 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1837 struct BrigInstMod repr
;
1840 memset (&repr
, 0, sizeof (repr
));
1841 repr
.base
.base
.byteCount
= lendian16 (sizeof (BrigInstBasic
));
1842 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BASIC
);
1843 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1844 switch (insn
->m_opcode
)
1846 /* And the bit-logical operations need bit types and whine about
1847 arithmetic types :-/ */
1848 case BRIG_OPCODE_AND
:
1849 case BRIG_OPCODE_OR
:
1850 case BRIG_OPCODE_XOR
:
1851 case BRIG_OPCODE_NOT
:
1852 type
= regtype_for_type (insn
->m_type
);
1855 type
= insn
->m_type
;
1858 repr
.base
.type
= lendian16 (type
);
1859 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1861 if (hsa_type_packed_p (type
))
1863 if (hsa_type_float_p (type
)
1864 && !hsa_opcode_floating_bit_insn_p (insn
->m_opcode
))
1865 repr
.round
= BRIG_ROUND_FLOAT_NEAR_EVEN
;
1868 /* We assume that destination and sources agree in packing layout. */
1869 if (insn
->num_used_ops () >= 2)
1870 repr
.pack
= BRIG_PACK_PP
;
1872 repr
.pack
= BRIG_PACK_P
;
1874 repr
.base
.base
.byteCount
= lendian16 (sizeof (BrigInstMod
));
1875 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MOD
);
1876 brig_code
.add (&repr
, sizeof (struct BrigInstMod
));
1879 brig_code
.add (&repr
, sizeof (struct BrigInstBasic
));
1883 /* Emit an HSA instruction and all necessary directives, schedule necessary
1884 operands for writing. */
1887 emit_insn (hsa_insn_basic
*insn
)
1889 gcc_assert (!is_a
<hsa_insn_phi
*> (insn
));
1891 insn
->m_brig_offset
= brig_code
.total_size
;
1893 if (hsa_insn_signal
*signal
= dyn_cast
<hsa_insn_signal
*> (insn
))
1894 emit_signal_insn (signal
);
1895 else if (hsa_insn_atomic
*atom
= dyn_cast
<hsa_insn_atomic
*> (insn
))
1896 emit_atomic_insn (atom
);
1897 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
1898 emit_memory_insn (mem
);
1899 else if (insn
->m_opcode
== BRIG_OPCODE_LDA
)
1900 emit_addr_insn (insn
);
1901 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
1902 emit_segment_insn (seg
);
1903 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
1904 emit_cmp_insn (cmp
);
1905 else if (hsa_insn_cbr
*br
= dyn_cast
<hsa_insn_cbr
*> (insn
))
1906 emit_cond_branch_insn (br
);
1907 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
1909 if (switch_instructions
== NULL
)
1910 switch_instructions
= new vec
<hsa_insn_sbr
*> ();
1912 switch_instructions
->safe_push (sbr
);
1913 emit_switch_insn (sbr
);
1915 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
1916 emit_generic_branch_insn (br
);
1917 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
1918 emit_arg_block_insn (block
);
1919 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
1920 emit_call_insn (call
);
1921 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
1922 emit_comment_insn (comment
);
1923 else if (hsa_insn_queue
*queue
= dyn_cast
<hsa_insn_queue
*> (insn
))
1924 emit_queue_insn (queue
);
1925 else if (hsa_insn_srctype
*srctype
= dyn_cast
<hsa_insn_srctype
*> (insn
))
1926 emit_srctype_insn (srctype
);
1927 else if (hsa_insn_packed
*packed
= dyn_cast
<hsa_insn_packed
*> (insn
))
1928 emit_packed_insn (packed
);
1929 else if (hsa_insn_cvt
*cvt
= dyn_cast
<hsa_insn_cvt
*> (insn
))
1930 emit_cvt_insn (cvt
);
1931 else if (hsa_insn_alloca
*alloca
= dyn_cast
<hsa_insn_alloca
*> (insn
))
1932 emit_alloca_insn (alloca
);
1934 emit_basic_insn (insn
);
1937 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1938 or we are about to finish emitting code, if it is NULL. If the fall through
1939 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1942 perhaps_emit_branch (basic_block bb
, basic_block next_bb
)
1944 basic_block t_bb
= NULL
, ff
= NULL
;
1949 /* If the last instruction of BB is a switch, ignore emission of all
1951 if (hsa_bb_for_bb (bb
)->m_last_insn
1952 && is_a
<hsa_insn_sbr
*> (hsa_bb_for_bb (bb
)->m_last_insn
))
1955 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1956 if (e
->flags
& EDGE_TRUE_VALUE
)
1967 if (!ff
|| ff
== next_bb
|| ff
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1970 emit_unconditional_jump (&hsa_bb_for_bb (ff
)->m_label_ref
);
1973 /* Emit the a function with name NAME to the various brig sections. */
1976 hsa_brig_emit_function (void)
1978 basic_block bb
, prev_bb
;
1979 hsa_insn_basic
*insn
;
1980 BrigDirectiveExecutable
*ptr_to_fndir
;
1984 brig_insn_count
= 0;
1985 memset (&op_queue
, 0, sizeof (op_queue
));
1986 op_queue
.projected_size
= brig_operand
.total_size
;
1988 if (!function_offsets
)
1989 function_offsets
= new hash_map
<tree
, BrigCodeOffset32_t
> ();
1991 if (!emitted_declarations
)
1992 emitted_declarations
= new hash_map
<tree
, BrigDirectiveExecutable
*> ();
1994 for (unsigned i
= 0; i
< hsa_cfun
->m_called_functions
.length (); i
++)
1996 tree called
= hsa_cfun
->m_called_functions
[i
];
1998 /* If the function has no definition, emit a declaration. */
1999 if (!emitted_declarations
->get (called
))
2001 BrigDirectiveExecutable
*e
= emit_function_declaration (called
);
2002 emitted_declarations
->put (called
, e
);
2006 for (unsigned i
= 0; i
< hsa_cfun
->m_called_internal_fns
.length (); i
++)
2008 hsa_internal_fn
*called
= hsa_cfun
->m_called_internal_fns
[i
];
2009 emit_internal_fn_decl (called
);
2012 ptr_to_fndir
= emit_function_directives (hsa_cfun
, false);
2013 for (insn
= hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->m_first_insn
;
2015 insn
= insn
->m_next
)
2017 prev_bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
);
2018 FOR_EACH_BB_FN (bb
, cfun
)
2020 perhaps_emit_branch (prev_bb
, bb
);
2021 emit_bb_label_directive (hsa_bb_for_bb (bb
));
2022 for (insn
= hsa_bb_for_bb (bb
)->m_first_insn
; insn
; insn
= insn
->m_next
)
2026 perhaps_emit_branch (prev_bb
, NULL
);
2027 ptr_to_fndir
->nextModuleEntry
= lendian32 (brig_code
.total_size
);
2029 /* Fill up label references for all sbr instructions. */
2030 if (switch_instructions
)
2032 for (unsigned i
= 0; i
< switch_instructions
->length (); i
++)
2034 hsa_insn_sbr
*sbr
= (*switch_instructions
)[i
];
2035 for (unsigned j
= 0; j
< sbr
->m_jump_table
.length (); j
++)
2037 hsa_bb
*hbb
= hsa_bb_for_bb (sbr
->m_jump_table
[j
]);
2038 sbr
->m_label_code_list
->m_offsets
[j
]
2039 = hbb
->m_label_ref
.m_directive_offset
;
2043 switch_instructions
->release ();
2044 delete switch_instructions
;
2045 switch_instructions
= NULL
;
2050 fprintf (dump_file
, "------- After BRIG emission: -------\n");
2051 dump_hsa_cfun (dump_file
);
2054 emit_queued_operands ();
2057 /* Emit all OMP symbols related to OMP. */
2060 hsa_brig_emit_omp_symbols (void)
2063 emit_directive_variable (hsa_num_threads
);
2066 /* Create and return __hsa_global_variables symbol that contains
2067 all informations consumed by libgomp to link global variables
2068 with their string names used by an HSA kernel. */
2071 hsa_output_global_variables ()
2073 unsigned l
= hsa_global_variable_symbols
->elements ();
2075 tree variable_info_type
= make_node (RECORD_TYPE
);
2076 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2077 get_identifier ("name"), ptr_type_node
);
2078 DECL_CHAIN (id_f1
) = NULL_TREE
;
2079 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2080 get_identifier ("omp_data_size"),
2082 DECL_CHAIN (id_f2
) = id_f1
;
2083 finish_builtin_struct (variable_info_type
, "__hsa_variable_info", id_f2
,
2086 tree int_num_of_global_vars
;
2087 int_num_of_global_vars
= build_int_cst (uint32_type_node
, l
);
2088 tree global_vars_num_index_type
= build_index_type (int_num_of_global_vars
);
2089 tree global_vars_array_type
= build_array_type (variable_info_type
,
2090 global_vars_num_index_type
);
2091 TYPE_ARTIFICIAL (global_vars_array_type
) = 1;
2093 vec
<constructor_elt
, va_gc
> *global_vars_vec
= NULL
;
2095 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
2096 = hsa_global_variable_symbols
->begin ();
2097 it
!= hsa_global_variable_symbols
->end (); ++it
)
2099 unsigned len
= strlen ((*it
)->m_name
);
2100 char *copy
= XNEWVEC (char, len
+ 2);
2102 memcpy (copy
+ 1, (*it
)->m_name
, len
);
2103 copy
[len
+ 1] = '\0';
2105 hsa_sanitize_name (copy
);
2107 tree var_name
= build_string (len
, copy
);
2108 TREE_TYPE (var_name
)
2109 = build_array_type (char_type_node
, build_index_type (size_int (len
)));
2112 vec
<constructor_elt
, va_gc
> *variable_info_vec
= NULL
;
2113 CONSTRUCTOR_APPEND_ELT (variable_info_vec
, NULL_TREE
,
2115 build_pointer_type (TREE_TYPE (var_name
)),
2117 CONSTRUCTOR_APPEND_ELT (variable_info_vec
, NULL_TREE
,
2118 build_fold_addr_expr ((*it
)->m_decl
));
2120 tree variable_info_ctor
= build_constructor (variable_info_type
,
2123 CONSTRUCTOR_APPEND_ELT (global_vars_vec
, NULL_TREE
,
2124 variable_info_ctor
);
2127 tree global_vars_ctor
= build_constructor (global_vars_array_type
,
2131 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_global_variables", 1);
2132 tree global_vars_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2133 get_identifier (tmp_name
),
2134 global_vars_array_type
);
2135 TREE_STATIC (global_vars_table
) = 1;
2136 TREE_READONLY (global_vars_table
) = 1;
2137 TREE_PUBLIC (global_vars_table
) = 0;
2138 DECL_ARTIFICIAL (global_vars_table
) = 1;
2139 DECL_IGNORED_P (global_vars_table
) = 1;
2140 DECL_EXTERNAL (global_vars_table
) = 0;
2141 TREE_CONSTANT (global_vars_table
) = 1;
2142 DECL_INITIAL (global_vars_table
) = global_vars_ctor
;
2143 varpool_node::finalize_decl (global_vars_table
);
2145 return global_vars_table
;
2148 /* Create __hsa_host_functions and __hsa_kernels that contain
2149 all informations consumed by libgomp to register all kernels
2150 in the BRIG binary. */
2153 hsa_output_kernels (tree
*host_func_table
, tree
*kernels
)
2155 unsigned map_count
= hsa_get_number_decl_kernel_mappings ();
2157 tree int_num_of_kernels
;
2158 int_num_of_kernels
= build_int_cst (uint32_type_node
, map_count
);
2159 tree kernel_num_index_type
= build_index_type (int_num_of_kernels
);
2160 tree host_functions_array_type
= build_array_type (ptr_type_node
,
2161 kernel_num_index_type
);
2162 TYPE_ARTIFICIAL (host_functions_array_type
) = 1;
2164 vec
<constructor_elt
, va_gc
> *host_functions_vec
= NULL
;
2165 for (unsigned i
= 0; i
< map_count
; ++i
)
2167 tree decl
= hsa_get_decl_kernel_mapping_decl (i
);
2168 tree host_fn
= build_fold_addr_expr (hsa_get_host_function (decl
));
2169 CONSTRUCTOR_APPEND_ELT (host_functions_vec
, NULL_TREE
, host_fn
);
2171 tree host_functions_ctor
= build_constructor (host_functions_array_type
,
2172 host_functions_vec
);
2174 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_host_functions", 1);
2175 tree hsa_host_func_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2176 get_identifier (tmp_name
),
2177 host_functions_array_type
);
2178 TREE_STATIC (hsa_host_func_table
) = 1;
2179 TREE_READONLY (hsa_host_func_table
) = 1;
2180 TREE_PUBLIC (hsa_host_func_table
) = 0;
2181 DECL_ARTIFICIAL (hsa_host_func_table
) = 1;
2182 DECL_IGNORED_P (hsa_host_func_table
) = 1;
2183 DECL_EXTERNAL (hsa_host_func_table
) = 0;
2184 TREE_CONSTANT (hsa_host_func_table
) = 1;
2185 DECL_INITIAL (hsa_host_func_table
) = host_functions_ctor
;
2186 varpool_node::finalize_decl (hsa_host_func_table
);
2187 *host_func_table
= hsa_host_func_table
;
2189 /* Following code emits list of kernel_info structures. */
2191 tree kernel_info_type
= make_node (RECORD_TYPE
);
2192 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2193 get_identifier ("name"), ptr_type_node
);
2194 DECL_CHAIN (id_f1
) = NULL_TREE
;
2195 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2196 get_identifier ("omp_data_size"),
2197 unsigned_type_node
);
2198 DECL_CHAIN (id_f2
) = id_f1
;
2199 tree id_f3
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2200 get_identifier ("gridified_kernel_p"),
2202 DECL_CHAIN (id_f3
) = id_f2
;
2203 tree id_f4
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2204 get_identifier ("kernel_dependencies_count"),
2205 unsigned_type_node
);
2206 DECL_CHAIN (id_f4
) = id_f3
;
2207 tree id_f5
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2208 get_identifier ("kernel_dependencies"),
2209 build_pointer_type (build_pointer_type
2211 DECL_CHAIN (id_f5
) = id_f4
;
2212 finish_builtin_struct (kernel_info_type
, "__hsa_kernel_info", id_f5
,
2215 int_num_of_kernels
= build_int_cstu (uint32_type_node
, map_count
);
2216 tree kernel_info_vector_type
2217 = build_array_type (kernel_info_type
,
2218 build_index_type (int_num_of_kernels
));
2219 TYPE_ARTIFICIAL (kernel_info_vector_type
) = 1;
2221 vec
<constructor_elt
, va_gc
> *kernel_info_vector_vec
= NULL
;
2222 tree kernel_dependencies_vector_type
= NULL
;
2224 for (unsigned i
= 0; i
< map_count
; ++i
)
2226 tree kernel
= hsa_get_decl_kernel_mapping_decl (i
);
2227 char *name
= hsa_get_decl_kernel_mapping_name (i
);
2228 unsigned len
= strlen (name
);
2229 char *copy
= XNEWVEC (char, len
+ 2);
2231 memcpy (copy
+ 1, name
, len
);
2232 copy
[len
+ 1] = '\0';
2235 tree kern_name
= build_string (len
, copy
);
2236 TREE_TYPE (kern_name
)
2237 = build_array_type (char_type_node
, build_index_type (size_int (len
)));
2240 unsigned omp_size
= hsa_get_decl_kernel_mapping_omp_size (i
);
2241 tree omp_data_size
= build_int_cstu (unsigned_type_node
, omp_size
);
2242 bool gridified_kernel_p
= hsa_get_decl_kernel_mapping_gridified (i
);
2243 tree gridified_kernel_p_tree
= build_int_cstu (boolean_type_node
,
2244 gridified_kernel_p
);
2246 vec
<constructor_elt
, va_gc
> *kernel_dependencies_vec
= NULL
;
2247 if (hsa_decl_kernel_dependencies
)
2249 vec
<const char *> **slot
;
2250 slot
= hsa_decl_kernel_dependencies
->get (kernel
);
2253 vec
<const char *> *dependencies
= *slot
;
2254 count
= dependencies
->length ();
2256 kernel_dependencies_vector_type
2257 = build_array_type (build_pointer_type (char_type_node
),
2258 build_index_type (size_int (count
)));
2259 TYPE_ARTIFICIAL (kernel_dependencies_vector_type
) = 1;
2261 for (unsigned j
= 0; j
< count
; j
++)
2263 const char *d
= (*dependencies
)[j
];
2265 tree dependency_name
= build_string (len
, d
);
2266 TREE_TYPE (dependency_name
)
2267 = build_array_type (char_type_node
,
2268 build_index_type (size_int (len
)));
2270 CONSTRUCTOR_APPEND_ELT
2271 (kernel_dependencies_vec
, NULL_TREE
,
2273 build_pointer_type (TREE_TYPE (dependency_name
)),
2279 tree dependencies_count
= build_int_cstu (unsigned_type_node
, count
);
2281 vec
<constructor_elt
, va_gc
> *kernel_info_vec
= NULL
;
2282 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2284 build_pointer_type (TREE_TYPE
2287 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, omp_data_size
);
2288 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2289 gridified_kernel_p_tree
);
2290 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, dependencies_count
);
2294 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_dependencies_list", i
);
2295 gcc_checking_assert (kernel_dependencies_vector_type
);
2296 tree dependencies_list
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2297 get_identifier (tmp_name
),
2298 kernel_dependencies_vector_type
);
2300 TREE_STATIC (dependencies_list
) = 1;
2301 TREE_READONLY (dependencies_list
) = 1;
2302 TREE_PUBLIC (dependencies_list
) = 0;
2303 DECL_ARTIFICIAL (dependencies_list
) = 1;
2304 DECL_IGNORED_P (dependencies_list
) = 1;
2305 DECL_EXTERNAL (dependencies_list
) = 0;
2306 TREE_CONSTANT (dependencies_list
) = 1;
2307 DECL_INITIAL (dependencies_list
)
2308 = build_constructor (kernel_dependencies_vector_type
,
2309 kernel_dependencies_vec
);
2310 varpool_node::finalize_decl (dependencies_list
);
2312 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2315 (TREE_TYPE (dependencies_list
)),
2316 dependencies_list
));
2319 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, null_pointer_node
);
2321 tree kernel_info_ctor
= build_constructor (kernel_info_type
,
2324 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec
, NULL_TREE
,
2328 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_kernels", 1);
2329 tree hsa_kernels
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2330 get_identifier (tmp_name
),
2331 kernel_info_vector_type
);
2333 TREE_STATIC (hsa_kernels
) = 1;
2334 TREE_READONLY (hsa_kernels
) = 1;
2335 TREE_PUBLIC (hsa_kernels
) = 0;
2336 DECL_ARTIFICIAL (hsa_kernels
) = 1;
2337 DECL_IGNORED_P (hsa_kernels
) = 1;
2338 DECL_EXTERNAL (hsa_kernels
) = 0;
2339 TREE_CONSTANT (hsa_kernels
) = 1;
2340 DECL_INITIAL (hsa_kernels
) = build_constructor (kernel_info_vector_type
,
2341 kernel_info_vector_vec
);
2342 varpool_node::finalize_decl (hsa_kernels
);
2343 *kernels
= hsa_kernels
;
2346 /* Create a static constructor that will register our brig stuff with libgomp.  */
/* Build the static data objects handed to libgomp for this BRIG module and
   append calls to the offload register/unregister builtins to the HSA
   constructor and destructor statement lists.  BRIG_DECL is the decl whose
   address becomes the first element of the image descriptor initializer.
   NOTE(review): several statements in this excerpt are truncated (gaps in
   the embedded line numbers); the notes below describe only the visible
   tokens.  */
2350 hsa_output_libgomp_mapping (tree brig_decl
)
/* Counts that are stored in the image descriptor further down.  */
2352 unsigned kernel_count
= hsa_get_number_decl_kernel_mappings ();
2353 unsigned global_variable_count
= hsa_global_variable_symbols
->elements ();
2356 tree host_func_table
;
/* HOST_FUNC_TABLE and KERNELS are passed by address, presumably as output
   parameters -- confirm against hsa_output_kernels.  */
2358 hsa_output_kernels (&host_func_table
, &kernels
);
2359 tree global_vars
= hsa_output_global_variables ();
/* Lay out the record type "__hsa_image_desc".  Fields are created in the
   order brig_module, kernel_count, hsa_kernel_infos, global_variable_count,
   hsa_global_variable_infos; each new field's DECL_CHAIN is set to the field
   created before it, and id_f5 is what finish_builtin_struct receives.  */
2361 tree hsa_image_desc_type
= make_node (RECORD_TYPE
);
2362 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2363 get_identifier ("brig_module"), ptr_type_node
);
2364 DECL_CHAIN (id_f1
) = NULL_TREE
;
2365 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2366 get_identifier ("kernel_count"),
2367 unsigned_type_node
);
2369 DECL_CHAIN (id_f2
) = id_f1
;
2370 tree id_f3
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2371 get_identifier ("hsa_kernel_infos"),
2373 DECL_CHAIN (id_f3
) = id_f2
;
2374 tree id_f4
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2375 get_identifier ("global_variable_count"),
2376 unsigned_type_node
);
2377 DECL_CHAIN (id_f4
) = id_f3
;
2378 tree id_f5
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2379 get_identifier ("hsa_global_variable_infos"),
2381 DECL_CHAIN (id_f5
) = id_f4
;
2382 finish_builtin_struct (hsa_image_desc_type
, "__hsa_image_desc", id_f5
,
2384 TYPE_ARTIFICIAL (hsa_image_desc_type
) = 1;
/* Initializer for the descriptor.  Elements are appended in the same order
   in which the fields were created above: address of BRIG_DECL, the kernel
   count, an element involving KERNELS, the global variable count, and an
   element involving GLOBAL_VARS.  */
2386 vec
<constructor_elt
, va_gc
> *img_desc_vec
= NULL
;
2387 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2388 build_fold_addr_expr (brig_decl
));
2389 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2390 build_int_cstu (unsigned_type_node
, kernel_count
));
2391 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2393 build_pointer_type (TREE_TYPE (kernels
)),
2395 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2396 build_int_cstu (unsigned_type_node
,
2397 global_variable_count
));
2398 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2400 build_pointer_type (TREE_TYPE (global_vars
)),
2403 tree img_desc_ctor
= build_constructor (hsa_image_desc_type
, img_desc_vec
);
/* Emit the descriptor as a static, read-only, non-public, artificial
   variable whose initializer is the constructor built above.  */
2406 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_img_descriptor", 1);
2407 tree hsa_img_descriptor
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2408 get_identifier (tmp_name
),
2409 hsa_image_desc_type
);
2410 TREE_STATIC (hsa_img_descriptor
) = 1;
2411 TREE_READONLY (hsa_img_descriptor
) = 1;
2412 TREE_PUBLIC (hsa_img_descriptor
) = 0;
2413 DECL_ARTIFICIAL (hsa_img_descriptor
) = 1;
2414 DECL_IGNORED_P (hsa_img_descriptor
) = 1;
2415 DECL_EXTERNAL (hsa_img_descriptor
) = 0;
2416 TREE_CONSTANT (hsa_img_descriptor
) = 1;
2417 DECL_INITIAL (hsa_img_descriptor
) = img_desc_ctor
;
2418 varpool_node::finalize_decl (hsa_img_descriptor
);
2420 /* Construct the "host_table" libgomp expects. */
/* The table is an array of pointers.  The visible code appends four
   elements: the address of HOST_FUNC_TABLE, that address advanced by
   KERNEL_COUNT pointer sizes, and two null pointers.
   NOTE(review): the index type is built from the constant 4 while four
   elements are appended -- confirm the bound is intended.  */
2421 tree index_type
= build_index_type (build_int_cst (integer_type_node
, 4));
2422 tree libgomp_host_table_type
= build_array_type (ptr_type_node
, index_type
);
2423 TYPE_ARTIFICIAL (libgomp_host_table_type
) = 1;
2424 vec
<constructor_elt
, va_gc
> *libgomp_host_table_vec
= NULL
;
2425 tree host_func_table_addr
= build_fold_addr_expr (host_func_table
);
2426 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
,
2427 host_func_table_addr
);
/* Size in bytes of KERNEL_COUNT pointers; used as the POINTER_PLUS_EXPR
   offset for the second table element.  */
2428 offset_int func_table_size
2429 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node
)) * kernel_count
;
2430 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
,
2431 fold_build2 (POINTER_PLUS_EXPR
,
2432 TREE_TYPE (host_func_table_addr
),
2433 host_func_table_addr
,
2434 build_int_cst (size_type_node
,
2435 func_table_size
.to_uhwi
2437 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
, null_pointer_node
);
2438 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
, null_pointer_node
);
2439 tree libgomp_host_table_ctor
= build_constructor (libgomp_host_table_type
,
2440 libgomp_host_table_vec
);
/* Emit the host table with the same set of decl flags as the image
   descriptor.  */
2441 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_libgomp_host_table", 1);
2442 tree hsa_libgomp_host_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2443 get_identifier (tmp_name
),
2444 libgomp_host_table_type
);
2446 TREE_STATIC (hsa_libgomp_host_table
) = 1;
2447 TREE_READONLY (hsa_libgomp_host_table
) = 1;
2448 TREE_PUBLIC (hsa_libgomp_host_table
) = 0;
2449 DECL_ARTIFICIAL (hsa_libgomp_host_table
) = 1;
2450 DECL_IGNORED_P (hsa_libgomp_host_table
) = 1;
2451 DECL_EXTERNAL (hsa_libgomp_host_table
) = 0;
2452 TREE_CONSTANT (hsa_libgomp_host_table
) = 1;
2453 DECL_INITIAL (hsa_libgomp_host_table
) = libgomp_host_table_ctor
;
2454 varpool_node::finalize_decl (hsa_libgomp_host_table
);
2456 /* Generate an initializer with a call to the registration routine. */
2458 tree offload_register
2459 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER
);
2460 gcc_checking_assert (offload_register
);
/* The call takes four arguments: a packed GOMP version, the address of the
   host table, the device type GOMP_DEVICE_HSA and the address of the image
   descriptor.  It is appended to the statement list obtained from
   hsa_get_ctor_statements, from which an 'I' static cdtor is built.  */
2462 tree
*hsa_ctor_stmts
= hsa_get_ctor_statements ();
2463 append_to_statement_list
2464 (build_call_expr (offload_register
, 4,
2465 build_int_cstu (unsigned_type_node
,
2466 GOMP_VERSION_PACK (GOMP_VERSION
,
2468 build_fold_addr_expr (hsa_libgomp_host_table
),
2469 build_int_cst (integer_type_node
, GOMP_DEVICE_HSA
),
2470 build_fold_addr_expr (hsa_img_descriptor
)),
2473 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts
, DEFAULT_INIT_PRIORITY
);
/* Mirror image of the registration above: the unregister builtin is called
   with the same four arguments and a 'D' static cdtor is built from the
   statements returned by hsa_get_dtor_statements.  */
2475 tree offload_unregister
2476 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER
);
2477 gcc_checking_assert (offload_unregister
);
2479 tree
*hsa_dtor_stmts
= hsa_get_dtor_statements ();
2480 append_to_statement_list
2481 (build_call_expr (offload_unregister
, 4,
2482 build_int_cstu (unsigned_type_node
,
2483 GOMP_VERSION_PACK (GOMP_VERSION
,
2485 build_fold_addr_expr (hsa_libgomp_host_table
),
2486 build_int_cst (integer_type_node
, GOMP_DEVICE_HSA
),
2487 build_fold_addr_expr (hsa_img_descriptor
)),
2489 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts
, DEFAULT_INIT_PRIORITY
);
2492 /* Emit the brig module we have compiled to a section in the final assembly and
2493 also create a compile unit static constructor that will register the brig
2494 module with libgomp. */
2497 hsa_output_brig (void)
2499 section
*saved_section
;
2501 if (!brig_initialized
)
2504 for (unsigned i
= 0; i
< function_call_linkage
.length (); i
++)
2506 function_linkage_pair p
= function_call_linkage
[i
];
2508 BrigCodeOffset32_t
*func_offset
= function_offsets
->get (p
.function_decl
);
2509 gcc_assert (*func_offset
);
2510 BrigOperandCodeRef
*code_ref
2511 = (BrigOperandCodeRef
*) (brig_operand
.get_ptr_by_offset (p
.offset
));
2512 gcc_assert (code_ref
->base
.kind
== BRIG_KIND_OPERAND_CODE_REF
);
2513 code_ref
->ref
= lendian32 (*func_offset
);
2516 /* Iterate all function declarations and if we meet a function that should
2517 have module linkage and we are unable to emit HSAIL for the function,
2518 then change the linkage to program linkage. Doing so, we will emit
2519 a valid BRIG image. */
2520 if (hsa_failed_functions
!= NULL
&& emitted_declarations
!= NULL
)
2521 for (hash_map
<tree
, BrigDirectiveExecutable
*>::iterator it
2522 = emitted_declarations
->begin ();
2523 it
!= emitted_declarations
->end ();
2526 if (hsa_failed_functions
->contains ((*it
).first
))
2527 (*it
).second
->linkage
= BRIG_LINKAGE_PROGRAM
;
2530 saved_section
= in_section
;
2532 switch_to_section (get_section (BRIG_ELF_SECTION_NAME
, SECTION_NOTYPE
, NULL
));
2534 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, BRIG_LABEL_STRING
, 1);
2535 ASM_OUTPUT_LABEL (asm_out_file
, tmp_name
);
2536 tree brig_id
= get_identifier (tmp_name
);
2537 tree brig_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
, brig_id
,
2539 SET_DECL_ASSEMBLER_NAME (brig_decl
, brig_id
);
2540 TREE_ADDRESSABLE (brig_decl
) = 1;
2541 TREE_READONLY (brig_decl
) = 1;
2542 DECL_ARTIFICIAL (brig_decl
) = 1;
2543 DECL_IGNORED_P (brig_decl
) = 1;
2544 TREE_STATIC (brig_decl
) = 1;
2545 TREE_PUBLIC (brig_decl
) = 0;
2546 TREE_USED (brig_decl
) = 1;
2547 DECL_INITIAL (brig_decl
) = brig_decl
;
2548 TREE_ASM_WRITTEN (brig_decl
) = 1;
2550 BrigModuleHeader module_header
;
2551 memcpy (&module_header
.identification
, "HSA BRIG",
2552 sizeof (module_header
.identification
));
2553 module_header
.brigMajor
= lendian32 (BRIG_VERSION_BRIG_MAJOR
);
2554 module_header
.brigMinor
= lendian32 (BRIG_VERSION_BRIG_MINOR
);
2555 uint64_t section_index
[3];
2557 int data_padding
, code_padding
, operand_padding
;
2558 data_padding
= HSA_SECTION_ALIGNMENT
2559 - brig_data
.total_size
% HSA_SECTION_ALIGNMENT
;
2560 code_padding
= HSA_SECTION_ALIGNMENT
2561 - brig_code
.total_size
% HSA_SECTION_ALIGNMENT
;
2562 operand_padding
= HSA_SECTION_ALIGNMENT
2563 - brig_operand
.total_size
% HSA_SECTION_ALIGNMENT
;
2565 uint64_t module_size
= sizeof (module_header
)
2566 + sizeof (section_index
)
2567 + brig_data
.total_size
2569 + brig_code
.total_size
2571 + brig_operand
.total_size
2573 gcc_assert ((module_size
% 16) == 0);
2574 module_header
.byteCount
= lendian64 (module_size
);
2575 memset (&module_header
.hash
, 0, sizeof (module_header
.hash
));
2576 module_header
.reserved
= 0;
2577 module_header
.sectionCount
= lendian32 (3);
2578 module_header
.sectionIndex
= lendian64 (sizeof (module_header
));
2579 assemble_string ((const char *) &module_header
, sizeof (module_header
));
2580 uint64_t off
= sizeof (module_header
) + sizeof (section_index
);
2581 section_index
[0] = lendian64 (off
);
2582 off
+= brig_data
.total_size
+ data_padding
;
2583 section_index
[1] = lendian64 (off
);
2584 off
+= brig_code
.total_size
+ code_padding
;
2585 section_index
[2] = lendian64 (off
);
2586 assemble_string ((const char *) §ion_index
, sizeof (section_index
));
2588 char padding
[HSA_SECTION_ALIGNMENT
];
2589 memset (padding
, 0, sizeof (padding
));
2591 brig_data
.output ();
2592 assemble_string (padding
, data_padding
);
2593 brig_code
.output ();
2594 assemble_string (padding
, code_padding
);
2595 brig_operand
.output ();
2596 assemble_string (padding
, operand_padding
);
2599 switch_to_section (saved_section
);
2601 hsa_output_libgomp_mapping (brig_decl
);
2603 hsa_free_decl_kernel_mapping ();
2604 brig_release_data ();
2605 hsa_deinit_compilation_unit_data ();
2607 delete emitted_declarations
;
2608 emitted_declarations
= NULL
;
2609 delete function_offsets
;
2610 function_offsets
= NULL
;