1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "hash-table.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "gimple-pretty-print.h"
42 #include "diagnostic-core.h"
45 #include "print-tree.h"
46 #include "symbol-summary.h"
48 #include "gomp-constants.h"
/* Convert the 16-bit VAL to little endian form, if necessary.

   With a compiler that provides __BYTE_ORDER__ and __builtin_bswap16
   (GCC 4.8 or newer) the byte order is taken from the compiler;
   otherwise the choice falls back to WORDS_BIGENDIAN and a manual
   swap.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap16 (val);
#else   /* __ORDER_PDP_ENDIAN__ */
  return val;
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  return (val >> 8) | (val << 8);
#endif
#endif
}
/* Convert the 32-bit VAL to little endian form, if necessary.

   With GCC 4.6 or newer the byte order is taken from __BYTE_ORDER__:
   big-endian uses __builtin_bswap32 and PDP-endian swaps the two
   16-bit halves.  Otherwise the choice falls back to WORDS_BIGENDIAN
   and a manual swap.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
#else  /* __ORDER_PDP_ENDIAN__ */
  return (val >> 16) | (val << 16);
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* Swap the bytes inside each half, then swap the halves.  */
  val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  return (val >> 16) | (val << 16);
#endif
#endif
}
/* Convert the 64-bit VAL to little endian form, if necessary.

   With GCC 4.6 or newer the byte order is taken from __BYTE_ORDER__:
   big-endian uses __builtin_bswap64 and PDP-endian reverses the order
   of the four 16-bit words.  Otherwise the choice falls back to
   WORDS_BIGENDIAN and a manual swap.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
#else  /* __ORDER_PDP_ENDIAN__ */
  return (((val & 0xffffll) << 48)
	  | ((val & 0xffff0000ll) << 16)
	  | ((val & 0xffff00000000ll) >> 16)
	  | ((val & 0xffff000000000000ll) >> 48));
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* Swap bytes within 16-bit words, then words within 32-bit halves,
     then the two halves.  */
  val = (((val & 0xff00ff00ff00ff00ll) >> 8)
	 | ((val & 0x00ff00ff00ff00ffll) << 8));
  val = ((( val & 0xffff0000ffff0000ll) >> 16)
	 | (( val & 0x0000ffff0000ffffll) << 16));
  return (val >> 32) | (val << 32);
#endif
#endif
}
127 #define BRIG_ELF_SECTION_NAME ".brig"
128 #define BRIG_LABEL_STRING "hsa_brig"
129 #define BRIG_SECTION_DATA_NAME "hsa_data"
130 #define BRIG_SECTION_CODE_NAME "hsa_code"
131 #define BRIG_SECTION_OPERAND_NAME "hsa_operand"
133 #define BRIG_CHUNK_MAX_SIZE (64 * 1024)
135 /* Required HSA section alignment. */
137 #define HSA_SECTION_ALIGNMENT 16
/* Chunks of BRIG binary data.  */

struct hsa_brig_data_chunk
{
  /* Size of the data already stored into a chunk.  */
  unsigned size;

  /* Pointer to the data.  */
  char *data;
};
150 /* Structure representing a BRIG section, holding and writing its data. */
152 class hsa_brig_section
155 /* Section name that will be output to the BRIG. */
156 const char *section_name
;
157 /* Size in bytes of all data stored in the section. */
159 /* The size of the header of the section including padding. */
160 unsigned header_byte_count
;
161 /* The size of the header of the section without any padding. */
162 unsigned header_byte_delta
;
164 void init (const char *name
);
167 unsigned add (const void *data
, unsigned len
, void **output
= NULL
);
168 void round_size_up (int factor
);
169 void *get_ptr_by_offset (unsigned int offset
);
172 void allocate_new_chunk ();
174 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
175 vec
<struct hsa_brig_data_chunk
> chunks
;
177 /* More convenient access to the last chunk from the vector above. */
178 struct hsa_brig_data_chunk
*cur_chunk
;
181 static struct hsa_brig_section brig_data
, brig_code
, brig_operand
;
182 static uint32_t brig_insn_count
;
183 static bool brig_initialized
= false;
185 /* Mapping between emitted HSA functions and their offset in code segment. */
186 static hash_map
<tree
, BrigCodeOffset32_t
> *function_offsets
;
188 /* Hash map of emitted function declarations. */
189 static hash_map
<tree
, BrigDirectiveExecutable
*> *emitted_declarations
;
191 /* Hash table of emitted internal function declaration offsets. */
192 hash_table
<hsa_internal_fn_hasher
> *hsa_emitted_internal_decls
;
194 /* List of sbr instructions. */
195 static vec
<hsa_insn_sbr
*> *switch_instructions
;
197 struct function_linkage_pair
199 function_linkage_pair (tree decl
, unsigned int off
)
200 : function_decl (decl
), offset (off
) {}
202 /* Declaration of called function. */
205 /* Offset in operand section. */
209 /* Vector of function calls where we need to resolve function offsets. */
210 static auto_vec
<function_linkage_pair
> function_call_linkage
;
212 /* Add a new chunk, allocate data for it and initialize it. */
215 hsa_brig_section::allocate_new_chunk ()
217 struct hsa_brig_data_chunk new_chunk
;
219 new_chunk
.data
= XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE
);
221 cur_chunk
= chunks
.safe_push (new_chunk
);
224 /* Initialize the brig section. */
227 hsa_brig_section::init (const char *name
)
230 /* While the following computation is basically wrong, because the intent
231 certainly wasn't to have the first character of name and padding, which
232 are a part of sizeof (BrigSectionHeader), included in the first addend,
233 this is what the disassembler expects. */
234 total_size
= sizeof (BrigSectionHeader
) + strlen (section_name
);
236 allocate_new_chunk ();
237 header_byte_delta
= total_size
;
239 header_byte_count
= total_size
;
242 /* Free all data in the section. */
245 hsa_brig_section::release ()
247 for (unsigned i
= 0; i
< chunks
.length (); i
++)
248 free (chunks
[i
].data
);
253 /* Write the section to the output file to a section with the name given at
254 initialization. Switches the output section and does not restore it. */
257 hsa_brig_section::output ()
259 struct BrigSectionHeader section_header
;
262 section_header
.byteCount
= lendian64 (total_size
);
263 section_header
.headerByteCount
= lendian32 (header_byte_count
);
264 section_header
.nameLength
= lendian32 (strlen (section_name
));
265 assemble_string ((const char *) §ion_header
, 16);
266 assemble_string (section_name
, (section_header
.nameLength
));
267 memset (&padding
, 0, sizeof (padding
));
268 /* This is also a consequence of the wrong header size computation described
269 in a comment in hsa_brig_section::init. */
270 assemble_string (padding
, 8);
271 for (unsigned i
= 0; i
< chunks
.length (); i
++)
272 assemble_string (chunks
[i
].data
, chunks
[i
].size
);
275 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
276 which it was stored. If OUTPUT is not NULL, store into it the pointer to
277 the place where DATA was actually stored. */
280 hsa_brig_section::add (const void *data
, unsigned len
, void **output
)
282 unsigned offset
= total_size
;
284 gcc_assert (len
<= BRIG_CHUNK_MAX_SIZE
);
285 if (cur_chunk
->size
> (BRIG_CHUNK_MAX_SIZE
- len
))
286 allocate_new_chunk ();
288 char *dst
= cur_chunk
->data
+ cur_chunk
->size
;
289 memcpy (dst
, data
, len
);
292 cur_chunk
->size
+= len
;
298 /* Add padding to section so that its size is divisible by FACTOR. */
301 hsa_brig_section::round_size_up (int factor
)
303 unsigned padding
, res
= total_size
% factor
;
308 padding
= factor
- res
;
309 total_size
+= padding
;
310 if (cur_chunk
->size
> (BRIG_CHUNK_MAX_SIZE
- padding
))
312 padding
-= BRIG_CHUNK_MAX_SIZE
- cur_chunk
->size
;
313 cur_chunk
->size
= BRIG_CHUNK_MAX_SIZE
;
314 allocate_new_chunk ();
317 cur_chunk
->size
+= padding
;
320 /* Return pointer to data by global OFFSET in the section. */
323 hsa_brig_section::get_ptr_by_offset (unsigned int offset
)
325 gcc_assert (offset
< total_size
);
326 offset
-= header_byte_delta
;
329 for (i
= 0; offset
>= chunks
[i
].size
; i
++)
330 offset
-= chunks
[i
].size
;
332 return chunks
[i
].data
+ offset
;
/* BRIG string data hashing.  One entry of the table of emitted
   strings.  */

struct brig_string_slot
{
  /* The string bytes; freed by brig_string_slot_hasher::remove.  */
  const char *s;
  /* Prefix character emitted before S, or zero if there is none.  */
  char prefix;
  /* Length of S, not counting the prefix.  */
  int len;
  /* Offset of the emitted string in the data section.  */
  uint32_t offset;
};
345 /* Hash table helpers. */
347 struct brig_string_slot_hasher
: pointer_hash
<brig_string_slot
>
349 static inline hashval_t
hash (const value_type
);
350 static inline bool equal (const value_type
, const compare_type
);
351 static inline void remove (value_type
);
354 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
355 to support strings that may not end in '\0'. */
358 brig_string_slot_hasher::hash (const value_type ds
)
360 hashval_t r
= ds
->len
;
363 for (i
= 0; i
< ds
->len
; i
++)
364 r
= r
* 67 + (unsigned) ds
->s
[i
] - 113;
365 r
= r
* 67 + (unsigned) ds
->prefix
- 113;
369 /* Returns nonzero if DS1 and DS2 are equal. */
372 brig_string_slot_hasher::equal (const value_type ds1
, const compare_type ds2
)
374 if (ds1
->len
== ds2
->len
)
375 return ds1
->prefix
== ds2
->prefix
376 && memcmp (ds1
->s
, ds2
->s
, ds1
->len
) == 0;
381 /* Deallocate memory for DS upon its removal. */
384 brig_string_slot_hasher::remove (value_type ds
)
386 free (const_cast<char *> (ds
->s
));
390 /* Hash for strings we output in order not to duplicate them needlessly. */
392 static hash_table
<brig_string_slot_hasher
> *brig_string_htab
;
394 /* Emit a null terminated string STR to the data section and return its
395 offset in it. If PREFIX is non-zero, output it just before STR too.
396 Sanitize the string if SANITIZE option is set to true. */
399 brig_emit_string (const char *str
, char prefix
= 0, bool sanitize
= true)
401 unsigned slen
= strlen (str
);
402 unsigned offset
, len
= slen
+ (prefix
? 1 : 0);
403 uint32_t hdr_len
= lendian32 (len
);
404 brig_string_slot s_slot
;
405 brig_string_slot
**slot
;
408 str2
= xstrdup (str
);
411 hsa_sanitize_name (str2
);
414 s_slot
.prefix
= prefix
;
417 slot
= brig_string_htab
->find_slot (&s_slot
, INSERT
);
420 brig_string_slot
*new_slot
= XCNEW (brig_string_slot
);
422 /* In theory we should fill in BrigData but that would mean copying
423 the string to a buffer for no reason, so we just emulate it. */
424 offset
= brig_data
.add (&hdr_len
, sizeof (hdr_len
));
426 brig_data
.add (&prefix
, 1);
428 brig_data
.add (str2
, slen
);
429 brig_data
.round_size_up (4);
431 /* TODO: could use the string we just copied into
432 brig_string->cur_chunk */
434 new_slot
->len
= slen
;
435 new_slot
->prefix
= prefix
;
436 new_slot
->offset
= offset
;
441 offset
= (*slot
)->offset
;
448 /* Linked list of queued operands. */
450 static struct operand_queue
452 /* First from the chain of queued operands. */
453 hsa_op_base
*first_op
, *last_op
;
455 /* The offset at which the next operand will be enqueued. */
456 unsigned projected_size
;
460 /* Unless already initialized, initialize infrastructure to produce BRIG. */
467 if (brig_initialized
)
470 brig_string_htab
= new hash_table
<brig_string_slot_hasher
> (37);
471 brig_data
.init (BRIG_SECTION_DATA_NAME
);
472 brig_code
.init (BRIG_SECTION_CODE_NAME
);
473 brig_operand
.init (BRIG_SECTION_OPERAND_NAME
);
474 brig_initialized
= true;
476 struct BrigDirectiveModule moddir
;
477 memset (&moddir
, 0, sizeof (moddir
));
478 moddir
.base
.byteCount
= lendian16 (sizeof (moddir
));
481 if (main_input_filename
&& *main_input_filename
!= '\0')
483 const char *part
= strrchr (main_input_filename
, '/');
485 part
= main_input_filename
;
488 modname
= concat ("&__hsa_module_", part
, NULL
);
489 char *extension
= strchr (modname
, '.');
493 /* As in LTO mode, we have to emit different module names. */
496 part
= strrchr (asm_file_name
, '/');
498 part
= asm_file_name
;
502 asprintf (&modname2
, "%s_%s", modname
, part
);
507 hsa_sanitize_name (modname
);
508 moddir
.name
= brig_emit_string (modname
);
512 moddir
.name
= brig_emit_string ("__hsa_module_unnamed", '&');
513 moddir
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_MODULE
);
514 moddir
.hsailMajor
= lendian32 (BRIG_VERSION_HSAIL_MAJOR
);
515 moddir
.hsailMinor
= lendian32 (BRIG_VERSION_HSAIL_MINOR
);
516 moddir
.profile
= hsa_full_profile_p () ? BRIG_PROFILE_FULL
: BRIG_PROFILE_BASE
;
517 if (hsa_machine_large_p ())
518 moddir
.machineModel
= BRIG_MACHINE_LARGE
;
520 moddir
.machineModel
= BRIG_MACHINE_SMALL
;
521 moddir
.defaultFloatRound
= BRIG_ROUND_FLOAT_DEFAULT
;
522 brig_code
.add (&moddir
, sizeof (moddir
));
525 /* Free all BRIG data. */
528 brig_release_data (void)
530 delete brig_string_htab
;
531 brig_data
.release ();
532 brig_code
.release ();
533 brig_operand
.release ();
535 brig_initialized
= 0;
538 /* Enqueue operation OP. Return the offset at which it will be stored. */
541 enqueue_op (hsa_op_base
*op
)
545 if (op
->m_brig_op_offset
)
546 return op
->m_brig_op_offset
;
548 ret
= op_queue
.projected_size
;
549 op
->m_brig_op_offset
= op_queue
.projected_size
;
551 if (!op_queue
.first_op
)
552 op_queue
.first_op
= op
;
554 op_queue
.last_op
->m_next
= op
;
555 op_queue
.last_op
= op
;
557 if (is_a
<hsa_op_immed
*> (op
))
558 op_queue
.projected_size
+= sizeof (struct BrigOperandConstantBytes
);
559 else if (is_a
<hsa_op_reg
*> (op
))
560 op_queue
.projected_size
+= sizeof (struct BrigOperandRegister
);
561 else if (is_a
<hsa_op_address
*> (op
))
562 op_queue
.projected_size
+= sizeof (struct BrigOperandAddress
);
563 else if (is_a
<hsa_op_code_ref
*> (op
))
564 op_queue
.projected_size
+= sizeof (struct BrigOperandCodeRef
);
565 else if (is_a
<hsa_op_code_list
*> (op
))
566 op_queue
.projected_size
+= sizeof (struct BrigOperandCodeList
);
567 else if (is_a
<hsa_op_operand_list
*> (op
))
568 op_queue
.projected_size
+= sizeof (struct BrigOperandOperandList
);
574 static void emit_immediate_operand (hsa_op_immed
*imm
);
576 /* Emit directive describing a symbol if it has not been emitted already.
577 Return the offset of the directive. */
580 emit_directive_variable (struct hsa_symbol
*symbol
)
582 struct BrigDirectiveVariable dirvar
;
583 unsigned name_offset
;
584 static unsigned res_name_offset
;
586 if (symbol
->m_directive_offset
)
587 return symbol
->m_directive_offset
;
589 memset (&dirvar
, 0, sizeof (dirvar
));
590 dirvar
.base
.byteCount
= lendian16 (sizeof (dirvar
));
591 dirvar
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE
);
592 dirvar
.allocation
= symbol
->m_allocation
;
594 char prefix
= symbol
->m_global_scope_p
? '&' : '%';
596 if (symbol
->m_decl
&& TREE_CODE (symbol
->m_decl
) == RESULT_DECL
)
598 if (res_name_offset
== 0)
599 res_name_offset
= brig_emit_string (symbol
->m_name
, '%');
600 name_offset
= res_name_offset
;
602 else if (symbol
->m_name
)
603 name_offset
= brig_emit_string (symbol
->m_name
, prefix
);
607 snprintf (buf
, 64, "__%s_%i", hsa_seg_name (symbol
->m_segment
),
608 symbol
->m_name_number
);
609 name_offset
= brig_emit_string (buf
, prefix
);
612 dirvar
.name
= lendian32 (name_offset
);
614 if (symbol
->m_decl
&& TREE_CODE (symbol
->m_decl
) == CONST_DECL
)
616 hsa_op_immed
*tmp
= new hsa_op_immed (DECL_INITIAL (symbol
->m_decl
));
617 dirvar
.init
= lendian32 (enqueue_op (tmp
));
621 dirvar
.type
= lendian16 (symbol
->m_type
);
622 dirvar
.segment
= symbol
->m_segment
;
623 dirvar
.align
= symbol
->m_align
;
624 dirvar
.linkage
= symbol
->m_linkage
;
625 dirvar
.dim
.lo
= symbol
->m_dim
;
626 dirvar
.dim
.hi
= symbol
->m_dim
>> 32;
628 /* Global variables are just declared and linked via HSA runtime. */
629 if (symbol
->m_linkage
!= BRIG_ALLOCATION_PROGRAM
)
630 dirvar
.modifier
|= BRIG_VARIABLE_DEFINITION
;
633 if (symbol
->m_cst_value
)
635 dirvar
.modifier
|= BRIG_VARIABLE_CONST
;
636 dirvar
.init
= lendian32 (enqueue_op (symbol
->m_cst_value
));
639 symbol
->m_directive_offset
= brig_code
.add (&dirvar
, sizeof (dirvar
));
640 return symbol
->m_directive_offset
;
643 /* Emit directives describing either a function declaration or definition F and
644 return the produced BrigDirectiveExecutable structure. The function does
645 not take into account any instructions when calculating nextModuleEntry
646 field of the produced BrigDirectiveExecutable structure so when emitting
647 actual definitions, this field needs to be updated after all of the function
648 is actually added to the code section. */
650 static BrigDirectiveExecutable
*
651 emit_function_directives (hsa_function_representation
*f
, bool is_declaration
)
653 struct BrigDirectiveExecutable fndir
;
654 unsigned name_offset
, inarg_off
, scoped_off
, next_toplev_off
;
659 if (!f
->m_declaration_p
)
660 for (int i
= 0; f
->m_global_symbols
.iterate (i
, &sym
); i
++)
662 gcc_assert (!sym
->m_emitted_to_brig
);
663 sym
->m_emitted_to_brig
= true;
664 emit_directive_variable (sym
);
668 name_offset
= brig_emit_string (f
->m_name
, '&');
669 inarg_off
= brig_code
.total_size
+ sizeof (fndir
)
670 + (f
->m_output_arg
? sizeof (struct BrigDirectiveVariable
) : 0);
671 scoped_off
= inarg_off
672 + f
->m_input_args
.length () * sizeof (struct BrigDirectiveVariable
);
674 if (!f
->m_declaration_p
)
676 count
+= f
->m_spill_symbols
.length ();
677 count
+= f
->m_private_variables
.length ();
680 next_toplev_off
= scoped_off
+ count
* sizeof (struct BrigDirectiveVariable
);
682 memset (&fndir
, 0, sizeof (fndir
));
683 fndir
.base
.byteCount
= lendian16 (sizeof (fndir
));
684 fndir
.base
.kind
= lendian16 (f
->m_kern_p
? BRIG_KIND_DIRECTIVE_KERNEL
685 : BRIG_KIND_DIRECTIVE_FUNCTION
);
686 fndir
.name
= lendian32 (name_offset
);
687 fndir
.inArgCount
= lendian16 (f
->m_input_args
.length ());
688 fndir
.outArgCount
= lendian16 (f
->m_output_arg
? 1 : 0);
689 fndir
.firstInArg
= lendian32 (inarg_off
);
690 fndir
.firstCodeBlockEntry
= lendian32 (scoped_off
);
691 fndir
.nextModuleEntry
= lendian32 (next_toplev_off
);
692 fndir
.linkage
= f
->get_linkage ();
693 if (!f
->m_declaration_p
)
694 fndir
.modifier
|= BRIG_EXECUTABLE_DEFINITION
;
695 memset (&fndir
.reserved
, 0, sizeof (fndir
.reserved
));
697 /* Once we put a definition of function_offsets, we should not overwrite
698 it with a declaration of the function. */
699 if (f
->m_internal_fn
== NULL
)
701 if (!function_offsets
->get (f
->m_decl
) || !is_declaration
)
702 function_offsets
->put (f
->m_decl
, brig_code
.total_size
);
706 /* Internal function. */
707 hsa_internal_fn
**slot
708 = hsa_emitted_internal_decls
->find_slot (f
->m_internal_fn
, INSERT
);
709 hsa_internal_fn
*int_fn
= new hsa_internal_fn (f
->m_internal_fn
);
710 int_fn
->m_offset
= brig_code
.total_size
;
714 brig_code
.add (&fndir
, sizeof (fndir
), &ptr_to_fndir
);
717 emit_directive_variable (f
->m_output_arg
);
718 for (unsigned i
= 0; i
< f
->m_input_args
.length (); i
++)
719 emit_directive_variable (f
->m_input_args
[i
]);
721 if (!f
->m_declaration_p
)
723 for (int i
= 0; f
->m_spill_symbols
.iterate (i
, &sym
); i
++)
725 emit_directive_variable (sym
);
728 for (unsigned i
= 0; i
< f
->m_private_variables
.length (); i
++)
730 emit_directive_variable (f
->m_private_variables
[i
]);
735 return (BrigDirectiveExecutable
*) ptr_to_fndir
;
738 /* Emit a label directive for the given HBB. We assume it is about to start on
739 the current offset in the code section. */
742 emit_bb_label_directive (hsa_bb
*hbb
)
744 struct BrigDirectiveLabel lbldir
;
746 lbldir
.base
.byteCount
= lendian16 (sizeof (lbldir
));
747 lbldir
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_LABEL
);
749 snprintf (buf
, 32, "BB_%u_%i", DECL_UID (current_function_decl
),
751 lbldir
.name
= lendian32 (brig_emit_string (buf
, '@'));
753 hbb
->m_label_ref
.m_directive_offset
= brig_code
.add (&lbldir
,
758 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
759 holding such, for constants and registers. */
762 regtype_for_type (BrigType16_t t
)
781 case BRIG_TYPE_U16X2
:
783 case BRIG_TYPE_S16X2
:
784 case BRIG_TYPE_F16X2
:
785 return BRIG_TYPE_B32
;
792 case BRIG_TYPE_U16X4
:
793 case BRIG_TYPE_U32X2
:
795 case BRIG_TYPE_S16X4
:
796 case BRIG_TYPE_S32X2
:
797 case BRIG_TYPE_F16X4
:
798 case BRIG_TYPE_F32X2
:
799 return BRIG_TYPE_B64
;
802 case BRIG_TYPE_U8X16
:
803 case BRIG_TYPE_U16X8
:
804 case BRIG_TYPE_U32X4
:
805 case BRIG_TYPE_U64X2
:
806 case BRIG_TYPE_S8X16
:
807 case BRIG_TYPE_S16X8
:
808 case BRIG_TYPE_S32X4
:
809 case BRIG_TYPE_S64X2
:
810 case BRIG_TYPE_F16X8
:
811 case BRIG_TYPE_F32X4
:
812 case BRIG_TYPE_F64X2
:
813 return BRIG_TYPE_B128
;
820 /* Return the length of the BRIG type TYPE that is going to be streamed out as
821 an immediate constant (so it must not be B1). */
824 hsa_get_imm_brig_type_len (BrigType16_t type
)
826 BrigType16_t base_type
= type
& BRIG_TYPE_BASE_MASK
;
827 BrigType16_t pack_type
= type
& BRIG_TYPE_PACK_MASK
;
831 case BRIG_TYPE_PACK_NONE
:
833 case BRIG_TYPE_PACK_32
:
835 case BRIG_TYPE_PACK_64
:
837 case BRIG_TYPE_PACK_128
:
871 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
872 If NEED_LEN is not equal to zero, shrink or extend the value
873 to NEED_LEN bytes. Return how many bytes were written. */
876 emit_immediate_scalar_to_buffer (tree value
, char *data
, unsigned need_len
)
878 union hsa_bytes bytes
;
880 memset (&bytes
, 0, sizeof (bytes
));
881 tree type
= TREE_TYPE (value
);
882 gcc_checking_assert (TREE_CODE (type
) != VECTOR_TYPE
);
884 unsigned data_len
= tree_to_uhwi (TYPE_SIZE (type
)) / BITS_PER_UNIT
;
885 if (INTEGRAL_TYPE_P (type
)
886 || (POINTER_TYPE_P (type
) && TREE_CODE (value
) == INTEGER_CST
))
890 bytes
.b8
= (uint8_t) TREE_INT_CST_LOW (value
);
893 bytes
.b16
= (uint16_t) TREE_INT_CST_LOW (value
);
896 bytes
.b32
= (uint32_t) TREE_INT_CST_LOW (value
);
899 bytes
.b64
= (uint64_t) TREE_INT_CST_LOW (value
);
904 else if (SCALAR_FLOAT_TYPE_P (type
))
908 sorry ("Support for HSA does not implement immediate 16 bit FPU "
912 unsigned int_len
= GET_MODE_SIZE (TYPE_MODE (type
));
913 /* There are always 32 bits in each long, no matter the size of
917 real_to_target (tmp
, TREE_REAL_CST_PTR (value
), TYPE_MODE (type
));
920 bytes
.b32
= (uint32_t) tmp
[0];
923 bytes
.b64
= (uint64_t)(uint32_t) tmp
[1];
925 bytes
.b64
|= (uint32_t) tmp
[0];
937 memcpy (data
, &bytes
, len
);
942 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size
)
945 *brig_repr_size
= hsa_get_imm_brig_type_len (m_type
);
947 if (m_tree_value
!= NULL_TREE
)
949 /* Update brig_repr_size for special tree values. */
950 if (TREE_CODE (m_tree_value
) == STRING_CST
)
951 *brig_repr_size
= TREE_STRING_LENGTH (m_tree_value
);
952 else if (TREE_CODE (m_tree_value
) == CONSTRUCTOR
)
954 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value
)));
956 unsigned total_len
= *brig_repr_size
;
958 /* As we can have a constructor with fewer elements, fill the memory
960 brig_repr
= XCNEWVEC (char, total_len
);
963 if (TREE_CODE (m_tree_value
) == VECTOR_CST
)
965 int i
, num
= VECTOR_CST_NELTS (m_tree_value
);
966 for (i
= 0; i
< num
; i
++)
968 tree v
= VECTOR_CST_ELT (m_tree_value
, i
);
969 unsigned actual
= emit_immediate_scalar_to_buffer (v
, p
, 0);
973 /* Vectors should have the exact size. */
974 gcc_assert (total_len
== 0);
976 else if (TREE_CODE (m_tree_value
) == STRING_CST
)
977 memcpy (brig_repr
, TREE_STRING_POINTER (m_tree_value
),
978 TREE_STRING_LENGTH (m_tree_value
));
979 else if (TREE_CODE (m_tree_value
) == COMPLEX_CST
)
981 gcc_assert (total_len
% 2 == 0);
984 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value
), p
,
987 gcc_assert (actual
== total_len
/ 2);
991 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value
), p
,
993 gcc_assert (actual
== total_len
/ 2);
995 else if (TREE_CODE (m_tree_value
) == CONSTRUCTOR
)
997 unsigned len
= CONSTRUCTOR_NELTS (m_tree_value
);
998 for (unsigned i
= 0; i
< len
; i
++)
1000 tree v
= CONSTRUCTOR_ELT (m_tree_value
, i
)->value
;
1001 unsigned actual
= emit_immediate_scalar_to_buffer (v
, p
, 0);
1002 total_len
-= actual
;
1007 emit_immediate_scalar_to_buffer (m_tree_value
, p
, total_len
);
1013 switch (*brig_repr_size
)
1016 bytes
.b8
= (uint8_t) m_int_value
;
1019 bytes
.b16
= (uint16_t) m_int_value
;
1022 bytes
.b32
= (uint32_t) m_int_value
;
1025 bytes
.b64
= (uint64_t) m_int_value
;
1031 brig_repr
= XNEWVEC (char, *brig_repr_size
);
1032 memcpy (brig_repr
, &bytes
, *brig_repr_size
);
1038 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1039 have been massaged to comply with various HSA/BRIG type requirements, so the
1040 only important aspect of that is the length (because HSAIL might expect
1041 smaller constants or become bit-data). The data should be represented
1042 according to what is in the tree representation. */
1045 emit_immediate_operand (hsa_op_immed
*imm
)
1047 unsigned brig_repr_size
;
1048 char *brig_repr
= imm
->emit_to_buffer (&brig_repr_size
);
1049 struct BrigOperandConstantBytes out
;
1051 memset (&out
, 0, sizeof (out
));
1052 out
.base
.byteCount
= lendian16 (sizeof (out
));
1053 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES
);
1054 uint32_t byteCount
= lendian32 (brig_repr_size
);
1055 out
.type
= lendian16 (imm
->m_type
);
1056 out
.bytes
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1057 brig_operand
.add (&out
, sizeof (out
));
1058 brig_data
.add (brig_repr
, brig_repr_size
);
1059 brig_data
.round_size_up (4);
1064 /* Emit a register BRIG operand REG. */
1067 emit_register_operand (hsa_op_reg
*reg
)
1069 struct BrigOperandRegister out
;
1071 out
.base
.byteCount
= lendian16 (sizeof (out
));
1072 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_REGISTER
);
1073 out
.regNum
= lendian32 (reg
->m_hard_num
);
1075 switch (regtype_for_type (reg
->m_type
))
1078 out
.regKind
= BRIG_REGISTER_KIND_SINGLE
;
1081 out
.regKind
= BRIG_REGISTER_KIND_DOUBLE
;
1083 case BRIG_TYPE_B128
:
1084 out
.regKind
= BRIG_REGISTER_KIND_QUAD
;
1087 out
.regKind
= BRIG_REGISTER_KIND_CONTROL
;
1093 brig_operand
.add (&out
, sizeof (out
));
1096 /* Emit an address BRIG operand ADDR. */
1099 emit_address_operand (hsa_op_address
*addr
)
1101 struct BrigOperandAddress out
;
1103 out
.base
.byteCount
= lendian16 (sizeof (out
));
1104 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_ADDRESS
);
1105 out
.symbol
= addr
->m_symbol
1106 ? lendian32 (emit_directive_variable (addr
->m_symbol
)) : 0;
1107 out
.reg
= addr
->m_reg
? lendian32 (enqueue_op (addr
->m_reg
)) : 0;
1109 if (sizeof (addr
->m_imm_offset
) == 8)
1111 out
.offset
.lo
= lendian32 (addr
->m_imm_offset
);
1112 out
.offset
.hi
= lendian32 (addr
->m_imm_offset
>> 32);
1116 gcc_assert (sizeof (addr
->m_imm_offset
) == 4);
1117 out
.offset
.lo
= lendian32 (addr
->m_imm_offset
);
1121 brig_operand
.add (&out
, sizeof (out
));
1124 /* Emit a code reference operand REF. */
1127 emit_code_ref_operand (hsa_op_code_ref
*ref
)
1129 struct BrigOperandCodeRef out
;
1131 out
.base
.byteCount
= lendian16 (sizeof (out
));
1132 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CODE_REF
);
1133 out
.ref
= lendian32 (ref
->m_directive_offset
);
1134 brig_operand
.add (&out
, sizeof (out
));
1137 /* Emit a code list operand CODE_LIST. */
1140 emit_code_list_operand (hsa_op_code_list
*code_list
)
1142 struct BrigOperandCodeList out
;
1143 unsigned args
= code_list
->m_offsets
.length ();
1145 for (unsigned i
= 0; i
< args
; i
++)
1146 gcc_assert (code_list
->m_offsets
[i
]);
1148 out
.base
.byteCount
= lendian16 (sizeof (out
));
1149 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CODE_LIST
);
1151 uint32_t byteCount
= lendian32 (4 * args
);
1153 out
.elements
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1154 brig_data
.add (code_list
->m_offsets
.address (), args
* sizeof (uint32_t));
1155 brig_data
.round_size_up (4);
1156 brig_operand
.add (&out
, sizeof (out
));
1159 /* Emit an operand list operand OPERAND_LIST. */
1162 emit_operand_list_operand (hsa_op_operand_list
*operand_list
)
1164 struct BrigOperandOperandList out
;
1165 unsigned args
= operand_list
->m_offsets
.length ();
1167 for (unsigned i
= 0; i
< args
; i
++)
1168 gcc_assert (operand_list
->m_offsets
[i
]);
1170 out
.base
.byteCount
= lendian16 (sizeof (out
));
1171 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST
);
1173 uint32_t byteCount
= lendian32 (4 * args
);
1175 out
.elements
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1176 brig_data
.add (operand_list
->m_offsets
.address (), args
* sizeof (uint32_t));
1177 brig_data
.round_size_up (4);
1178 brig_operand
.add (&out
, sizeof (out
));
1181 /* Emit all operands queued for writing. */
1184 emit_queued_operands (void)
1186 for (hsa_op_base
*op
= op_queue
.first_op
; op
; op
= op
->m_next
)
1188 gcc_assert (op
->m_brig_op_offset
== brig_operand
.total_size
);
1189 if (hsa_op_immed
*imm
= dyn_cast
<hsa_op_immed
*> (op
))
1190 emit_immediate_operand (imm
);
1191 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
1192 emit_register_operand (reg
);
1193 else if (hsa_op_address
*addr
= dyn_cast
<hsa_op_address
*> (op
))
1194 emit_address_operand (addr
);
1195 else if (hsa_op_code_ref
*ref
= dyn_cast
<hsa_op_code_ref
*> (op
))
1196 emit_code_ref_operand (ref
);
1197 else if (hsa_op_code_list
*code_list
= dyn_cast
<hsa_op_code_list
*> (op
))
1198 emit_code_list_operand (code_list
);
1199 else if (hsa_op_operand_list
*l
= dyn_cast
<hsa_op_operand_list
*> (op
))
1200 emit_operand_list_operand (l
);
1206 /* Emit directives describing the function that is used for
1207 a function declaration. */
1209 static BrigDirectiveExecutable
*
1210 emit_function_declaration (tree decl
)
1212 hsa_function_representation
*f
= hsa_generate_function_declaration (decl
);
1214 BrigDirectiveExecutable
*e
= emit_function_directives (f
, true);
1215 emit_queued_operands ();
1222 /* Emit directives describing the function that is used for
1223 an internal function declaration. */
1225 static BrigDirectiveExecutable
*
1226 emit_internal_fn_decl (hsa_internal_fn
*fn
)
1228 hsa_function_representation
*f
= hsa_generate_internal_fn_decl (fn
);
1230 BrigDirectiveExecutable
*e
= emit_function_directives (f
, true);
1231 emit_queued_operands ();
1238 /* Enqueue all operands of INSN and return offset to BRIG data section
1239 to list of operand offsets. */
1242 emit_insn_operands (hsa_insn_basic
*insn
)
1244 auto_vec
<BrigOperandOffset32_t
, HSA_BRIG_INT_STORAGE_OPERANDS
>
1247 unsigned l
= insn
->operand_count ();
1249 /* We have N operands so use 4 * N for the byte_count. */
1250 uint32_t byte_count
= lendian32 (4 * l
);
1251 unsigned offset
= brig_data
.add (&byte_count
, sizeof (byte_count
));
1254 operand_offsets
.safe_grow (l
);
1255 for (unsigned i
= 0; i
< l
; i
++)
1256 operand_offsets
[i
] = lendian32 (enqueue_op (insn
->get_op (i
)));
1258 brig_data
.add (operand_offsets
.address (),
1259 l
* sizeof (BrigOperandOffset32_t
));
1261 brig_data
.round_size_up (4);
1265 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1266 to BRIG data section to list of operand offsets. */
1269 emit_operands (hsa_op_base
*op0
, hsa_op_base
*op1
= NULL
,
1270 hsa_op_base
*op2
= NULL
)
1272 auto_vec
<BrigOperandOffset32_t
, HSA_BRIG_INT_STORAGE_OPERANDS
>
1275 gcc_checking_assert (op0
!= NULL
);
1276 operand_offsets
.safe_push (enqueue_op (op0
));
1280 operand_offsets
.safe_push (enqueue_op (op1
));
1282 operand_offsets
.safe_push (enqueue_op (op2
));
1285 unsigned l
= operand_offsets
.length ();
1287 /* We have N operands so use 4 * N for the byte_count. */
1288 uint32_t byte_count
= lendian32 (4 * l
);
1290 unsigned offset
= brig_data
.add (&byte_count
, sizeof (byte_count
));
1291 brig_data
.add (operand_offsets
.address (),
1292 l
* sizeof (BrigOperandOffset32_t
));
1294 brig_data
.round_size_up (4);
1299 /* Emit an HSA memory instruction and all necessary directives, schedule
1300 necessary operands for writing. */
1303 emit_memory_insn (hsa_insn_mem
*mem
)
1305 struct BrigInstMem repr
;
1306 gcc_checking_assert (mem
->operand_count () == 2);
1308 hsa_op_address
*addr
= as_a
<hsa_op_address
*> (mem
->get_op (1));
1310 /* This is necessary because of the erroneous typedef of
1311 BrigMemoryModifier8_t which introduces padding which may then contain
1312 random stuff (which we do not want so that we can test things don't
1314 memset (&repr
, 0, sizeof (repr
));
1315 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1316 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MEM
);
1317 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1318 repr
.base
.type
= lendian16 (mem
->m_type
);
1319 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1322 repr
.segment
= addr
->m_symbol
->m_segment
;
1324 repr
.segment
= BRIG_SEGMENT_FLAT
;
1326 repr
.equivClass
= mem
->m_equiv_class
;
1327 repr
.align
= mem
->m_align
;
1328 if (mem
->m_opcode
== BRIG_OPCODE_LD
)
1329 repr
.width
= BRIG_WIDTH_1
;
1331 repr
.width
= BRIG_WIDTH_NONE
;
1332 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1333 brig_code
.add (&repr
, sizeof (repr
));
1337 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1338 necessary operands for writing. */
1341 emit_signal_insn (hsa_insn_signal
*mem
)
1343 struct BrigInstSignal repr
;
1345 memset (&repr
, 0, sizeof (repr
));
1346 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1347 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SIGNAL
);
1348 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1349 repr
.base
.type
= lendian16 (mem
->m_type
);
1350 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1352 repr
.memoryOrder
= mem
->m_memory_order
;
1353 repr
.signalOperation
= mem
->m_signalop
;
1354 repr
.signalType
= hsa_machine_large_p () ? BRIG_TYPE_SIG64
: BRIG_TYPE_SIG32
;
1356 brig_code
.add (&repr
, sizeof (repr
));
1360 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1361 necessary operands for writing. */
1364 emit_atomic_insn (hsa_insn_atomic
*mem
)
1366 struct BrigInstAtomic repr
;
1368 /* Either operand[0] or operand[1] must be an address operand. */
1369 hsa_op_address
*addr
= NULL
;
1370 if (is_a
<hsa_op_address
*> (mem
->get_op (0)))
1371 addr
= as_a
<hsa_op_address
*> (mem
->get_op (0));
1373 addr
= as_a
<hsa_op_address
*> (mem
->get_op (1));
1375 memset (&repr
, 0, sizeof (repr
));
1376 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1377 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_ATOMIC
);
1378 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1379 repr
.base
.type
= lendian16 (mem
->m_type
);
1380 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1383 repr
.segment
= addr
->m_symbol
->m_segment
;
1385 repr
.segment
= BRIG_SEGMENT_FLAT
;
1386 repr
.memoryOrder
= mem
->m_memoryorder
;
1387 repr
.memoryScope
= mem
->m_memoryscope
;
1388 repr
.atomicOperation
= mem
->m_atomicop
;
1390 brig_code
.add (&repr
, sizeof (repr
));
1394 /* Emit an HSA LDA instruction and all necessary directives, schedule
1395 necessary operands for writing. */
1398 emit_addr_insn (hsa_insn_basic
*insn
)
1400 struct BrigInstAddr repr
;
1402 hsa_op_address
*addr
= as_a
<hsa_op_address
*> (insn
->get_op (1));
1404 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1405 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_ADDR
);
1406 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1407 repr
.base
.type
= lendian16 (insn
->m_type
);
1408 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1411 repr
.segment
= addr
->m_symbol
->m_segment
;
1413 repr
.segment
= BRIG_SEGMENT_FLAT
;
1414 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1416 brig_code
.add (&repr
, sizeof (repr
));
1420 /* Emit an HSA segment conversion instruction and all necessary directives,
1421 schedule necessary operands for writing. */
1424 emit_segment_insn (hsa_insn_seg
*seg
)
1426 struct BrigInstSegCvt repr
;
1428 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1429 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SEG_CVT
);
1430 repr
.base
.opcode
= lendian16 (seg
->m_opcode
);
1431 repr
.base
.type
= lendian16 (seg
->m_type
);
1432 repr
.base
.operands
= lendian32 (emit_insn_operands (seg
));
1433 repr
.sourceType
= lendian16 (as_a
<hsa_op_reg
*> (seg
->get_op (1))->m_type
);
1434 repr
.segment
= seg
->m_segment
;
1437 brig_code
.add (&repr
, sizeof (repr
));
1442 /* Emit an HSA alloca instruction and all necessary directives,
1443 schedule necessary operands for writing. */
1446 emit_alloca_insn (hsa_insn_alloca
*alloca
)
1448 struct BrigInstMem repr
;
1449 gcc_checking_assert (alloca
->operand_count () == 2);
1451 memset (&repr
, 0, sizeof (repr
));
1452 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1453 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MEM
);
1454 repr
.base
.opcode
= lendian16 (alloca
->m_opcode
);
1455 repr
.base
.type
= lendian16 (alloca
->m_type
);
1456 repr
.base
.operands
= lendian32 (emit_insn_operands (alloca
));
1457 repr
.segment
= BRIG_SEGMENT_PRIVATE
;
1459 repr
.equivClass
= 0;
1460 repr
.align
= alloca
->m_align
;
1461 repr
.width
= BRIG_WIDTH_NONE
;
1462 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1463 brig_code
.add (&repr
, sizeof (repr
));
1467 /* Emit an HSA comparison instruction and all necessary directives,
1468 schedule necessary operands for writing. */
1471 emit_cmp_insn (hsa_insn_cmp
*cmp
)
1473 struct BrigInstCmp repr
;
1475 memset (&repr
, 0, sizeof (repr
));
1476 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1477 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_CMP
);
1478 repr
.base
.opcode
= lendian16 (cmp
->m_opcode
);
1479 repr
.base
.type
= lendian16 (cmp
->m_type
);
1480 repr
.base
.operands
= lendian32 (emit_insn_operands (cmp
));
1482 if (is_a
<hsa_op_reg
*> (cmp
->get_op (1)))
1484 = lendian16 (as_a
<hsa_op_reg
*> (cmp
->get_op (1))->m_type
);
1487 = lendian16 (as_a
<hsa_op_immed
*> (cmp
->get_op (1))->m_type
);
1489 repr
.compare
= cmp
->m_compare
;
1492 brig_code
.add (&repr
, sizeof (repr
));
1496 /* Emit an HSA generic branching/synchronization instruction. */
1499 emit_generic_branch_insn (hsa_insn_br
*br
)
1501 struct BrigInstBr repr
;
1502 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1503 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1504 repr
.base
.opcode
= lendian16 (br
->m_opcode
);
1505 repr
.width
= br
->m_width
;
1506 repr
.base
.type
= lendian16 (br
->m_type
);
1507 repr
.base
.operands
= lendian32 (emit_insn_operands (br
));
1508 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1510 brig_code
.add (&repr
, sizeof (repr
));
1514 /* Emit an HSA conditional branching instruction and all necessary directives,
1515 schedule necessary operands for writing. */
1518 emit_cond_branch_insn (hsa_insn_cbr
*br
)
1520 struct BrigInstBr repr
;
1522 basic_block target
= NULL
;
1526 /* At the moment we only handle direct conditional jumps. */
1527 gcc_assert (br
->m_opcode
== BRIG_OPCODE_CBR
);
1528 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1529 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1530 repr
.base
.opcode
= lendian16 (br
->m_opcode
);
1531 repr
.width
= br
->m_width
;
1532 /* For Conditional jumps the type is always B1. */
1533 repr
.base
.type
= lendian16 (BRIG_TYPE_B1
);
1535 FOR_EACH_EDGE (e
, ei
, br
->m_bb
->succs
)
1536 if (e
->flags
& EDGE_TRUE_VALUE
)
1541 gcc_assert (target
);
1544 = lendian32 (emit_operands (br
->get_op (0),
1545 &hsa_bb_for_bb (target
)->m_label_ref
));
1546 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1548 brig_code
.add (&repr
, sizeof (repr
));
1552 /* Emit an HSA unconditional jump branching instruction that points to
1553 a label REFERENCE. */
1556 emit_unconditional_jump (hsa_op_code_ref
*reference
)
1558 struct BrigInstBr repr
;
1560 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1561 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1562 repr
.base
.opcode
= lendian16 (BRIG_OPCODE_BR
);
1563 repr
.base
.type
= lendian16 (BRIG_TYPE_NONE
);
1564 /* Direct branches to labels must be width(all). */
1565 repr
.width
= BRIG_WIDTH_ALL
;
1567 repr
.base
.operands
= lendian32 (emit_operands (reference
));
1568 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1569 brig_code
.add (&repr
, sizeof (repr
));
1573 /* Emit an HSA switch jump instruction that uses a jump table to
1574 jump to a destination label. */
1577 emit_switch_insn (hsa_insn_sbr
*sbr
)
1579 struct BrigInstBr repr
;
1581 gcc_assert (sbr
->m_opcode
== BRIG_OPCODE_SBR
);
1582 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1583 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1584 repr
.base
.opcode
= lendian16 (sbr
->m_opcode
);
1585 repr
.width
= BRIG_WIDTH_1
;
1586 /* The instruction type is the type of the index register operand. */
1587 hsa_op_reg
*index
= as_a
<hsa_op_reg
*> (sbr
->get_op (0));
1588 repr
.base
.type
= lendian16 (index
->m_type
);
1590 = lendian32 (emit_operands (sbr
->get_op (0), sbr
->m_label_code_list
));
1591 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1593 brig_code
.add (&repr
, sizeof (repr
));
1597 /* Emit an HSA convert instruction and all necessary directives, schedule
1598 necessary operands for writing. */
1601 emit_cvt_insn (hsa_insn_cvt
*insn
)
1603 struct BrigInstCvt repr
;
1604 BrigType16_t srctype
;
1606 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1607 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_CVT
);
1608 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1609 repr
.base
.type
= lendian16 (insn
->m_type
);
1610 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1612 if (is_a
<hsa_op_reg
*> (insn
->get_op (1)))
1613 srctype
= as_a
<hsa_op_reg
*> (insn
->get_op (1))->m_type
;
1615 srctype
= as_a
<hsa_op_immed
*> (insn
->get_op (1))->m_type
;
1616 repr
.sourceType
= lendian16 (srctype
);
1618 /* float to smaller float requires a rounding setting (we default
1620 if (hsa_type_float_p (insn
->m_type
)
1621 && (!hsa_type_float_p (srctype
)
1622 || ((insn
->m_type
& BRIG_TYPE_BASE_MASK
)
1623 < (srctype
& BRIG_TYPE_BASE_MASK
))))
1624 repr
.round
= BRIG_ROUND_FLOAT_NEAR_EVEN
;
1625 else if (hsa_type_integer_p (insn
->m_type
) &&
1626 hsa_type_float_p (srctype
))
1627 repr
.round
= BRIG_ROUND_INTEGER_ZERO
;
1629 repr
.round
= BRIG_ROUND_NONE
;
1630 brig_code
.add (&repr
, sizeof (repr
));
1634 /* Emit call instruction CALL, where this instruction must be closed
1635 within a call block instruction. */
1638 emit_call_insn (hsa_insn_call
*call
)
1640 struct BrigInstBr repr
;
1642 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1643 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1644 repr
.base
.opcode
= lendian16 (BRIG_OPCODE_CALL
);
1645 repr
.base
.type
= lendian16 (BRIG_TYPE_NONE
);
1648 = lendian32 (emit_operands (call
->m_result_code_list
, &call
->m_func
,
1649 call
->m_args_code_list
));
1651 /* Internal functions have not set m_called_function. */
1652 if (call
->m_called_function
)
1654 function_linkage_pair
pair (call
->m_called_function
,
1655 call
->m_func
.m_brig_op_offset
);
1656 function_call_linkage
.safe_push (pair
);
1660 hsa_internal_fn
*slot
1661 = hsa_emitted_internal_decls
->find (call
->m_called_internal_fn
);
1663 gcc_assert (slot
->m_offset
> 0);
1664 call
->m_func
.m_directive_offset
= slot
->m_offset
;
1667 repr
.width
= BRIG_WIDTH_ALL
;
1668 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1670 brig_code
.add (&repr
, sizeof (repr
));
1674 /* Emit argument block directive. */
1677 emit_arg_block_insn (hsa_insn_arg_block
*insn
)
1679 switch (insn
->m_kind
)
1681 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START
:
1683 struct BrigDirectiveArgBlock repr
;
1684 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1685 repr
.base
.kind
= lendian16 (insn
->m_kind
);
1686 brig_code
.add (&repr
, sizeof (repr
));
1688 for (unsigned i
= 0; i
< insn
->m_call_insn
->m_input_args
.length (); i
++)
1690 insn
->m_call_insn
->m_args_code_list
->m_offsets
[i
]
1691 = lendian32 (emit_directive_variable
1692 (insn
->m_call_insn
->m_input_args
[i
]));
1696 if (insn
->m_call_insn
->m_output_arg
)
1698 insn
->m_call_insn
->m_result_code_list
->m_offsets
[0]
1699 = lendian32 (emit_directive_variable
1700 (insn
->m_call_insn
->m_output_arg
));
1706 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END
:
1708 struct BrigDirectiveArgBlock repr
;
1709 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1710 repr
.base
.kind
= lendian16 (insn
->m_kind
);
1711 brig_code
.add (&repr
, sizeof (repr
));
1721 /* Emit comment directive. */
1724 emit_comment_insn (hsa_insn_comment
*insn
)
1726 struct BrigDirectiveComment repr
;
1727 memset (&repr
, 0, sizeof (repr
));
1729 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1730 repr
.base
.kind
= lendian16 (insn
->m_opcode
);
1731 repr
.name
= brig_emit_string (insn
->m_comment
, '\0', false);
1732 brig_code
.add (&repr
, sizeof (repr
));
1735 /* Emit queue instruction INSN. */
1738 emit_queue_insn (hsa_insn_queue
*insn
)
1741 memset (&repr
, 0, sizeof (repr
));
1743 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1744 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_QUEUE
);
1745 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1746 repr
.base
.type
= lendian16 (insn
->m_type
);
1747 repr
.segment
= insn
->m_segment
;
1748 repr
.memoryOrder
= insn
->m_memory_order
;
1749 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1750 brig_data
.round_size_up (4);
1751 brig_code
.add (&repr
, sizeof (repr
));
1756 /* Emit source type instruction INSN. */
1759 emit_srctype_insn (hsa_insn_srctype
*insn
)
1761 /* We assume that BrigInstSourceType has a BrigInstBasic prefix. */
1762 struct BrigInstSourceType repr
;
1763 unsigned operand_count
= insn
->operand_count ();
1764 gcc_checking_assert (operand_count
>= 2);
1766 memset (&repr
, 0, sizeof (repr
));
1767 repr
.sourceType
= lendian16 (insn
->m_source_type
);
1768 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1769 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SOURCE_TYPE
);
1770 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1771 repr
.base
.type
= lendian16 (insn
->m_type
);
1773 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1774 brig_code
.add (&repr
, sizeof (struct BrigInstSourceType
));
1778 /* Emit packed instruction INSN. */
1781 emit_packed_insn (hsa_insn_packed
*insn
)
1783 /* We assume that BrigInstSourceType has a BrigInstBasic prefix. */
1784 struct BrigInstSourceType repr
;
1785 unsigned operand_count
= insn
->operand_count ();
1786 gcc_checking_assert (operand_count
>= 2);
1788 memset (&repr
, 0, sizeof (repr
));
1789 repr
.sourceType
= lendian16 (insn
->m_source_type
);
1790 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1791 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SOURCE_TYPE
);
1792 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1793 repr
.base
.type
= lendian16 (insn
->m_type
);
1795 if (insn
->m_opcode
== BRIG_OPCODE_COMBINE
)
1797 /* Create operand list for packed type. */
1798 for (unsigned i
= 1; i
< operand_count
; i
++)
1800 gcc_checking_assert (insn
->get_op (i
));
1801 insn
->m_operand_list
->m_offsets
[i
- 1]
1802 = lendian32 (enqueue_op (insn
->get_op (i
)));
1805 repr
.base
.operands
= lendian32 (emit_operands (insn
->get_op (0),
1806 insn
->m_operand_list
));
1808 else if (insn
->m_opcode
== BRIG_OPCODE_EXPAND
)
1810 /* Create operand list for packed type. */
1811 for (unsigned i
= 0; i
< operand_count
- 1; i
++)
1813 gcc_checking_assert (insn
->get_op (i
));
1814 insn
->m_operand_list
->m_offsets
[i
]
1815 = lendian32 (enqueue_op (insn
->get_op (i
)));
1818 unsigned ops
= emit_operands (insn
->m_operand_list
,
1819 insn
->get_op (insn
->operand_count () - 1));
1820 repr
.base
.operands
= lendian32 (ops
);
1824 brig_code
.add (&repr
, sizeof (struct BrigInstSourceType
));
1828 /* Emit a basic HSA instruction and all necessary directives, schedule
1829 necessary operands for writing. */
1832 emit_basic_insn (hsa_insn_basic
*insn
)
1834 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1835 struct BrigInstMod repr
;
1838 memset (&repr
, 0, sizeof (repr
));
1839 repr
.base
.base
.byteCount
= lendian16 (sizeof (BrigInstBasic
));
1840 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BASIC
);
1841 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1842 switch (insn
->m_opcode
)
1844 /* And the bit-logical operations need bit types and whine about
1845 arithmetic types :-/ */
1846 case BRIG_OPCODE_AND
:
1847 case BRIG_OPCODE_OR
:
1848 case BRIG_OPCODE_XOR
:
1849 case BRIG_OPCODE_NOT
:
1850 type
= regtype_for_type (insn
->m_type
);
1853 type
= insn
->m_type
;
1856 repr
.base
.type
= lendian16 (type
);
1857 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1859 if (hsa_type_packed_p (type
))
1861 if (hsa_type_float_p (type
)
1862 && !hsa_opcode_floating_bit_insn_p (insn
->m_opcode
))
1863 repr
.round
= BRIG_ROUND_FLOAT_NEAR_EVEN
;
1866 /* We assume that destination and sources agree in packing layout. */
1867 if (insn
->num_used_ops () >= 2)
1868 repr
.pack
= BRIG_PACK_PP
;
1870 repr
.pack
= BRIG_PACK_P
;
1872 repr
.base
.base
.byteCount
= lendian16 (sizeof (BrigInstMod
));
1873 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MOD
);
1874 brig_code
.add (&repr
, sizeof (struct BrigInstMod
));
1877 brig_code
.add (&repr
, sizeof (struct BrigInstBasic
));
1881 /* Emit an HSA instruction and all necessary directives, schedule necessary
1882 operands for writing. */
1885 emit_insn (hsa_insn_basic
*insn
)
1887 gcc_assert (!is_a
<hsa_insn_phi
*> (insn
));
1889 insn
->m_brig_offset
= brig_code
.total_size
;
1891 if (hsa_insn_signal
*signal
= dyn_cast
<hsa_insn_signal
*> (insn
))
1892 emit_signal_insn (signal
);
1893 else if (hsa_insn_atomic
*atom
= dyn_cast
<hsa_insn_atomic
*> (insn
))
1894 emit_atomic_insn (atom
);
1895 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
1896 emit_memory_insn (mem
);
1897 else if (insn
->m_opcode
== BRIG_OPCODE_LDA
)
1898 emit_addr_insn (insn
);
1899 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
1900 emit_segment_insn (seg
);
1901 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
1902 emit_cmp_insn (cmp
);
1903 else if (hsa_insn_cbr
*br
= dyn_cast
<hsa_insn_cbr
*> (insn
))
1904 emit_cond_branch_insn (br
);
1905 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
1907 if (switch_instructions
== NULL
)
1908 switch_instructions
= new vec
<hsa_insn_sbr
*> ();
1910 switch_instructions
->safe_push (sbr
);
1911 emit_switch_insn (sbr
);
1913 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
1914 emit_generic_branch_insn (br
);
1915 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
1916 emit_arg_block_insn (block
);
1917 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
1918 emit_call_insn (call
);
1919 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
1920 emit_comment_insn (comment
);
1921 else if (hsa_insn_queue
*queue
= dyn_cast
<hsa_insn_queue
*> (insn
))
1922 emit_queue_insn (queue
);
1923 else if (hsa_insn_srctype
*srctype
= dyn_cast
<hsa_insn_srctype
*> (insn
))
1924 emit_srctype_insn (srctype
);
1925 else if (hsa_insn_packed
*packed
= dyn_cast
<hsa_insn_packed
*> (insn
))
1926 emit_packed_insn (packed
);
1927 else if (hsa_insn_cvt
*cvt
= dyn_cast
<hsa_insn_cvt
*> (insn
))
1928 emit_cvt_insn (cvt
);
1929 else if (hsa_insn_alloca
*alloca
= dyn_cast
<hsa_insn_alloca
*> (insn
))
1930 emit_alloca_insn (alloca
);
1932 emit_basic_insn (insn
);
1935 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1936 or we are about to finish emitting code, if it is NULL. If the fall through
1937 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1940 perhaps_emit_branch (basic_block bb
, basic_block next_bb
)
1942 basic_block t_bb
= NULL
, ff
= NULL
;
1947 /* If the last instruction of BB is a switch, ignore emission of all
1949 if (hsa_bb_for_bb (bb
)->m_last_insn
1950 && is_a
<hsa_insn_sbr
*> (hsa_bb_for_bb (bb
)->m_last_insn
))
1953 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1954 if (e
->flags
& EDGE_TRUE_VALUE
)
1965 if (!ff
|| ff
== next_bb
|| ff
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1968 emit_unconditional_jump (&hsa_bb_for_bb (ff
)->m_label_ref
);
1971 /* Emit the function described by hsa_cfun to the various brig sections. */
1974 hsa_brig_emit_function (void)
1976 basic_block bb
, prev_bb
;
1977 hsa_insn_basic
*insn
;
1978 BrigDirectiveExecutable
*ptr_to_fndir
;
1982 brig_insn_count
= 0;
1983 memset (&op_queue
, 0, sizeof (op_queue
));
1984 op_queue
.projected_size
= brig_operand
.total_size
;
1986 if (!function_offsets
)
1987 function_offsets
= new hash_map
<tree
, BrigCodeOffset32_t
> ();
1989 if (!emitted_declarations
)
1990 emitted_declarations
= new hash_map
<tree
, BrigDirectiveExecutable
*> ();
1992 for (unsigned i
= 0; i
< hsa_cfun
->m_called_functions
.length (); i
++)
1994 tree called
= hsa_cfun
->m_called_functions
[i
];
1996 /* If the function has no definition, emit a declaration. */
1997 if (!emitted_declarations
->get (called
))
1999 BrigDirectiveExecutable
*e
= emit_function_declaration (called
);
2000 emitted_declarations
->put (called
, e
);
2004 for (unsigned i
= 0; i
< hsa_cfun
->m_called_internal_fns
.length (); i
++)
2006 hsa_internal_fn
*called
= hsa_cfun
->m_called_internal_fns
[i
];
2007 emit_internal_fn_decl (called
);
2010 ptr_to_fndir
= emit_function_directives (hsa_cfun
, false);
2011 for (insn
= hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->m_first_insn
;
2013 insn
= insn
->m_next
)
2015 prev_bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
);
2016 FOR_EACH_BB_FN (bb
, cfun
)
2018 perhaps_emit_branch (prev_bb
, bb
);
2019 emit_bb_label_directive (hsa_bb_for_bb (bb
));
2020 for (insn
= hsa_bb_for_bb (bb
)->m_first_insn
; insn
; insn
= insn
->m_next
)
2024 perhaps_emit_branch (prev_bb
, NULL
);
2025 ptr_to_fndir
->nextModuleEntry
= lendian32 (brig_code
.total_size
);
2027 /* Fill up label references for all sbr instructions. */
2028 if (switch_instructions
)
2030 for (unsigned i
= 0; i
< switch_instructions
->length (); i
++)
2032 hsa_insn_sbr
*sbr
= (*switch_instructions
)[i
];
2033 for (unsigned j
= 0; j
< sbr
->m_jump_table
.length (); j
++)
2035 hsa_bb
*hbb
= hsa_bb_for_bb (sbr
->m_jump_table
[j
]);
2036 sbr
->m_label_code_list
->m_offsets
[j
]
2037 = hbb
->m_label_ref
.m_directive_offset
;
2041 switch_instructions
->release ();
2042 delete switch_instructions
;
2043 switch_instructions
= NULL
;
2048 fprintf (dump_file
, "------- After BRIG emission: -------\n");
2049 dump_hsa_cfun (dump_file
);
2052 emit_queued_operands ();
2055 /* Emit all symbols related to OMP. */
2058 hsa_brig_emit_omp_symbols (void)
2061 emit_directive_variable (hsa_num_threads
);
2064 /* Create and return __hsa_global_variables symbol that contains
2065 all the information consumed by libgomp to link global variables
2066 with their string names used by an HSA kernel. */
2069 hsa_output_global_variables ()
2071 unsigned l
= hsa_global_variable_symbols
->elements ();
2073 tree variable_info_type
= make_node (RECORD_TYPE
);
2074 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2075 get_identifier ("name"), ptr_type_node
);
2076 DECL_CHAIN (id_f1
) = NULL_TREE
;
2077 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2078 get_identifier ("omp_data_size"),
2080 DECL_CHAIN (id_f2
) = id_f1
;
2081 finish_builtin_struct (variable_info_type
, "__hsa_variable_info", id_f2
,
2084 tree int_num_of_global_vars
;
2085 int_num_of_global_vars
= build_int_cst (uint32_type_node
, l
);
2086 tree global_vars_num_index_type
= build_index_type (int_num_of_global_vars
);
2087 tree global_vars_array_type
= build_array_type (variable_info_type
,
2088 global_vars_num_index_type
);
2089 TYPE_ARTIFICIAL (global_vars_array_type
) = 1;
2091 vec
<constructor_elt
, va_gc
> *global_vars_vec
= NULL
;
2093 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
2094 = hsa_global_variable_symbols
->begin ();
2095 it
!= hsa_global_variable_symbols
->end (); ++it
)
2097 unsigned len
= strlen ((*it
)->m_name
);
2098 char *copy
= XNEWVEC (char, len
+ 2);
2100 memcpy (copy
+ 1, (*it
)->m_name
, len
);
2101 copy
[len
+ 1] = '\0';
2103 hsa_sanitize_name (copy
);
2105 tree var_name
= build_string (len
, copy
);
2106 TREE_TYPE (var_name
)
2107 = build_array_type (char_type_node
, build_index_type (size_int (len
)));
2110 vec
<constructor_elt
, va_gc
> *variable_info_vec
= NULL
;
2111 CONSTRUCTOR_APPEND_ELT (variable_info_vec
, NULL_TREE
,
2113 build_pointer_type (TREE_TYPE (var_name
)),
2115 CONSTRUCTOR_APPEND_ELT (variable_info_vec
, NULL_TREE
,
2116 build_fold_addr_expr ((*it
)->m_decl
));
2118 tree variable_info_ctor
= build_constructor (variable_info_type
,
2121 CONSTRUCTOR_APPEND_ELT (global_vars_vec
, NULL_TREE
,
2122 variable_info_ctor
);
2125 tree global_vars_ctor
= build_constructor (global_vars_array_type
,
2129 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_global_variables", 1);
2130 tree global_vars_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2131 get_identifier (tmp_name
),
2132 global_vars_array_type
);
2133 TREE_STATIC (global_vars_table
) = 1;
2134 TREE_READONLY (global_vars_table
) = 1;
2135 TREE_PUBLIC (global_vars_table
) = 0;
2136 DECL_ARTIFICIAL (global_vars_table
) = 1;
2137 DECL_IGNORED_P (global_vars_table
) = 1;
2138 DECL_EXTERNAL (global_vars_table
) = 0;
2139 TREE_CONSTANT (global_vars_table
) = 1;
2140 DECL_INITIAL (global_vars_table
) = global_vars_ctor
;
2141 varpool_node::finalize_decl (global_vars_table
);
2143 return global_vars_table
;
2146 /* Create __hsa_host_functions and __hsa_kernels that contain
2147 all the information consumed by libgomp to register all kernels
2148 in the BRIG binary. */
2151 hsa_output_kernels (tree
*host_func_table
, tree
*kernels
)
2153 unsigned map_count
= hsa_get_number_decl_kernel_mappings ();
2155 tree int_num_of_kernels
;
2156 int_num_of_kernels
= build_int_cst (uint32_type_node
, map_count
);
2157 tree kernel_num_index_type
= build_index_type (int_num_of_kernels
);
2158 tree host_functions_array_type
= build_array_type (ptr_type_node
,
2159 kernel_num_index_type
);
2160 TYPE_ARTIFICIAL (host_functions_array_type
) = 1;
2162 vec
<constructor_elt
, va_gc
> *host_functions_vec
= NULL
;
2163 for (unsigned i
= 0; i
< map_count
; ++i
)
2165 tree decl
= hsa_get_decl_kernel_mapping_decl (i
);
2166 tree host_fn
= build_fold_addr_expr (hsa_get_host_function (decl
));
2167 CONSTRUCTOR_APPEND_ELT (host_functions_vec
, NULL_TREE
, host_fn
);
2169 tree host_functions_ctor
= build_constructor (host_functions_array_type
,
2170 host_functions_vec
);
2172 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_host_functions", 1);
2173 tree hsa_host_func_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2174 get_identifier (tmp_name
),
2175 host_functions_array_type
);
2176 TREE_STATIC (hsa_host_func_table
) = 1;
2177 TREE_READONLY (hsa_host_func_table
) = 1;
2178 TREE_PUBLIC (hsa_host_func_table
) = 0;
2179 DECL_ARTIFICIAL (hsa_host_func_table
) = 1;
2180 DECL_IGNORED_P (hsa_host_func_table
) = 1;
2181 DECL_EXTERNAL (hsa_host_func_table
) = 0;
2182 TREE_CONSTANT (hsa_host_func_table
) = 1;
2183 DECL_INITIAL (hsa_host_func_table
) = host_functions_ctor
;
2184 varpool_node::finalize_decl (hsa_host_func_table
);
2185 *host_func_table
= hsa_host_func_table
;
2187 /* Following code emits list of kernel_info structures. */
2189 tree kernel_info_type
= make_node (RECORD_TYPE
);
2190 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2191 get_identifier ("name"), ptr_type_node
);
2192 DECL_CHAIN (id_f1
) = NULL_TREE
;
2193 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2194 get_identifier ("omp_data_size"),
2195 unsigned_type_node
);
2196 DECL_CHAIN (id_f2
) = id_f1
;
2197 tree id_f3
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2198 get_identifier ("gridified_kernel_p"),
2200 DECL_CHAIN (id_f3
) = id_f2
;
2201 tree id_f4
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2202 get_identifier ("kernel_dependencies_count"),
2203 unsigned_type_node
);
2204 DECL_CHAIN (id_f4
) = id_f3
;
2205 tree id_f5
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2206 get_identifier ("kernel_dependencies"),
2207 build_pointer_type (build_pointer_type
2209 DECL_CHAIN (id_f5
) = id_f4
;
2210 finish_builtin_struct (kernel_info_type
, "__hsa_kernel_info", id_f5
,
2213 int_num_of_kernels
= build_int_cstu (uint32_type_node
, map_count
);
2214 tree kernel_info_vector_type
2215 = build_array_type (kernel_info_type
,
2216 build_index_type (int_num_of_kernels
));
2217 TYPE_ARTIFICIAL (kernel_info_vector_type
) = 1;
2219 vec
<constructor_elt
, va_gc
> *kernel_info_vector_vec
= NULL
;
2220 tree kernel_dependencies_vector_type
= NULL
;
2222 for (unsigned i
= 0; i
< map_count
; ++i
)
2224 tree kernel
= hsa_get_decl_kernel_mapping_decl (i
);
2225 char *name
= hsa_get_decl_kernel_mapping_name (i
);
2226 unsigned len
= strlen (name
);
2227 char *copy
= XNEWVEC (char, len
+ 2);
2229 memcpy (copy
+ 1, name
, len
);
2230 copy
[len
+ 1] = '\0';
2233 tree kern_name
= build_string (len
, copy
);
2234 TREE_TYPE (kern_name
)
2235 = build_array_type (char_type_node
, build_index_type (size_int (len
)));
2238 unsigned omp_size
= hsa_get_decl_kernel_mapping_omp_size (i
);
2239 tree omp_data_size
= build_int_cstu (unsigned_type_node
, omp_size
);
2240 bool gridified_kernel_p
= hsa_get_decl_kernel_mapping_gridified (i
);
2241 tree gridified_kernel_p_tree
= build_int_cstu (boolean_type_node
,
2242 gridified_kernel_p
);
2244 vec
<constructor_elt
, va_gc
> *kernel_dependencies_vec
= NULL
;
2245 if (hsa_decl_kernel_dependencies
)
2247 vec
<const char *> **slot
;
2248 slot
= hsa_decl_kernel_dependencies
->get (kernel
);
2251 vec
<const char *> *dependencies
= *slot
;
2252 count
= dependencies
->length ();
2254 kernel_dependencies_vector_type
2255 = build_array_type (build_pointer_type (char_type_node
),
2256 build_index_type (size_int (count
)));
2257 TYPE_ARTIFICIAL (kernel_dependencies_vector_type
) = 1;
2259 for (unsigned j
= 0; j
< count
; j
++)
2261 const char *d
= (*dependencies
)[j
];
2263 tree dependency_name
= build_string (len
, d
);
2264 TREE_TYPE (dependency_name
)
2265 = build_array_type (char_type_node
,
2266 build_index_type (size_int (len
)));
2268 CONSTRUCTOR_APPEND_ELT
2269 (kernel_dependencies_vec
, NULL_TREE
,
2271 build_pointer_type (TREE_TYPE (dependency_name
)),
2277 tree dependencies_count
= build_int_cstu (unsigned_type_node
, count
);
2279 vec
<constructor_elt
, va_gc
> *kernel_info_vec
= NULL
;
2280 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2282 build_pointer_type (TREE_TYPE
2285 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, omp_data_size
);
2286 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2287 gridified_kernel_p_tree
);
2288 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, dependencies_count
);
2292 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_dependencies_list", i
);
2293 gcc_checking_assert (kernel_dependencies_vector_type
);
2294 tree dependencies_list
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2295 get_identifier (tmp_name
),
2296 kernel_dependencies_vector_type
);
2298 TREE_STATIC (dependencies_list
) = 1;
2299 TREE_READONLY (dependencies_list
) = 1;
2300 TREE_PUBLIC (dependencies_list
) = 0;
2301 DECL_ARTIFICIAL (dependencies_list
) = 1;
2302 DECL_IGNORED_P (dependencies_list
) = 1;
2303 DECL_EXTERNAL (dependencies_list
) = 0;
2304 TREE_CONSTANT (dependencies_list
) = 1;
2305 DECL_INITIAL (dependencies_list
)
2306 = build_constructor (kernel_dependencies_vector_type
,
2307 kernel_dependencies_vec
);
2308 varpool_node::finalize_decl (dependencies_list
);
2310 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2313 (TREE_TYPE (dependencies_list
)),
2314 dependencies_list
));
2317 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, null_pointer_node
);
2319 tree kernel_info_ctor
= build_constructor (kernel_info_type
,
2322 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec
, NULL_TREE
,
2326 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_kernels", 1);
2327 tree hsa_kernels
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2328 get_identifier (tmp_name
),
2329 kernel_info_vector_type
);
2331 TREE_STATIC (hsa_kernels
) = 1;
2332 TREE_READONLY (hsa_kernels
) = 1;
2333 TREE_PUBLIC (hsa_kernels
) = 0;
2334 DECL_ARTIFICIAL (hsa_kernels
) = 1;
2335 DECL_IGNORED_P (hsa_kernels
) = 1;
2336 DECL_EXTERNAL (hsa_kernels
) = 0;
2337 TREE_CONSTANT (hsa_kernels
) = 1;
2338 DECL_INITIAL (hsa_kernels
) = build_constructor (kernel_info_vector_type
,
2339 kernel_info_vector_vec
);
2340 varpool_node::finalize_decl (hsa_kernels
);
2341 *kernels
= hsa_kernels
;
2344 /* Create a static constructor that will register our brig stuff with libgomp.  */
2348 hsa_output_libgomp_mapping (tree brig_decl
)
/* BRIG_DECL is the declaration whose address is stored as the first element
   of the image descriptor built below.  The function emits three static
   read-only variables (image descriptor, host table, plus whatever
   hsa_output_kernels and hsa_output_global_variables create) and appends
   calls to the offload register/unregister builtins to the HSA constructor
   and destructor statement lists.  */
2350 unsigned kernel_count
= hsa_get_number_decl_kernel_mappings ();
2351 unsigned global_variable_count
= hsa_global_variable_symbols
->elements ();
2354 tree host_func_table
;
/* hsa_output_kernels fills in both the host function table and the kernel
   info table through its two output pointers.  */
2356 hsa_output_kernels (&host_func_table
, &kernels
);
2357 tree global_vars
= hsa_output_global_variables ();
/* Build the record type "__hsa_image_desc".  Each new field is linked to the
   previously created one through DECL_CHAIN, so id_f5 heads the chain that
   is handed to finish_builtin_struct.  */
2359 tree hsa_image_desc_type
= make_node (RECORD_TYPE
);
2360 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2361 get_identifier ("brig_module"), ptr_type_node
);
2362 DECL_CHAIN (id_f1
) = NULL_TREE
;
2363 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2364 get_identifier ("kernel_count"),
2365 unsigned_type_node
);
2367 DECL_CHAIN (id_f2
) = id_f1
;
2368 tree id_f3
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2369 get_identifier ("hsa_kernel_infos"),
2371 DECL_CHAIN (id_f3
) = id_f2
;
2372 tree id_f4
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2373 get_identifier ("global_variable_count"),
2374 unsigned_type_node
);
2375 DECL_CHAIN (id_f4
) = id_f3
;
2376 tree id_f5
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2377 get_identifier ("hsa_global_variable_infos"),
2379 DECL_CHAIN (id_f5
) = id_f4
;
2380 finish_builtin_struct (hsa_image_desc_type
, "__hsa_image_desc", id_f5
,
2382 TYPE_ARTIFICIAL (hsa_image_desc_type
) = 1;
/* Initializer elements are appended in this order: address of BRIG_DECL,
   kernel count, an element typed as pointer to the kernels table, global
   variable count, an element typed as pointer to the global variables
   table.  */
2384 vec
<constructor_elt
, va_gc
> *img_desc_vec
= NULL
;
2385 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2386 build_fold_addr_expr (brig_decl
));
2387 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2388 build_int_cstu (unsigned_type_node
, kernel_count
));
2389 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2391 build_pointer_type (TREE_TYPE (kernels
)),
2393 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2394 build_int_cstu (unsigned_type_node
,
2395 global_variable_count
));
2396 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2398 build_pointer_type (TREE_TYPE (global_vars
)),
2401 tree img_desc_ctor
= build_constructor (hsa_image_desc_type
, img_desc_vec
);
/* Emit the descriptor as a file-local (TREE_PUBLIC = 0), static, read-only,
   artificial variable initialized with the constructor built above.  */
2404 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_img_descriptor", 1);
2405 tree hsa_img_descriptor
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2406 get_identifier (tmp_name
),
2407 hsa_image_desc_type
);
2408 TREE_STATIC (hsa_img_descriptor
) = 1;
2409 TREE_READONLY (hsa_img_descriptor
) = 1;
2410 TREE_PUBLIC (hsa_img_descriptor
) = 0;
2411 DECL_ARTIFICIAL (hsa_img_descriptor
) = 1;
2412 DECL_IGNORED_P (hsa_img_descriptor
) = 1;
2413 DECL_EXTERNAL (hsa_img_descriptor
) = 0;
2414 TREE_CONSTANT (hsa_img_descriptor
) = 1;
2415 DECL_INITIAL (hsa_img_descriptor
) = img_desc_ctor
;
2416 varpool_node::finalize_decl (hsa_img_descriptor
);
2418 /* Construct the "host_table" libgomp expects. */
/* The table is an array of generic pointers.  The entries appended below
   are: the address of the host function table, that address advanced by
   (pointer size * kernel_count) bytes, and two null pointers.  */
2419 tree index_type
= build_index_type (build_int_cst (integer_type_node
, 4));
2420 tree libgomp_host_table_type
= build_array_type (ptr_type_node
, index_type
);
2421 TYPE_ARTIFICIAL (libgomp_host_table_type
) = 1;
2422 vec
<constructor_elt
, va_gc
> *libgomp_host_table_vec
= NULL
;
2423 tree host_func_table_addr
= build_fold_addr_expr (host_func_table
);
2424 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
,
2425 host_func_table_addr
);
/* Size in bytes of kernel_count pointer-sized entries.  */
2426 offset_int func_table_size
2427 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node
)) * kernel_count
;
2428 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
,
2429 fold_build2 (POINTER_PLUS_EXPR
,
2430 TREE_TYPE (host_func_table_addr
),
2431 host_func_table_addr
,
2432 build_int_cst (size_type_node
,
2433 func_table_size
.to_uhwi
2435 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
, null_pointer_node
);
2436 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
, null_pointer_node
);
2437 tree libgomp_host_table_ctor
= build_constructor (libgomp_host_table_type
,
2438 libgomp_host_table_vec
);
/* Same variable setup as for the image descriptor: file-local, static,
   read-only and artificial.  */
2439 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_libgomp_host_table", 1);
2440 tree hsa_libgomp_host_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2441 get_identifier (tmp_name
),
2442 libgomp_host_table_type
);
2444 TREE_STATIC (hsa_libgomp_host_table
) = 1;
2445 TREE_READONLY (hsa_libgomp_host_table
) = 1;
2446 TREE_PUBLIC (hsa_libgomp_host_table
) = 0;
2447 DECL_ARTIFICIAL (hsa_libgomp_host_table
) = 1;
2448 DECL_IGNORED_P (hsa_libgomp_host_table
) = 1;
2449 DECL_EXTERNAL (hsa_libgomp_host_table
) = 0;
2450 TREE_CONSTANT (hsa_libgomp_host_table
) = 1;
2451 DECL_INITIAL (hsa_libgomp_host_table
) = libgomp_host_table_ctor
;
2452 varpool_node::finalize_decl (hsa_libgomp_host_table
);
2454 /* Generate an initializer with a call to the registration routine. */
2456 tree offload_register
2457 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER
);
2458 gcc_checking_assert (offload_register
);
/* The call takes four arguments: a version word built with
   GOMP_VERSION_PACK, the address of the host table, the GOMP_DEVICE_HSA
   device constant and the address of the image descriptor.  A static
   constructor ('I') is then built from the constructor statement list.  */
2460 tree
*hsa_ctor_stmts
= hsa_get_ctor_statements ();
2461 append_to_statement_list
2462 (build_call_expr (offload_register
, 4,
2463 build_int_cstu (unsigned_type_node
,
2464 GOMP_VERSION_PACK (GOMP_VERSION
,
2466 build_fold_addr_expr (hsa_libgomp_host_table
),
2467 build_int_cst (integer_type_node
, GOMP_DEVICE_HSA
),
2468 build_fold_addr_expr (hsa_img_descriptor
)),
2471 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts
, DEFAULT_INIT_PRIORITY
);
/* Mirror image of the registration above: the unregister builtin receives
   the same four arguments and a static destructor ('D') is built from the
   destructor statement list.  */
2473 tree offload_unregister
2474 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER
);
2475 gcc_checking_assert (offload_unregister
);
2477 tree
*hsa_dtor_stmts
= hsa_get_dtor_statements ();
2478 append_to_statement_list
2479 (build_call_expr (offload_unregister
, 4,
2480 build_int_cstu (unsigned_type_node
,
2481 GOMP_VERSION_PACK (GOMP_VERSION
,
2483 build_fold_addr_expr (hsa_libgomp_host_table
),
2484 build_int_cst (integer_type_node
, GOMP_DEVICE_HSA
),
2485 build_fold_addr_expr (hsa_img_descriptor
)),
2487 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts
, DEFAULT_INIT_PRIORITY
);
2490 /* Emit the brig module we have compiled to a section in the final assembly and
2491 also create a compile unit static constructor that will register the brig
2492 module with libgomp. */
2495 hsa_output_brig (void)
2497 section
*saved_section
;
2499 if (!brig_initialized
)
/* Resolve recorded call sites: for each (function_decl, offset) pair, look
   up the callee's code offset in function_offsets and store it, converted
   with lendian32, into the ref field of the BrigOperandCodeRef located at
   p.offset inside brig_operand.  */
2502 for (unsigned i
= 0; i
< function_call_linkage
.length (); i
++)
2504 function_linkage_pair p
= function_call_linkage
[i
];
2506 BrigCodeOffset32_t
*func_offset
= function_offsets
->get (p
.function_decl
);
2507 gcc_assert (*func_offset
);
2508 BrigOperandCodeRef
*code_ref
2509 = (BrigOperandCodeRef
*) (brig_operand
.get_ptr_by_offset (p
.offset
));
2510 gcc_assert (code_ref
->base
.kind
== BRIG_KIND_OPERAND_CODE_REF
);
2511 code_ref
->ref
= lendian32 (*func_offset
);
2514 /* Iterate all function declarations and if we meet a function that should
2515 have module linkage and we are unable to emit HSAIL for the function,
2516 then change the linkage to program linkage. Doing so, we will emit
2517 a valid BRIG image. */
2518 if (hsa_failed_functions
!= NULL
&& emitted_declarations
!= NULL
)
2519 for (hash_map
<tree
, BrigDirectiveExecutable
*>::iterator it
2520 = emitted_declarations
->begin ();
2521 it
!= emitted_declarations
->end ();
2524 if (hsa_failed_functions
->contains ((*it
).first
))
2525 (*it
).second
->linkage
= BRIG_LINKAGE_PROGRAM
;
/* Remember the current section; it is switched back to once the BRIG
   section has been written.  */
2528 saved_section
= in_section
;
2530 switch_to_section (get_section (BRIG_ELF_SECTION_NAME
, SECTION_NOTYPE
, NULL
));
/* Emit an internal label at the current position of the section and create
   a VAR_DECL whose identifier and assembler name are that same label; the
   decl is later passed to hsa_output_libgomp_mapping.  */
2532 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, BRIG_LABEL_STRING
, 1);
2533 ASM_OUTPUT_LABEL (asm_out_file
, tmp_name
);
2534 tree brig_id
= get_identifier (tmp_name
);
2535 tree brig_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
, brig_id
,
2537 SET_DECL_ASSEMBLER_NAME (brig_decl
, brig_id
);
2538 TREE_ADDRESSABLE (brig_decl
) = 1;
2539 TREE_READONLY (brig_decl
) = 1;
2540 DECL_ARTIFICIAL (brig_decl
) = 1;
2541 DECL_IGNORED_P (brig_decl
) = 1;
2542 TREE_STATIC (brig_decl
) = 1;
2543 TREE_PUBLIC (brig_decl
) = 0;
2544 TREE_USED (brig_decl
) = 1;
2545 DECL_INITIAL (brig_decl
) = brig_decl
;
2546 TREE_ASM_WRITTEN (brig_decl
) = 1;
/* Fill in the module header.  Multi-byte fields are passed through
   lendian32/lendian64 before being stored.  */
2548 BrigModuleHeader module_header
;
2549 memcpy (&module_header
.identification
, "HSA BRIG",
2550 sizeof (module_header
.identification
));
2551 module_header
.brigMajor
= lendian32 (BRIG_VERSION_BRIG_MAJOR
);
2552 module_header
.brigMinor
= lendian32 (BRIG_VERSION_BRIG_MINOR
);
2553 uint64_t section_index
[3];
/* Padding appended after each section.  Note that the expression yields
   HSA_SECTION_ALIGNMENT, not 0, when total_size is already a multiple of
   HSA_SECTION_ALIGNMENT; the padding buffer below is sized for that
   maximum.  */
2555 int data_padding
, code_padding
, operand_padding
;
2556 data_padding
= HSA_SECTION_ALIGNMENT
2557 - brig_data
.total_size
% HSA_SECTION_ALIGNMENT
;
2558 code_padding
= HSA_SECTION_ALIGNMENT
2559 - brig_code
.total_size
% HSA_SECTION_ALIGNMENT
;
2560 operand_padding
= HSA_SECTION_ALIGNMENT
2561 - brig_operand
.total_size
% HSA_SECTION_ALIGNMENT
;
2563 uint64_t module_size
= sizeof (module_header
)
2564 + sizeof (section_index
)
2565 + brig_data
.total_size
2567 + brig_code
.total_size
2569 + brig_operand
.total_size
2571 gcc_assert ((module_size
% 16) == 0);
2572 module_header
.byteCount
= lendian64 (module_size
);
2573 memset (&module_header
.hash
, 0, sizeof (module_header
.hash
));
2574 module_header
.reserved
= 0;
2575 module_header
.sectionCount
= lendian32 (3);
/* The section index array is written immediately after the header, hence
   its offset equals the header size.  */
2576 module_header
.sectionIndex
= lendian64 (sizeof (module_header
));
2577 assemble_string ((const char *) &module_header
, sizeof (module_header
));
/* section_index[k] is the byte offset of section k from the start of the
   module: data first, then code, then operand, each preceded by the
   previous section's contents and padding.  */
2578 uint64_t off
= sizeof (module_header
) + sizeof (section_index
);
2579 section_index
[0] = lendian64 (off
);
2580 off
+= brig_data
.total_size
+ data_padding
;
2581 section_index
[1] = lendian64 (off
);
2582 off
+= brig_code
.total_size
+ code_padding
;
2583 section_index
[2] = lendian64 (off
);
/* Write out the raw bytes of the section index array.  */
2584 assemble_string ((const char *) &section_index
, sizeof (section_index
));
/* Zero-filled buffer used as the source of the padding bytes emitted after
   each section.  */
2586 char padding
[HSA_SECTION_ALIGNMENT
];
2587 memset (padding
, 0, sizeof (padding
));
2589 brig_data
.output ();
2590 assemble_string (padding
, data_padding
);
2591 brig_code
.output ();
2592 assemble_string (padding
, code_padding
);
2593 brig_operand
.output ();
2594 assemble_string (padding
, operand_padding
);
2597 switch_to_section (saved_section
);
/* Emit the libgomp registration data that refers to brig_decl, then release
   the per-compilation-unit state.  */
2599 hsa_output_libgomp_mapping (brig_decl
);
2601 hsa_free_decl_kernel_mapping ();
2602 brig_release_data ();
2603 hsa_deinit_compilation_unit_data ();
/* Reset the pointers after deletion so stale maps cannot be reused.  */
2605 delete emitted_declarations
;
2606 emitted_declarations
= NULL
;
2607 delete function_offsets
;
2608 function_offsets
= NULL
;