1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hash-table.h"
33 #include "tree-iterator.h"
34 #include "stor-layout.h"
38 #include "fold-const.h"
39 #include "stringpool.h"
40 #include "gimple-pretty-print.h"
41 #include "diagnostic-core.h"
44 #include "print-tree.h"
45 #include "symbol-summary.h"
47 #include "gomp-constants.h"
/* Return VAL re-encoded so that the bytes of the result, as they sit in
   host memory, are in little-endian order.  On a little-endian host this
   is the identity, on a big-endian host it is a byte swap.  */

static uint16_t
lendian16 (uint16_t val)
{
  /* Lay the bytes out least significant first and read them back as a host
     integer.  This needs no knowledge of the compiler version or of the
     host byte order, so there is a single code path for every host.  */
  unsigned char bytes[sizeof (val)];
  for (unsigned i = 0; i < sizeof (val); i++)
    bytes[i] = (unsigned char) (val >> (8 * i));

  uint16_t res;
  memcpy (&res, bytes, sizeof (res));
  return res;
}
/* Return VAL re-encoded so that the bytes of the result, as they sit in
   host memory, are in little-endian order.  On a little-endian host this
   is the identity, on a big-endian host it is a byte swap.  */

static uint32_t
lendian32 (uint32_t val)
{
  /* Lay the bytes out least significant first and read them back as a host
     integer.  This needs no knowledge of the compiler version or of the
     host byte order, so there is a single code path for every host.  */
  unsigned char bytes[sizeof (val)];
  for (unsigned i = 0; i < sizeof (val); i++)
    bytes[i] = (unsigned char) (val >> (8 * i));

  uint32_t res;
  memcpy (&res, bytes, sizeof (res));
  return res;
}
/* Return VAL re-encoded so that the bytes of the result, as they sit in
   host memory, are in little-endian order.  On a little-endian host this
   is the identity, on a big-endian host it is a byte swap.  */

static uint64_t
lendian64 (uint64_t val)
{
  /* Lay the bytes out least significant first and read them back as a host
     integer.  This needs no knowledge of the compiler version or of the
     host byte order, and avoids masking with signed 64-bit literals.  */
  unsigned char bytes[sizeof (val)];
  for (unsigned i = 0; i < sizeof (val); i++)
    bytes[i] = (unsigned char) (val >> (8 * i));

  uint64_t res;
  memcpy (&res, bytes, sizeof (res));
  return res;
}
126 #define BRIG_ELF_SECTION_NAME ".brig"
127 #define BRIG_LABEL_STRING "hsa_brig"
128 #define BRIG_SECTION_DATA_NAME "hsa_data"
129 #define BRIG_SECTION_CODE_NAME "hsa_code"
130 #define BRIG_SECTION_OPERAND_NAME "hsa_operand"
132 #define BRIG_CHUNK_MAX_SIZE (64 * 1024)
134 /* Required HSA section alignment. */
136 #define HSA_SECTION_ALIGNMENT 16
138 /* Chunks of BRIG binary data. */
140 struct hsa_brig_data_chunk
142 /* Size of the data already stored into a chunk. */
145 /* Pointer to the data. */
149 /* Structure representing a BRIG section, holding and writing its data. */
151 class hsa_brig_section
154 /* Section name that will be output to the BRIG. */
155 const char *section_name
;
156 /* Size in bytes of all data stored in the section. */
158 /* The size of the header of the section including padding. */
159 unsigned header_byte_count
;
160 /* The size of the header of the section without any padding. */
161 unsigned header_byte_delta
;
163 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
164 vec
<struct hsa_brig_data_chunk
> chunks
;
166 /* More convenient access to the last chunk from the vector above. */
167 struct hsa_brig_data_chunk
*cur_chunk
;
169 void allocate_new_chunk ();
170 void init (const char *name
);
173 unsigned add (const void *data
, unsigned len
);
174 void round_size_up (int factor
);
175 void *get_ptr_by_offset (unsigned int offset
);
178 static struct hsa_brig_section brig_data
, brig_code
, brig_operand
;
179 static uint32_t brig_insn_count
;
180 static bool brig_initialized
= false;
182 /* Mapping between emitted HSA functions and their offset in code segment. */
183 static hash_map
<tree
, BrigCodeOffset32_t
> *function_offsets
;
185 /* Hash map of emitted function declarations. */
186 static hash_map
<tree
, BrigDirectiveExecutable
*> *emitted_declarations
;
188 /* Hash table of emitted internal function declaration offsets. */
189 hash_table
<hsa_internal_fn_hasher
> *hsa_emitted_internal_decls
;
191 /* List of sbr instructions. */
192 static vec
<hsa_insn_sbr
*> *switch_instructions
;
194 struct function_linkage_pair
196 function_linkage_pair (tree decl
, unsigned int off
)
197 : function_decl (decl
), offset (off
) {}
199 /* Declaration of called function. */
202 /* Offset in operand section. */
206 /* Vector of function calls where we need to resolve function offsets. */
207 static auto_vec
<function_linkage_pair
> function_call_linkage
;
209 /* Add a new chunk, allocate data for it and initialize it. */
212 hsa_brig_section::allocate_new_chunk ()
214 struct hsa_brig_data_chunk new_chunk
;
216 new_chunk
.data
= XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE
);
218 cur_chunk
= chunks
.safe_push (new_chunk
);
221 /* Initialize the brig section. */
224 hsa_brig_section::init (const char *name
)
227 /* While the following computation is basically wrong, because the intent
228 certainly wasn't to have the first character of name and padding, which
229 are a part of sizeof (BrigSectionHeader), included in the first addend,
230 this is what the disassembler expects. */
231 total_size
= sizeof (BrigSectionHeader
) + strlen (section_name
);
233 allocate_new_chunk ();
234 header_byte_delta
= total_size
;
236 header_byte_count
= total_size
;
239 /* Free all data in the section. */
242 hsa_brig_section::release ()
244 for (unsigned i
= 0; i
< chunks
.length (); i
++)
245 free (chunks
[i
].data
);
250 /* Write the section to the output file to a section with the name given at
251 initialization. Switches the output section and does not restore it. */
254 hsa_brig_section::output ()
256 struct BrigSectionHeader section_header
;
259 section_header
.byteCount
= lendian64 (total_size
);
260 section_header
.headerByteCount
= lendian32 (header_byte_count
);
261 section_header
.nameLength
= lendian32 (strlen (section_name
));
262 assemble_string ((const char *) §ion_header
, 16);
263 assemble_string (section_name
, (section_header
.nameLength
));
264 memset (&padding
, 0, sizeof (padding
));
265 /* This is also a consequence of the wrong header size computation described
266 in a comment in hsa_brig_section::init. */
267 assemble_string (padding
, 8);
268 for (unsigned i
= 0; i
< chunks
.length (); i
++)
269 assemble_string (chunks
[i
].data
, chunks
[i
].size
);
272 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
273 which it was stored. */
276 hsa_brig_section::add (const void *data
, unsigned len
)
278 unsigned offset
= total_size
;
280 gcc_assert (len
<= BRIG_CHUNK_MAX_SIZE
);
281 if (cur_chunk
->size
> (BRIG_CHUNK_MAX_SIZE
- len
))
282 allocate_new_chunk ();
284 memcpy (cur_chunk
->data
+ cur_chunk
->size
, data
, len
);
285 cur_chunk
->size
+= len
;
291 /* Add padding to section so that its size is divisible by FACTOR. */
294 hsa_brig_section::round_size_up (int factor
)
296 unsigned padding
, res
= total_size
% factor
;
301 padding
= factor
- res
;
302 total_size
+= padding
;
303 if (cur_chunk
->size
> (BRIG_CHUNK_MAX_SIZE
- padding
))
305 padding
-= BRIG_CHUNK_MAX_SIZE
- cur_chunk
->size
;
306 cur_chunk
->size
= BRIG_CHUNK_MAX_SIZE
;
307 allocate_new_chunk ();
310 cur_chunk
->size
+= padding
;
313 /* Return pointer to data by global OFFSET in the section. */
316 hsa_brig_section::get_ptr_by_offset (unsigned int offset
)
318 gcc_assert (offset
< total_size
);
319 offset
-= header_byte_delta
;
322 for (i
= 0; offset
>= chunks
[i
].size
; i
++)
323 offset
-= chunks
[i
].size
;
325 return chunks
[i
].data
+ offset
;
328 /* BRIG string data hashing. */
330 struct brig_string_slot
338 /* Hash table helpers. */
340 struct brig_string_slot_hasher
: pointer_hash
<brig_string_slot
>
342 static inline hashval_t
hash (const value_type
);
343 static inline bool equal (const value_type
, const compare_type
);
344 static inline void remove (value_type
);
347 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
348 to support strings that may not end in '\0'. */
351 brig_string_slot_hasher::hash (const value_type ds
)
353 hashval_t r
= ds
->len
;
356 for (i
= 0; i
< ds
->len
; i
++)
357 r
= r
* 67 + (unsigned) ds
->s
[i
] - 113;
358 r
= r
* 67 + (unsigned) ds
->prefix
- 113;
362 /* Returns nonzero if DS1 and DS2 are equal. */
365 brig_string_slot_hasher::equal (const value_type ds1
, const compare_type ds2
)
367 if (ds1
->len
== ds2
->len
)
368 return ds1
->prefix
== ds2
->prefix
369 && memcmp (ds1
->s
, ds2
->s
, ds1
->len
) == 0;
374 /* Deallocate memory for DS upon its removal. */
377 brig_string_slot_hasher::remove (value_type ds
)
379 free (const_cast<char *> (ds
->s
));
383 /* Hash for strings we output in order not to duplicate them needlessly. */
385 static hash_table
<brig_string_slot_hasher
> *brig_string_htab
;
387 /* Emit a null terminated string STR to the data section and return its
388 offset in it. If PREFIX is non-zero, output it just before STR too.
389 Sanitize the string if SANITIZE option is set to true. */
392 brig_emit_string (const char *str
, char prefix
= 0, bool sanitize
= true)
394 unsigned slen
= strlen (str
);
395 unsigned offset
, len
= slen
+ (prefix
? 1 : 0);
396 uint32_t hdr_len
= lendian32 (len
);
397 brig_string_slot s_slot
;
398 brig_string_slot
**slot
;
401 str2
= xstrdup (str
);
404 hsa_sanitize_name (str2
);
407 s_slot
.prefix
= prefix
;
410 slot
= brig_string_htab
->find_slot (&s_slot
, INSERT
);
413 brig_string_slot
*new_slot
= XCNEW (brig_string_slot
);
415 /* In theory we should fill in BrigData but that would mean copying
416 the string to a buffer for no reason, so we just emulate it. */
417 offset
= brig_data
.add (&hdr_len
, sizeof (hdr_len
));
419 brig_data
.add (&prefix
, 1);
421 brig_data
.add (str2
, slen
);
422 brig_data
.round_size_up (4);
424 /* TODO: could use the string we just copied into
425 brig_string->cur_chunk */
427 new_slot
->len
= slen
;
428 new_slot
->prefix
= prefix
;
429 new_slot
->offset
= offset
;
434 offset
= (*slot
)->offset
;
441 /* Linked list of queued operands. */
443 static struct operand_queue
445 /* First from the chain of queued operands. */
446 hsa_op_base
*first_op
, *last_op
;
448 /* The offset at which the next operand will be enqueued. */
449 unsigned projected_size
;
453 /* Unless already initialized, initialize infrastructure to produce BRIG. */
460 if (brig_initialized
)
463 brig_string_htab
= new hash_table
<brig_string_slot_hasher
> (37);
464 brig_data
.init (BRIG_SECTION_DATA_NAME
);
465 brig_code
.init (BRIG_SECTION_CODE_NAME
);
466 brig_operand
.init (BRIG_SECTION_OPERAND_NAME
);
467 brig_initialized
= true;
469 struct BrigDirectiveModule moddir
;
470 memset (&moddir
, 0, sizeof (moddir
));
471 moddir
.base
.byteCount
= lendian16 (sizeof (moddir
));
474 if (main_input_filename
&& *main_input_filename
!= '\0')
476 const char *part
= strrchr (main_input_filename
, '/');
478 part
= main_input_filename
;
481 modname
= concat ("&__hsa_module_", part
, NULL
);
482 char *extension
= strchr (modname
, '.');
486 /* As in LTO mode, we have to emit a different module names. */
489 part
= strrchr (asm_file_name
, '/');
491 part
= asm_file_name
;
495 asprintf (&modname2
, "%s_%s", modname
, part
);
500 hsa_sanitize_name (modname
);
501 moddir
.name
= brig_emit_string (modname
);
505 moddir
.name
= brig_emit_string ("__hsa_module_unnamed", '&');
506 moddir
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_MODULE
);
507 moddir
.hsailMajor
= lendian32 (BRIG_VERSION_HSAIL_MAJOR
);
508 moddir
.hsailMinor
= lendian32 (BRIG_VERSION_HSAIL_MINOR
);
509 moddir
.profile
= hsa_full_profile_p () ? BRIG_PROFILE_FULL
: BRIG_PROFILE_BASE
;
510 if (hsa_machine_large_p ())
511 moddir
.machineModel
= BRIG_MACHINE_LARGE
;
513 moddir
.machineModel
= BRIG_MACHINE_SMALL
;
514 moddir
.defaultFloatRound
= BRIG_ROUND_FLOAT_DEFAULT
;
515 brig_code
.add (&moddir
, sizeof (moddir
));
518 /* Free all BRIG data. */
521 brig_release_data (void)
523 delete brig_string_htab
;
524 brig_data
.release ();
525 brig_code
.release ();
526 brig_operand
.release ();
528 brig_initialized
= 0;
531 /* Enqueue operation OP. Return the offset at which it will be stored. */
534 enqueue_op (hsa_op_base
*op
)
538 if (op
->m_brig_op_offset
)
539 return op
->m_brig_op_offset
;
541 ret
= op_queue
.projected_size
;
542 op
->m_brig_op_offset
= op_queue
.projected_size
;
544 if (!op_queue
.first_op
)
545 op_queue
.first_op
= op
;
547 op_queue
.last_op
->m_next
= op
;
548 op_queue
.last_op
= op
;
550 if (is_a
<hsa_op_immed
*> (op
))
551 op_queue
.projected_size
+= sizeof (struct BrigOperandConstantBytes
);
552 else if (is_a
<hsa_op_reg
*> (op
))
553 op_queue
.projected_size
+= sizeof (struct BrigOperandRegister
);
554 else if (is_a
<hsa_op_address
*> (op
))
555 op_queue
.projected_size
+= sizeof (struct BrigOperandAddress
);
556 else if (is_a
<hsa_op_code_ref
*> (op
))
557 op_queue
.projected_size
+= sizeof (struct BrigOperandCodeRef
);
558 else if (is_a
<hsa_op_code_list
*> (op
))
559 op_queue
.projected_size
+= sizeof (struct BrigOperandCodeList
);
560 else if (is_a
<hsa_op_operand_list
*> (op
))
561 op_queue
.projected_size
+= sizeof (struct BrigOperandOperandList
);
568 /* Emit directive describing a symbol if it has not been emitted already.
569 Return the offset of the directive. */
572 emit_directive_variable (struct hsa_symbol
*symbol
)
574 struct BrigDirectiveVariable dirvar
;
575 unsigned name_offset
;
576 static unsigned res_name_offset
;
578 if (symbol
->m_directive_offset
)
579 return symbol
->m_directive_offset
;
581 memset (&dirvar
, 0, sizeof (dirvar
));
582 dirvar
.base
.byteCount
= lendian16 (sizeof (dirvar
));
583 dirvar
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE
);
584 dirvar
.allocation
= symbol
->m_allocation
;
586 char prefix
= symbol
->m_global_scope_p
? '&' : '%';
588 if (symbol
->m_decl
&& TREE_CODE (symbol
->m_decl
) == RESULT_DECL
)
590 if (res_name_offset
== 0)
591 res_name_offset
= brig_emit_string (symbol
->m_name
, '%');
592 name_offset
= res_name_offset
;
594 else if (symbol
->m_name
)
595 name_offset
= brig_emit_string (symbol
->m_name
, prefix
);
599 snprintf (buf
, 64, "__%s_%i", hsa_seg_name (symbol
->m_segment
),
600 symbol
->m_name_number
);
601 name_offset
= brig_emit_string (buf
, prefix
);
604 dirvar
.name
= lendian32 (name_offset
);
606 dirvar
.type
= lendian16 (symbol
->m_type
);
607 dirvar
.segment
= symbol
->m_segment
;
608 dirvar
.align
= symbol
->m_align
;
609 dirvar
.linkage
= symbol
->m_linkage
;
610 dirvar
.dim
.lo
= symbol
->m_dim
;
611 dirvar
.dim
.hi
= symbol
->m_dim
>> 32;
613 /* Global variables are just declared and linked via HSA runtime. */
614 if (symbol
->m_linkage
!= BRIG_ALLOCATION_PROGRAM
)
615 dirvar
.modifier
|= BRIG_VARIABLE_DEFINITION
;
618 if (symbol
->m_cst_value
)
620 dirvar
.modifier
|= BRIG_VARIABLE_CONST
;
621 dirvar
.init
= lendian32 (enqueue_op (symbol
->m_cst_value
));
624 symbol
->m_directive_offset
= brig_code
.add (&dirvar
, sizeof (dirvar
));
625 return symbol
->m_directive_offset
;
/* Emit directives describing either a function declaration or a function
   definition F and return a pointer to a BrigDirectiveExecutable.  */
631 static BrigDirectiveExecutable
*
632 emit_function_directives (hsa_function_representation
*f
, bool is_declaration
)
634 struct BrigDirectiveExecutable fndir
;
635 unsigned name_offset
, inarg_off
, scoped_off
, next_toplev_off
;
637 BrigDirectiveExecutable
*ptr_to_fndir
;
640 if (!f
->m_declaration_p
)
641 for (int i
= 0; f
->m_global_symbols
.iterate (i
, &sym
); i
++)
643 gcc_assert (!sym
->m_emitted_to_brig
);
644 sym
->m_emitted_to_brig
= true;
645 emit_directive_variable (sym
);
649 name_offset
= brig_emit_string (f
->m_name
, '&');
650 inarg_off
= brig_code
.total_size
+ sizeof (fndir
)
651 + (f
->m_output_arg
? sizeof (struct BrigDirectiveVariable
) : 0);
652 scoped_off
= inarg_off
653 + f
->m_input_args
.length () * sizeof (struct BrigDirectiveVariable
);
655 if (!f
->m_declaration_p
)
657 count
+= f
->m_spill_symbols
.length ();
658 count
+= f
->m_private_variables
.length ();
661 next_toplev_off
= scoped_off
+ count
* sizeof (struct BrigDirectiveVariable
);
663 memset (&fndir
, 0, sizeof (fndir
));
664 fndir
.base
.byteCount
= lendian16 (sizeof (fndir
));
665 fndir
.base
.kind
= lendian16 (f
->m_kern_p
? BRIG_KIND_DIRECTIVE_KERNEL
666 : BRIG_KIND_DIRECTIVE_FUNCTION
);
667 fndir
.name
= lendian32 (name_offset
);
668 fndir
.inArgCount
= lendian16 (f
->m_input_args
.length ());
669 fndir
.outArgCount
= lendian16 (f
->m_output_arg
? 1 : 0);
670 fndir
.firstInArg
= lendian32 (inarg_off
);
671 fndir
.firstCodeBlockEntry
= lendian32 (scoped_off
);
672 fndir
.nextModuleEntry
= lendian32 (next_toplev_off
);
673 fndir
.linkage
= f
->get_linkage ();
674 if (!f
->m_declaration_p
)
675 fndir
.modifier
|= BRIG_EXECUTABLE_DEFINITION
;
676 memset (&fndir
.reserved
, 0, sizeof (fndir
.reserved
));
678 /* Once we put a definition of function_offsets, we should not overwrite
679 it with a declaration of the function. */
680 if (f
->m_internal_fn
== NULL
)
682 if (!function_offsets
->get (f
->m_decl
) || !is_declaration
)
683 function_offsets
->put (f
->m_decl
, brig_code
.total_size
);
687 /* Internal function. */
688 hsa_internal_fn
**slot
689 = hsa_emitted_internal_decls
->find_slot (f
->m_internal_fn
, INSERT
);
690 hsa_internal_fn
*int_fn
= new hsa_internal_fn (f
->m_internal_fn
);
691 int_fn
->m_offset
= brig_code
.total_size
;
695 brig_code
.add (&fndir
, sizeof (fndir
));
696 /* terrible hack: we need to set instCount after we emit all
697 insns, but we need to emit directive in order, and we emit directives
698 during insn emitting. So we need to emit the FUNCTION directive
699 early, then the insns, and then we need to set instCount, so remember
700 a pointer to it, in some horrible way. cur_chunk.data+size points
701 directly to after fndir here. */
703 = (BrigDirectiveExecutable
*)(brig_code
.cur_chunk
->data
704 + brig_code
.cur_chunk
->size
708 emit_directive_variable (f
->m_output_arg
);
709 for (unsigned i
= 0; i
< f
->m_input_args
.length (); i
++)
710 emit_directive_variable (f
->m_input_args
[i
]);
712 if (!f
->m_declaration_p
)
714 for (int i
= 0; f
->m_spill_symbols
.iterate (i
, &sym
); i
++)
716 emit_directive_variable (sym
);
719 for (unsigned i
= 0; i
< f
->m_private_variables
.length (); i
++)
721 emit_directive_variable (f
->m_private_variables
[i
]);
729 /* Emit a label directive for the given HBB. We assume it is about to start on
730 the current offset in the code section. */
733 emit_bb_label_directive (hsa_bb
*hbb
)
735 struct BrigDirectiveLabel lbldir
;
737 lbldir
.base
.byteCount
= lendian16 (sizeof (lbldir
));
738 lbldir
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_LABEL
);
740 snprintf (buf
, 32, "BB_%u_%i", DECL_UID (current_function_decl
),
742 lbldir
.name
= lendian32 (brig_emit_string (buf
, '@'));
744 hbb
->m_label_ref
.m_directive_offset
= brig_code
.add (&lbldir
,
749 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
750 holding such, for constants and registers. */
753 regtype_for_type (BrigType16_t t
)
772 case BRIG_TYPE_U16X2
:
774 case BRIG_TYPE_S16X2
:
775 case BRIG_TYPE_F16X2
:
776 return BRIG_TYPE_B32
;
783 case BRIG_TYPE_U16X4
:
784 case BRIG_TYPE_U32X2
:
786 case BRIG_TYPE_S16X4
:
787 case BRIG_TYPE_S32X2
:
788 case BRIG_TYPE_F16X4
:
789 case BRIG_TYPE_F32X2
:
790 return BRIG_TYPE_B64
;
793 case BRIG_TYPE_U8X16
:
794 case BRIG_TYPE_U16X8
:
795 case BRIG_TYPE_U32X4
:
796 case BRIG_TYPE_U64X2
:
797 case BRIG_TYPE_S8X16
:
798 case BRIG_TYPE_S16X8
:
799 case BRIG_TYPE_S32X4
:
800 case BRIG_TYPE_S64X2
:
801 case BRIG_TYPE_F16X8
:
802 case BRIG_TYPE_F32X4
:
803 case BRIG_TYPE_F64X2
:
804 return BRIG_TYPE_B128
;
811 /* Return the length of the BRIG type TYPE that is going to be streamed out as
812 an immediate constant (so it must not be B1). */
815 hsa_get_imm_brig_type_len (BrigType16_t type
)
817 BrigType16_t base_type
= type
& BRIG_TYPE_BASE_MASK
;
818 BrigType16_t pack_type
= type
& BRIG_TYPE_PACK_MASK
;
822 case BRIG_TYPE_PACK_NONE
:
824 case BRIG_TYPE_PACK_32
:
826 case BRIG_TYPE_PACK_64
:
828 case BRIG_TYPE_PACK_128
:
862 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
863 If NEED_LEN is not equal to zero, shrink or extend the value
864 to NEED_LEN bytes. Return how many bytes were written. */
867 emit_immediate_scalar_to_buffer (tree value
, char *data
, unsigned need_len
)
869 union hsa_bytes bytes
;
871 memset (&bytes
, 0, sizeof (bytes
));
872 tree type
= TREE_TYPE (value
);
873 gcc_checking_assert (TREE_CODE (type
) != VECTOR_TYPE
);
875 unsigned data_len
= tree_to_uhwi (TYPE_SIZE (type
)) / BITS_PER_UNIT
;
876 if (INTEGRAL_TYPE_P (type
)
877 || (POINTER_TYPE_P (type
) && TREE_CODE (value
) == INTEGER_CST
))
881 bytes
.b8
= (uint8_t) TREE_INT_CST_LOW (value
);
884 bytes
.b16
= (uint16_t) TREE_INT_CST_LOW (value
);
887 bytes
.b32
= (uint32_t) TREE_INT_CST_LOW (value
);
890 bytes
.b64
= (uint64_t) TREE_INT_CST_LOW (value
);
895 else if (SCALAR_FLOAT_TYPE_P (type
))
899 sorry ("Support for HSA does not implement immediate 16 bit FPU "
903 unsigned int_len
= GET_MODE_SIZE (TYPE_MODE (type
));
904 /* There are always 32 bits in each long, no matter the size of
908 real_to_target (tmp
, TREE_REAL_CST_PTR (value
), TYPE_MODE (type
));
911 bytes
.b32
= (uint32_t) tmp
[0];
914 bytes
.b64
= (uint64_t)(uint32_t) tmp
[1];
916 bytes
.b64
|= (uint32_t) tmp
[0];
928 memcpy (data
, &bytes
, len
);
933 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size
)
936 *brig_repr_size
= hsa_get_imm_brig_type_len (m_type
);
938 if (m_tree_value
!= NULL_TREE
)
940 /* Update brig_repr_size for special tree values. */
941 if (TREE_CODE (m_tree_value
) == STRING_CST
)
942 *brig_repr_size
= TREE_STRING_LENGTH (m_tree_value
);
943 else if (TREE_CODE (m_tree_value
) == CONSTRUCTOR
)
945 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value
)));
947 unsigned total_len
= *brig_repr_size
;
949 /* As we can have a constructor with fewer elements, fill the memory
951 brig_repr
= XCNEWVEC (char, total_len
);
954 if (TREE_CODE (m_tree_value
) == VECTOR_CST
)
956 int i
, num
= VECTOR_CST_NELTS (m_tree_value
);
957 for (i
= 0; i
< num
; i
++)
959 tree v
= VECTOR_CST_ELT (m_tree_value
, i
);
960 unsigned actual
= emit_immediate_scalar_to_buffer (v
, p
, 0);
964 /* Vectors should have the exact size. */
965 gcc_assert (total_len
== 0);
967 else if (TREE_CODE (m_tree_value
) == STRING_CST
)
968 memcpy (brig_repr
, TREE_STRING_POINTER (m_tree_value
),
969 TREE_STRING_LENGTH (m_tree_value
));
970 else if (TREE_CODE (m_tree_value
) == COMPLEX_CST
)
972 gcc_assert (total_len
% 2 == 0);
975 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value
), p
,
978 gcc_assert (actual
== total_len
/ 2);
982 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value
), p
,
984 gcc_assert (actual
== total_len
/ 2);
986 else if (TREE_CODE (m_tree_value
) == CONSTRUCTOR
)
988 unsigned len
= vec_safe_length (CONSTRUCTOR_ELTS (m_tree_value
));
989 for (unsigned i
= 0; i
< len
; i
++)
991 tree v
= CONSTRUCTOR_ELT (m_tree_value
, i
)->value
;
992 unsigned actual
= emit_immediate_scalar_to_buffer (v
, p
, 0);
998 emit_immediate_scalar_to_buffer (m_tree_value
, p
, total_len
);
1004 switch (*brig_repr_size
)
1007 bytes
.b8
= (uint8_t) m_int_value
;
1010 bytes
.b16
= (uint16_t) m_int_value
;
1013 bytes
.b32
= (uint32_t) m_int_value
;
1016 bytes
.b64
= (uint64_t) m_int_value
;
1022 brig_repr
= XNEWVEC (char, *brig_repr_size
);
1023 memcpy (brig_repr
, &bytes
, *brig_repr_size
);
1029 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1030 have been massaged to comply with various HSA/BRIG type requirements, so the
1031 only important aspect of that is the length (because HSAIL might expect
1032 smaller constants or become bit-data). The data should be represented
1033 according to what is in the tree representation. */
1036 emit_immediate_operand (hsa_op_immed
*imm
)
1038 unsigned brig_repr_size
;
1039 char *brig_repr
= imm
->emit_to_buffer (&brig_repr_size
);
1040 struct BrigOperandConstantBytes out
;
1042 memset (&out
, 0, sizeof (out
));
1043 out
.base
.byteCount
= lendian16 (sizeof (out
));
1044 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES
);
1045 uint32_t byteCount
= lendian32 (brig_repr_size
);
1046 out
.type
= lendian16 (imm
->m_type
);
1047 out
.bytes
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1048 brig_operand
.add (&out
, sizeof (out
));
1049 brig_data
.add (brig_repr
, brig_repr_size
);
1050 brig_data
.round_size_up (4);
1055 /* Emit a register BRIG operand REG. */
1058 emit_register_operand (hsa_op_reg
*reg
)
1060 struct BrigOperandRegister out
;
1062 out
.base
.byteCount
= lendian16 (sizeof (out
));
1063 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_REGISTER
);
1064 out
.regNum
= lendian32 (reg
->m_hard_num
);
1066 switch (regtype_for_type (reg
->m_type
))
1069 out
.regKind
= BRIG_REGISTER_KIND_SINGLE
;
1072 out
.regKind
= BRIG_REGISTER_KIND_DOUBLE
;
1074 case BRIG_TYPE_B128
:
1075 out
.regKind
= BRIG_REGISTER_KIND_QUAD
;
1078 out
.regKind
= BRIG_REGISTER_KIND_CONTROL
;
1084 brig_operand
.add (&out
, sizeof (out
));
1087 /* Emit an address BRIG operand ADDR. */
1090 emit_address_operand (hsa_op_address
*addr
)
1092 struct BrigOperandAddress out
;
1094 out
.base
.byteCount
= lendian16 (sizeof (out
));
1095 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_ADDRESS
);
1096 out
.symbol
= addr
->m_symbol
1097 ? lendian32 (emit_directive_variable (addr
->m_symbol
)) : 0;
1098 out
.reg
= addr
->m_reg
? lendian32 (enqueue_op (addr
->m_reg
)) : 0;
1100 if (sizeof (addr
->m_imm_offset
) == 8)
1102 out
.offset
.lo
= lendian32 (addr
->m_imm_offset
);
1103 out
.offset
.hi
= lendian32 (addr
->m_imm_offset
>> 32);
1107 gcc_assert (sizeof (addr
->m_imm_offset
) == 4);
1108 out
.offset
.lo
= lendian32 (addr
->m_imm_offset
);
1112 brig_operand
.add (&out
, sizeof (out
));
1115 /* Emit a code reference operand REF. */
1118 emit_code_ref_operand (hsa_op_code_ref
*ref
)
1120 struct BrigOperandCodeRef out
;
1122 out
.base
.byteCount
= lendian16 (sizeof (out
));
1123 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CODE_REF
);
1124 out
.ref
= lendian32 (ref
->m_directive_offset
);
1125 brig_operand
.add (&out
, sizeof (out
));
1128 /* Emit a code list operand CODE_LIST. */
1131 emit_code_list_operand (hsa_op_code_list
*code_list
)
1133 struct BrigOperandCodeList out
;
1134 unsigned args
= code_list
->m_offsets
.length ();
1136 for (unsigned i
= 0; i
< args
; i
++)
1137 gcc_assert (code_list
->m_offsets
[i
]);
1139 out
.base
.byteCount
= lendian16 (sizeof (out
));
1140 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CODE_LIST
);
1142 uint32_t byteCount
= lendian32 (4 * args
);
1144 out
.elements
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1145 brig_data
.add (code_list
->m_offsets
.address (), args
* sizeof (uint32_t));
1146 brig_data
.round_size_up (4);
1147 brig_operand
.add (&out
, sizeof (out
));
1150 /* Emit an operand list operand OPERAND_LIST. */
1153 emit_operand_list_operand (hsa_op_operand_list
*operand_list
)
1155 struct BrigOperandOperandList out
;
1156 unsigned args
= operand_list
->m_offsets
.length ();
1158 for (unsigned i
= 0; i
< args
; i
++)
1159 gcc_assert (operand_list
->m_offsets
[i
]);
1161 out
.base
.byteCount
= lendian16 (sizeof (out
));
1162 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST
);
1164 uint32_t byteCount
= lendian32 (4 * args
);
1166 out
.elements
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1167 brig_data
.add (operand_list
->m_offsets
.address (), args
* sizeof (uint32_t));
1168 brig_data
.round_size_up (4);
1169 brig_operand
.add (&out
, sizeof (out
));
1172 /* Emit all operands queued for writing. */
1175 emit_queued_operands (void)
1177 for (hsa_op_base
*op
= op_queue
.first_op
; op
; op
= op
->m_next
)
1179 gcc_assert (op
->m_brig_op_offset
== brig_operand
.total_size
);
1180 if (hsa_op_immed
*imm
= dyn_cast
<hsa_op_immed
*> (op
))
1181 emit_immediate_operand (imm
);
1182 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
1183 emit_register_operand (reg
);
1184 else if (hsa_op_address
*addr
= dyn_cast
<hsa_op_address
*> (op
))
1185 emit_address_operand (addr
);
1186 else if (hsa_op_code_ref
*ref
= dyn_cast
<hsa_op_code_ref
*> (op
))
1187 emit_code_ref_operand (ref
);
1188 else if (hsa_op_code_list
*code_list
= dyn_cast
<hsa_op_code_list
*> (op
))
1189 emit_code_list_operand (code_list
);
1190 else if (hsa_op_operand_list
*l
= dyn_cast
<hsa_op_operand_list
*> (op
))
1191 emit_operand_list_operand (l
);
1197 /* Emit directives describing the function that is used for
1198 a function declaration. */
1200 static BrigDirectiveExecutable
*
1201 emit_function_declaration (tree decl
)
1203 hsa_function_representation
*f
= hsa_generate_function_declaration (decl
);
1205 BrigDirectiveExecutable
*e
= emit_function_directives (f
, true);
1206 emit_queued_operands ();
1213 /* Emit directives describing the function that is used for
1214 an internal function declaration. */
1216 static BrigDirectiveExecutable
*
1217 emit_internal_fn_decl (hsa_internal_fn
*fn
)
1219 hsa_function_representation
*f
= hsa_generate_internal_fn_decl (fn
);
1221 BrigDirectiveExecutable
*e
= emit_function_directives (f
, true);
1222 emit_queued_operands ();
1229 /* Enqueue all operands of INSN and return offset to BRIG data section
1230 to list of operand offsets. */
1233 emit_insn_operands (hsa_insn_basic
*insn
)
1235 auto_vec
<BrigOperandOffset32_t
, HSA_BRIG_INT_STORAGE_OPERANDS
>
1238 unsigned l
= insn
->operand_count ();
1239 operand_offsets
.safe_grow (l
);
1241 for (unsigned i
= 0; i
< l
; i
++)
1242 operand_offsets
[i
] = lendian32 (enqueue_op (insn
->get_op (i
)));
1244 /* We have N operands so use 4 * N for the byte_count. */
1245 uint32_t byte_count
= lendian32 (4 * l
);
1247 unsigned offset
= brig_data
.add (&byte_count
, sizeof (byte_count
));
1248 brig_data
.add (operand_offsets
.address (),
1249 l
* sizeof (BrigOperandOffset32_t
));
1251 brig_data
.round_size_up (4);
1256 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1257 to BRIG data section to list of operand offsets. */
1260 emit_operands (hsa_op_base
*op0
, hsa_op_base
*op1
= NULL
,
1261 hsa_op_base
*op2
= NULL
)
1263 auto_vec
<BrigOperandOffset32_t
, HSA_BRIG_INT_STORAGE_OPERANDS
>
1266 gcc_checking_assert (op0
!= NULL
);
1267 operand_offsets
.safe_push (enqueue_op (op0
));
1271 operand_offsets
.safe_push (enqueue_op (op1
));
1273 operand_offsets
.safe_push (enqueue_op (op2
));
1276 unsigned l
= operand_offsets
.length ();
1278 /* We have N operands so use 4 * N for the byte_count. */
1279 uint32_t byte_count
= lendian32 (4 * l
);
1281 unsigned offset
= brig_data
.add (&byte_count
, sizeof (byte_count
));
1282 brig_data
.add (operand_offsets
.address (),
1283 l
* sizeof (BrigOperandOffset32_t
));
1285 brig_data
.round_size_up (4);
1290 /* Emit an HSA memory instruction and all necessary directives, schedule
1291 necessary operands for writing. */
1294 emit_memory_insn (hsa_insn_mem
*mem
)
1296 struct BrigInstMem repr
;
1297 gcc_checking_assert (mem
->operand_count () == 2);
1299 hsa_op_address
*addr
= as_a
<hsa_op_address
*> (mem
->get_op (1));
1301 /* This is necessary because of the erroneous typedef of
1302 BrigMemoryModifier8_t which introduces padding which may then contain
1303 random stuff (which we do not want so that we can test things don't
1305 memset (&repr
, 0, sizeof (repr
));
1306 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1307 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MEM
);
1308 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1309 repr
.base
.type
= lendian16 (mem
->m_type
);
1310 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1313 repr
.segment
= addr
->m_symbol
->m_segment
;
1315 repr
.segment
= BRIG_SEGMENT_FLAT
;
1317 repr
.equivClass
= mem
->m_equiv_class
;
1318 repr
.align
= mem
->m_align
;
1319 if (mem
->m_opcode
== BRIG_OPCODE_LD
)
1320 repr
.width
= BRIG_WIDTH_1
;
1322 repr
.width
= BRIG_WIDTH_NONE
;
1323 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1324 brig_code
.add (&repr
, sizeof (repr
));
1328 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1329 necessary operands for writing. */
1332 emit_signal_insn (hsa_insn_signal
*mem
)
1334 struct BrigInstSignal repr
;
1336 /* This is necessary because of the erroneous typedef of
1337 BrigMemoryModifier8_t which introduces padding which may then contain
1338 random stuff (which we do not want so that we can test things don't
1340 memset (&repr
, 0, sizeof (repr
));
1341 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1342 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SIGNAL
);
1343 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1344 repr
.base
.type
= lendian16 (mem
->m_type
);
1345 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1347 repr
.memoryOrder
= mem
->m_memoryorder
;
1348 repr
.signalOperation
= mem
->m_atomicop
;
1349 repr
.signalType
= BRIG_TYPE_SIG64
;
1351 brig_code
.add (&repr
, sizeof (repr
));
1355 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1356 necessary operands for writing. */
1359 emit_atomic_insn (hsa_insn_atomic
*mem
)
1361 struct BrigInstAtomic repr
;
1363 /* Either operand[0] or operand[1] must be an address operand. */
1364 hsa_op_address
*addr
= NULL
;
1365 if (is_a
<hsa_op_address
*> (mem
->get_op (0)))
1366 addr
= as_a
<hsa_op_address
*> (mem
->get_op (0));
1368 addr
= as_a
<hsa_op_address
*> (mem
->get_op (1));
1370 /* This is necessary because of the erroneous typedef of
1371 BrigMemoryModifier8_t which introduces padding which may then contain
1372 random stuff (which we do not want so that we can test things don't
1374 memset (&repr
, 0, sizeof (repr
));
1375 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1376 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_ATOMIC
);
1377 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1378 repr
.base
.type
= lendian16 (mem
->m_type
);
1379 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1382 repr
.segment
= addr
->m_symbol
->m_segment
;
1384 repr
.segment
= BRIG_SEGMENT_FLAT
;
1385 repr
.memoryOrder
= mem
->m_memoryorder
;
1386 repr
.memoryScope
= mem
->m_memoryscope
;
1387 repr
.atomicOperation
= mem
->m_atomicop
;
1389 brig_code
.add (&repr
, sizeof (repr
));
1393 /* Emit an HSA LDA instruction and all necessary directives, schedule
1394 necessary operands for writing. */
1397 emit_addr_insn (hsa_insn_basic
*insn
)
1399 struct BrigInstAddr repr
;
1401 hsa_op_address
*addr
= as_a
<hsa_op_address
*> (insn
->get_op (1));
1403 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1404 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_ADDR
);
1405 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1406 repr
.base
.type
= lendian16 (insn
->m_type
);
1407 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1410 repr
.segment
= addr
->m_symbol
->m_segment
;
1412 repr
.segment
= BRIG_SEGMENT_FLAT
;
1413 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1415 brig_code
.add (&repr
, sizeof (repr
));
1419 /* Emit an HSA segment conversion instruction and all necessary directives,
1420 schedule necessary operands for writing. */
1423 emit_segment_insn (hsa_insn_seg
*seg
)
1425 struct BrigInstSegCvt repr
;
1427 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1428 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SEG_CVT
);
1429 repr
.base
.opcode
= lendian16 (seg
->m_opcode
);
1430 repr
.base
.type
= lendian16 (seg
->m_type
);
1431 repr
.base
.operands
= lendian32 (emit_insn_operands (seg
));
1432 repr
.sourceType
= lendian16 (as_a
<hsa_op_reg
*> (seg
->get_op (1))->m_type
);
1433 repr
.segment
= seg
->m_segment
;
1436 brig_code
.add (&repr
, sizeof (repr
));
1441 /* Emit an HSA alloca instruction and all necessary directives,
1442 schedule necessary operands for writing. */
1445 emit_alloca_insn (hsa_insn_alloca
*alloca
)
1447 struct BrigInstMem repr
;
1448 gcc_checking_assert (alloca
->operand_count () == 2);
1450 /* This is necessary because of the erroneous typedef of
1451 BrigMemoryModifier8_t which introduces padding which may then contain
1452 random stuff (which we do not want so that we can test things don't
1454 memset (&repr
, 0, sizeof (repr
));
1455 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1456 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MEM
);
1457 repr
.base
.opcode
= lendian16 (alloca
->m_opcode
);
1458 repr
.base
.type
= lendian16 (alloca
->m_type
);
1459 repr
.base
.operands
= lendian32 (emit_insn_operands (alloca
));
1460 repr
.segment
= BRIG_SEGMENT_PRIVATE
;
1462 repr
.equivClass
= 0;
1463 repr
.align
= alloca
->m_align
;
1464 repr
.width
= BRIG_WIDTH_NONE
;
1465 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1466 brig_code
.add (&repr
, sizeof (repr
));
1470 /* Emit an HSA comparison instruction and all necessary directives,
1471 schedule necessary operands for writing. */
1474 emit_cmp_insn (hsa_insn_cmp
*cmp
)
1476 struct BrigInstCmp repr
;
1478 memset (&repr
, 0, sizeof (repr
));
1479 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1480 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_CMP
);
1481 repr
.base
.opcode
= lendian16 (cmp
->m_opcode
);
1482 repr
.base
.type
= lendian16 (cmp
->m_type
);
1483 repr
.base
.operands
= lendian32 (emit_insn_operands (cmp
));
1485 if (is_a
<hsa_op_reg
*> (cmp
->get_op (1)))
1487 = lendian16 (as_a
<hsa_op_reg
*> (cmp
->get_op (1))->m_type
);
1490 = lendian16 (as_a
<hsa_op_immed
*> (cmp
->get_op (1))->m_type
);
1492 repr
.compare
= cmp
->m_compare
;
1495 brig_code
.add (&repr
, sizeof (repr
));
1499 /* Emit an HSA branching instruction and all necessary directives, schedule
1500 necessary operands for writing. */
1503 emit_branch_insn (hsa_insn_br
*br
)
1505 struct BrigInstBr repr
;
1507 basic_block target
= NULL
;
1511 /* At the moment we only handle direct conditional jumps. */
1512 gcc_assert (br
->m_opcode
== BRIG_OPCODE_CBR
);
1513 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1514 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1515 repr
.base
.opcode
= lendian16 (br
->m_opcode
);
1516 repr
.width
= BRIG_WIDTH_1
;
1517 /* For Conditional jumps the type is always B1. */
1518 repr
.base
.type
= lendian16 (BRIG_TYPE_B1
);
1520 FOR_EACH_EDGE (e
, ei
, br
->m_bb
->succs
)
1521 if (e
->flags
& EDGE_TRUE_VALUE
)
1526 gcc_assert (target
);
1529 = lendian32 (emit_operands (br
->get_op (0),
1530 &hsa_bb_for_bb (target
)->m_label_ref
));
1531 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1533 brig_code
.add (&repr
, sizeof (repr
));
1537 /* Emit an HSA unconditional jump branching instruction that points to
1538 a label REFERENCE. */
1541 emit_unconditional_jump (hsa_op_code_ref
*reference
)
1543 struct BrigInstBr repr
;
1545 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1546 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1547 repr
.base
.opcode
= lendian16 (BRIG_OPCODE_BR
);
1548 repr
.base
.type
= lendian16 (BRIG_TYPE_NONE
);
1549 /* Direct branches to labels must be width(all). */
1550 repr
.width
= BRIG_WIDTH_ALL
;
1552 repr
.base
.operands
= lendian32 (emit_operands (reference
));
1553 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1554 brig_code
.add (&repr
, sizeof (repr
));
1558 /* Emit an HSA switch jump instruction that uses a jump table to
1559 jump to a destination label. */
1562 emit_switch_insn (hsa_insn_sbr
*sbr
)
1564 struct BrigInstBr repr
;
1566 gcc_assert (sbr
->m_opcode
== BRIG_OPCODE_SBR
);
1567 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1568 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1569 repr
.base
.opcode
= lendian16 (sbr
->m_opcode
);
1570 repr
.width
= BRIG_WIDTH_1
;
1571 /* For Conditional jumps the type is always B1. */
1572 hsa_op_reg
*index
= as_a
<hsa_op_reg
*> (sbr
->get_op (0));
1573 repr
.base
.type
= lendian16 (index
->m_type
);
1575 = lendian32 (emit_operands (sbr
->get_op (0), sbr
->m_label_code_list
));
1576 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1578 brig_code
.add (&repr
, sizeof (repr
));
1582 /* Emit a HSA convert instruction and all necessary directives, schedule
1583 necessary operands for writing. */
1586 emit_cvt_insn (hsa_insn_cvt
*insn
)
1588 struct BrigInstCvt repr
;
1589 BrigType16_t srctype
;
1591 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1592 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_CVT
);
1593 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1594 repr
.base
.type
= lendian16 (insn
->m_type
);
1595 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1597 if (is_a
<hsa_op_reg
*> (insn
->get_op (1)))
1598 srctype
= as_a
<hsa_op_reg
*> (insn
->get_op (1))->m_type
;
1600 srctype
= as_a
<hsa_op_immed
*> (insn
->get_op (1))->m_type
;
1601 repr
.sourceType
= lendian16 (srctype
);
1603 /* float to smaller float requires a rounding setting (we default
1605 if (hsa_type_float_p (insn
->m_type
)
1606 && (!hsa_type_float_p (srctype
)
1607 || ((insn
->m_type
& BRIG_TYPE_BASE_MASK
)
1608 < (srctype
& BRIG_TYPE_BASE_MASK
))))
1609 repr
.round
= BRIG_ROUND_FLOAT_NEAR_EVEN
;
1610 else if (hsa_type_integer_p (insn
->m_type
) &&
1611 hsa_type_float_p (srctype
))
1612 repr
.round
= BRIG_ROUND_INTEGER_ZERO
;
1614 repr
.round
= BRIG_ROUND_NONE
;
1615 brig_code
.add (&repr
, sizeof (repr
));
1619 /* Emit call instruction INSN, where this instruction must be closed
1620 within a call block instruction. */
1623 emit_call_insn (hsa_insn_call
*call
)
1625 struct BrigInstBr repr
;
1627 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1628 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1629 repr
.base
.opcode
= lendian16 (BRIG_OPCODE_CALL
);
1630 repr
.base
.type
= lendian16 (BRIG_TYPE_NONE
);
1633 = lendian32 (emit_operands (call
->m_result_code_list
, &call
->m_func
,
1634 call
->m_args_code_list
));
1636 /* Internal functions have not set m_called_function. */
1637 if (call
->m_called_function
)
1639 function_linkage_pair
pair (call
->m_called_function
,
1640 call
->m_func
.m_brig_op_offset
);
1641 function_call_linkage
.safe_push (pair
);
1645 hsa_internal_fn
*slot
1646 = hsa_emitted_internal_decls
->find (call
->m_called_internal_fn
);
1648 gcc_assert (slot
->m_offset
> 0);
1649 call
->m_func
.m_directive_offset
= slot
->m_offset
;
1652 repr
.width
= BRIG_WIDTH_ALL
;
1653 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1655 brig_code
.add (&repr
, sizeof (repr
));
1659 /* Emit argument block directive. */
1662 emit_arg_block_insn (hsa_insn_arg_block
*insn
)
1664 switch (insn
->m_kind
)
1666 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START
:
1668 struct BrigDirectiveArgBlock repr
;
1669 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1670 repr
.base
.kind
= lendian16 (insn
->m_kind
);
1671 brig_code
.add (&repr
, sizeof (repr
));
1673 for (unsigned i
= 0; i
< insn
->m_call_insn
->m_input_args
.length (); i
++)
1675 insn
->m_call_insn
->m_args_code_list
->m_offsets
[i
]
1676 = lendian32 (emit_directive_variable
1677 (insn
->m_call_insn
->m_input_args
[i
]));
1681 if (insn
->m_call_insn
->m_output_arg
)
1683 insn
->m_call_insn
->m_result_code_list
->m_offsets
[0]
1684 = lendian32 (emit_directive_variable
1685 (insn
->m_call_insn
->m_output_arg
));
1691 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END
:
1693 struct BrigDirectiveArgBlock repr
;
1694 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1695 repr
.base
.kind
= lendian16 (insn
->m_kind
);
1696 brig_code
.add (&repr
, sizeof (repr
));
1706 /* Emit comment directive. */
1709 emit_comment_insn (hsa_insn_comment
*insn
)
1711 struct BrigDirectiveComment repr
;
1712 memset (&repr
, 0, sizeof (repr
));
1714 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1715 repr
.base
.kind
= lendian16 (insn
->m_opcode
);
1716 repr
.name
= brig_emit_string (insn
->m_comment
, '\0', false);
1717 brig_code
.add (&repr
, sizeof (repr
));
1720 /* Emit queue instruction INSN. */
1723 emit_queue_insn (hsa_insn_queue
*insn
)
1726 memset (&repr
, 0, sizeof (repr
));
1728 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1729 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_QUEUE
);
1730 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1731 repr
.base
.type
= lendian16 (insn
->m_type
);
1732 repr
.segment
= BRIG_SEGMENT_GLOBAL
;
1733 repr
.memoryOrder
= BRIG_MEMORY_ORDER_SC_RELEASE
;
1734 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1735 brig_data
.round_size_up (4);
1736 brig_code
.add (&repr
, sizeof (repr
));
1741 /* Emit source type instruction INSN. */
1744 emit_srctype_insn (hsa_insn_srctype
*insn
)
1746 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1747 struct BrigInstSourceType repr
;
1748 unsigned operand_count
= insn
->operand_count ();
1749 gcc_checking_assert (operand_count
>= 2);
1751 memset (&repr
, 0, sizeof (repr
));
1752 repr
.sourceType
= lendian16 (insn
->m_source_type
);
1753 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1754 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SOURCE_TYPE
);
1755 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1756 repr
.base
.type
= lendian16 (insn
->m_type
);
1758 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1759 brig_code
.add (&repr
, sizeof (struct BrigInstSourceType
));
1763 /* Emit packed instruction INSN. */
1766 emit_packed_insn (hsa_insn_packed
*insn
)
1768 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1769 struct BrigInstSourceType repr
;
1770 unsigned operand_count
= insn
->operand_count ();
1771 gcc_checking_assert (operand_count
>= 2);
1773 memset (&repr
, 0, sizeof (repr
));
1774 repr
.sourceType
= lendian16 (insn
->m_source_type
);
1775 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1776 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SOURCE_TYPE
);
1777 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1778 repr
.base
.type
= lendian16 (insn
->m_type
);
1780 if (insn
->m_opcode
== BRIG_OPCODE_COMBINE
)
1782 /* Create operand list for packed type. */
1783 for (unsigned i
= 1; i
< operand_count
; i
++)
1785 gcc_checking_assert (insn
->get_op (i
));
1786 insn
->m_operand_list
->m_offsets
[i
- 1]
1787 = lendian32 (enqueue_op (insn
->get_op (i
)));
1790 repr
.base
.operands
= lendian32 (emit_operands (insn
->get_op (0),
1791 insn
->m_operand_list
));
1793 else if (insn
->m_opcode
== BRIG_OPCODE_EXPAND
)
1795 /* Create operand list for packed type. */
1796 for (unsigned i
= 0; i
< operand_count
- 1; i
++)
1798 gcc_checking_assert (insn
->get_op (i
));
1799 insn
->m_operand_list
->m_offsets
[i
]
1800 = lendian32 (enqueue_op (insn
->get_op (i
)));
1803 unsigned ops
= emit_operands (insn
->m_operand_list
,
1804 insn
->get_op (insn
->operand_count () - 1));
1805 repr
.base
.operands
= lendian32 (ops
);
1809 brig_code
.add (&repr
, sizeof (struct BrigInstSourceType
));
1813 /* Emit a basic HSA instruction and all necessary directives, schedule
1814 necessary operands for writing. */
1817 emit_basic_insn (hsa_insn_basic
*insn
)
1819 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1820 struct BrigInstMod repr
;
1823 memset (&repr
, 0, sizeof (repr
));
1824 repr
.base
.base
.byteCount
= lendian16 (sizeof (BrigInstBasic
));
1825 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BASIC
);
1826 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1827 switch (insn
->m_opcode
)
1829 /* And the bit-logical operations need bit types and whine about
1830 arithmetic types :-/ */
1831 case BRIG_OPCODE_AND
:
1832 case BRIG_OPCODE_OR
:
1833 case BRIG_OPCODE_XOR
:
1834 case BRIG_OPCODE_NOT
:
1835 type
= regtype_for_type (insn
->m_type
);
1838 type
= insn
->m_type
;
1841 repr
.base
.type
= lendian16 (type
);
1842 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1844 if (hsa_type_packed_p (type
))
1846 if (hsa_type_float_p (type
)
1847 && !hsa_opcode_floating_bit_insn_p (insn
->m_opcode
))
1848 repr
.round
= BRIG_ROUND_FLOAT_NEAR_EVEN
;
1851 /* We assume that destination and sources agree in packing layout. */
1852 if (insn
->num_used_ops () >= 2)
1853 repr
.pack
= BRIG_PACK_PP
;
1855 repr
.pack
= BRIG_PACK_P
;
1857 repr
.base
.base
.byteCount
= lendian16 (sizeof (BrigInstMod
));
1858 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MOD
);
1859 brig_code
.add (&repr
, sizeof (struct BrigInstMod
));
1862 brig_code
.add (&repr
, sizeof (struct BrigInstBasic
));
1866 /* Emit an HSA instruction and all necessary directives, schedule necessary
1867 operands for writing. */
1870 emit_insn (hsa_insn_basic
*insn
)
1872 gcc_assert (!is_a
<hsa_insn_phi
*> (insn
));
1874 insn
->m_brig_offset
= brig_code
.total_size
;
1876 if (hsa_insn_signal
*signal
= dyn_cast
<hsa_insn_signal
*> (insn
))
1877 emit_signal_insn (signal
);
1878 else if (hsa_insn_atomic
*atom
= dyn_cast
<hsa_insn_atomic
*> (insn
))
1879 emit_atomic_insn (atom
);
1880 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
1881 emit_memory_insn (mem
);
1882 else if (insn
->m_opcode
== BRIG_OPCODE_LDA
)
1883 emit_addr_insn (insn
);
1884 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
1885 emit_segment_insn (seg
);
1886 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
1887 emit_cmp_insn (cmp
);
1888 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
1889 emit_branch_insn (br
);
1890 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
1892 if (switch_instructions
== NULL
)
1893 switch_instructions
= new vec
<hsa_insn_sbr
*> ();
1895 switch_instructions
->safe_push (sbr
);
1896 emit_switch_insn (sbr
);
1898 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
1899 emit_arg_block_insn (block
);
1900 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
1901 emit_call_insn (call
);
1902 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
1903 emit_comment_insn (comment
);
1904 else if (hsa_insn_queue
*queue
= dyn_cast
<hsa_insn_queue
*> (insn
))
1905 emit_queue_insn (queue
);
1906 else if (hsa_insn_srctype
*srctype
= dyn_cast
<hsa_insn_srctype
*> (insn
))
1907 emit_srctype_insn (srctype
);
1908 else if (hsa_insn_packed
*packed
= dyn_cast
<hsa_insn_packed
*> (insn
))
1909 emit_packed_insn (packed
);
1910 else if (hsa_insn_cvt
*cvt
= dyn_cast
<hsa_insn_cvt
*> (insn
))
1911 emit_cvt_insn (cvt
);
1912 else if (hsa_insn_alloca
*alloca
= dyn_cast
<hsa_insn_alloca
*> (insn
))
1913 emit_alloca_insn (alloca
);
1915 emit_basic_insn (insn
);
1918 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1919 or we are about to finish emitting code, if it is NULL. If the fall through
1920 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1923 perhaps_emit_branch (basic_block bb
, basic_block next_bb
)
1925 basic_block t_bb
= NULL
, ff
= NULL
;
1930 /* If the last instruction of BB is a switch, ignore emission of all
1932 if (hsa_bb_for_bb (bb
)->m_last_insn
1933 && is_a
<hsa_insn_sbr
*> (hsa_bb_for_bb (bb
)->m_last_insn
))
1936 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1937 if (e
->flags
& EDGE_TRUE_VALUE
)
1948 if (!ff
|| ff
== next_bb
|| ff
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1951 emit_unconditional_jump (&hsa_bb_for_bb (ff
)->m_label_ref
);
1954 /* Emit the a function with name NAME to the various brig sections. */
1957 hsa_brig_emit_function (void)
1959 basic_block bb
, prev_bb
;
1960 hsa_insn_basic
*insn
;
1961 BrigDirectiveExecutable
*ptr_to_fndir
;
1965 brig_insn_count
= 0;
1966 memset (&op_queue
, 0, sizeof (op_queue
));
1967 op_queue
.projected_size
= brig_operand
.total_size
;
1969 if (!function_offsets
)
1970 function_offsets
= new hash_map
<tree
, BrigCodeOffset32_t
> ();
1972 if (!emitted_declarations
)
1973 emitted_declarations
= new hash_map
<tree
, BrigDirectiveExecutable
*> ();
1975 for (unsigned i
= 0; i
< hsa_cfun
->m_called_functions
.length (); i
++)
1977 tree called
= hsa_cfun
->m_called_functions
[i
];
1979 /* If the function has no definition, emit a declaration. */
1980 if (!emitted_declarations
->get (called
))
1982 BrigDirectiveExecutable
*e
= emit_function_declaration (called
);
1983 emitted_declarations
->put (called
, e
);
1987 for (unsigned i
= 0; i
< hsa_cfun
->m_called_internal_fns
.length (); i
++)
1989 hsa_internal_fn
*called
= hsa_cfun
->m_called_internal_fns
[i
];
1990 emit_internal_fn_decl (called
);
1993 ptr_to_fndir
= emit_function_directives (hsa_cfun
, false);
1994 for (insn
= hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->m_first_insn
;
1996 insn
= insn
->m_next
)
1998 prev_bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
);
1999 FOR_EACH_BB_FN (bb
, cfun
)
2001 perhaps_emit_branch (prev_bb
, bb
);
2002 emit_bb_label_directive (hsa_bb_for_bb (bb
));
2003 for (insn
= hsa_bb_for_bb (bb
)->m_first_insn
; insn
; insn
= insn
->m_next
)
2007 perhaps_emit_branch (prev_bb
, NULL
);
2008 ptr_to_fndir
->nextModuleEntry
= brig_code
.total_size
;
2010 /* Fill up label references for all sbr instructions. */
2011 if (switch_instructions
)
2013 for (unsigned i
= 0; i
< switch_instructions
->length (); i
++)
2015 hsa_insn_sbr
*sbr
= (*switch_instructions
)[i
];
2016 for (unsigned j
= 0; j
< sbr
->m_jump_table
.length (); j
++)
2018 hsa_bb
*hbb
= hsa_bb_for_bb (sbr
->m_jump_table
[j
]);
2019 sbr
->m_label_code_list
->m_offsets
[j
]
2020 = hbb
->m_label_ref
.m_directive_offset
;
2024 switch_instructions
->release ();
2025 delete switch_instructions
;
2026 switch_instructions
= NULL
;
2031 fprintf (dump_file
, "------- After BRIG emission: -------\n");
2032 dump_hsa_cfun (dump_file
);
2035 emit_queued_operands ();
2038 /* Emit all OMP symbols related to OMP. */
2041 hsa_brig_emit_omp_symbols (void)
2044 emit_directive_variable (hsa_num_threads
);
/* Create and return the __hsa_global_variables symbol, which contains
   all the information libgomp needs to link global variables with the
   string names under which an HSA kernel refers to them.  */
2052 hsa_output_global_variables ()
2054 unsigned l
= hsa_global_variable_symbols
->elements ();
2056 tree variable_info_type
= make_node (RECORD_TYPE
);
2057 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2058 get_identifier ("name"), ptr_type_node
);
2059 DECL_CHAIN (id_f1
) = NULL_TREE
;
2060 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2061 get_identifier ("omp_data_size"),
2063 DECL_CHAIN (id_f2
) = id_f1
;
2064 finish_builtin_struct (variable_info_type
, "__hsa_variable_info", id_f2
,
2067 tree int_num_of_global_vars
;
2068 int_num_of_global_vars
= build_int_cst (uint32_type_node
, l
);
2069 tree global_vars_num_index_type
= build_index_type (int_num_of_global_vars
);
2070 tree global_vars_array_type
= build_array_type (variable_info_type
,
2071 global_vars_num_index_type
);
2072 TYPE_ARTIFICIAL (global_vars_array_type
) = 1;
2074 vec
<constructor_elt
, va_gc
> *global_vars_vec
= NULL
;
2076 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
2077 = hsa_global_variable_symbols
->begin ();
2078 it
!= hsa_global_variable_symbols
->end (); ++it
)
2080 unsigned len
= strlen ((*it
)->m_name
);
2081 char *copy
= XNEWVEC (char, len
+ 2);
2083 memcpy (copy
+ 1, (*it
)->m_name
, len
);
2084 copy
[len
+ 1] = '\0';
2086 hsa_sanitize_name (copy
);
2088 tree var_name
= build_string (len
, copy
);
2089 TREE_TYPE (var_name
)
2090 = build_array_type (char_type_node
, build_index_type (size_int (len
)));
2093 vec
<constructor_elt
, va_gc
> *variable_info_vec
= NULL
;
2094 CONSTRUCTOR_APPEND_ELT (variable_info_vec
, NULL_TREE
,
2096 build_pointer_type (TREE_TYPE (var_name
)),
2098 CONSTRUCTOR_APPEND_ELT (variable_info_vec
, NULL_TREE
,
2099 build_fold_addr_expr ((*it
)->m_decl
));
2101 tree variable_info_ctor
= build_constructor (variable_info_type
,
2104 CONSTRUCTOR_APPEND_ELT (global_vars_vec
, NULL_TREE
,
2105 variable_info_ctor
);
2108 tree global_vars_ctor
= build_constructor (global_vars_array_type
,
2112 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_global_variables", 1);
2113 tree global_vars_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2114 get_identifier (tmp_name
),
2115 global_vars_array_type
);
2116 TREE_STATIC (global_vars_table
) = 1;
2117 TREE_READONLY (global_vars_table
) = 1;
2118 TREE_PUBLIC (global_vars_table
) = 0;
2119 DECL_ARTIFICIAL (global_vars_table
) = 1;
2120 DECL_IGNORED_P (global_vars_table
) = 1;
2121 DECL_EXTERNAL (global_vars_table
) = 0;
2122 TREE_CONSTANT (global_vars_table
) = 1;
2123 DECL_INITIAL (global_vars_table
) = global_vars_ctor
;
2124 varpool_node::finalize_decl (global_vars_table
);
2126 return global_vars_table
;
/* Create the __hsa_host_functions and __hsa_kernels tables, which contain
   all the information libgomp needs to register every kernel in the BRIG
   binary, and store them to *HOST_FUNC_TABLE and *KERNELS respectively.  */
2134 hsa_output_kernels (tree
*host_func_table
, tree
*kernels
)
2136 unsigned map_count
= hsa_get_number_decl_kernel_mappings ();
2138 tree int_num_of_kernels
;
2139 int_num_of_kernels
= build_int_cst (uint32_type_node
, map_count
);
2140 tree kernel_num_index_type
= build_index_type (int_num_of_kernels
);
2141 tree host_functions_array_type
= build_array_type (ptr_type_node
,
2142 kernel_num_index_type
);
2143 TYPE_ARTIFICIAL (host_functions_array_type
) = 1;
2145 vec
<constructor_elt
, va_gc
> *host_functions_vec
= NULL
;
2146 for (unsigned i
= 0; i
< map_count
; ++i
)
2148 tree decl
= hsa_get_decl_kernel_mapping_decl (i
);
2149 tree host_fn
= build_fold_addr_expr (hsa_get_host_function (decl
));
2150 CONSTRUCTOR_APPEND_ELT (host_functions_vec
, NULL_TREE
, host_fn
);
2152 tree host_functions_ctor
= build_constructor (host_functions_array_type
,
2153 host_functions_vec
);
2155 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_host_functions", 1);
2156 tree hsa_host_func_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2157 get_identifier (tmp_name
),
2158 host_functions_array_type
);
2159 TREE_STATIC (hsa_host_func_table
) = 1;
2160 TREE_READONLY (hsa_host_func_table
) = 1;
2161 TREE_PUBLIC (hsa_host_func_table
) = 0;
2162 DECL_ARTIFICIAL (hsa_host_func_table
) = 1;
2163 DECL_IGNORED_P (hsa_host_func_table
) = 1;
2164 DECL_EXTERNAL (hsa_host_func_table
) = 0;
2165 TREE_CONSTANT (hsa_host_func_table
) = 1;
2166 DECL_INITIAL (hsa_host_func_table
) = host_functions_ctor
;
2167 varpool_node::finalize_decl (hsa_host_func_table
);
2168 *host_func_table
= hsa_host_func_table
;
2170 /* Following code emits list of kernel_info structures. */
2172 tree kernel_info_type
= make_node (RECORD_TYPE
);
2173 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2174 get_identifier ("name"), ptr_type_node
);
2175 DECL_CHAIN (id_f1
) = NULL_TREE
;
2176 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2177 get_identifier ("omp_data_size"),
2178 unsigned_type_node
);
2179 DECL_CHAIN (id_f2
) = id_f1
;
2180 tree id_f3
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2181 get_identifier ("gridified_kernel_p"),
2183 DECL_CHAIN (id_f3
) = id_f2
;
2184 tree id_f4
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2185 get_identifier ("kernel_dependencies_count"),
2186 unsigned_type_node
);
2187 DECL_CHAIN (id_f4
) = id_f3
;
2188 tree id_f5
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2189 get_identifier ("kernel_dependencies"),
2190 build_pointer_type (build_pointer_type
2192 DECL_CHAIN (id_f5
) = id_f4
;
2193 finish_builtin_struct (kernel_info_type
, "__hsa_kernel_info", id_f5
,
2196 int_num_of_kernels
= build_int_cstu (uint32_type_node
, map_count
);
2197 tree kernel_info_vector_type
2198 = build_array_type (kernel_info_type
,
2199 build_index_type (int_num_of_kernels
));
2200 TYPE_ARTIFICIAL (kernel_info_vector_type
) = 1;
2202 vec
<constructor_elt
, va_gc
> *kernel_info_vector_vec
= NULL
;
2203 tree kernel_dependencies_vector_type
= NULL
;
2205 for (unsigned i
= 0; i
< map_count
; ++i
)
2207 tree kernel
= hsa_get_decl_kernel_mapping_decl (i
);
2208 char *name
= hsa_get_decl_kernel_mapping_name (i
);
2209 unsigned len
= strlen (name
);
2210 char *copy
= XNEWVEC (char, len
+ 2);
2212 memcpy (copy
+ 1, name
, len
);
2213 copy
[len
+ 1] = '\0';
2216 tree kern_name
= build_string (len
, copy
);
2217 TREE_TYPE (kern_name
)
2218 = build_array_type (char_type_node
, build_index_type (size_int (len
)));
2221 unsigned omp_size
= hsa_get_decl_kernel_mapping_omp_size (i
);
2222 tree omp_data_size
= build_int_cstu (unsigned_type_node
, omp_size
);
2223 bool gridified_kernel_p
= hsa_get_decl_kernel_mapping_gridified (i
);
2224 tree gridified_kernel_p_tree
= build_int_cstu (boolean_type_node
,
2225 gridified_kernel_p
);
2228 kernel_dependencies_vector_type
2229 = build_array_type (build_pointer_type (char_type_node
),
2230 build_index_type (size_int (0)));
2232 vec
<constructor_elt
, va_gc
> *kernel_dependencies_vec
= NULL
;
2233 if (hsa_decl_kernel_dependencies
)
2235 vec
<const char *> **slot
;
2236 slot
= hsa_decl_kernel_dependencies
->get (kernel
);
2239 vec
<const char *> *dependencies
= *slot
;
2240 count
= dependencies
->length ();
2242 kernel_dependencies_vector_type
2243 = build_array_type (build_pointer_type (char_type_node
),
2244 build_index_type (size_int (count
)));
2245 TYPE_ARTIFICIAL (kernel_dependencies_vector_type
) = 1;
2247 for (unsigned j
= 0; j
< count
; j
++)
2249 const char *d
= (*dependencies
)[j
];
2251 tree dependency_name
= build_string (len
, d
);
2252 TREE_TYPE (dependency_name
)
2253 = build_array_type (char_type_node
,
2254 build_index_type (size_int (len
)));
2256 CONSTRUCTOR_APPEND_ELT
2257 (kernel_dependencies_vec
, NULL_TREE
,
2259 build_pointer_type (TREE_TYPE (dependency_name
)),
2265 tree dependencies_count
= build_int_cstu (unsigned_type_node
, count
);
2267 vec
<constructor_elt
, va_gc
> *kernel_info_vec
= NULL
;
2268 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2270 build_pointer_type (TREE_TYPE
2273 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, omp_data_size
);
2274 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2275 gridified_kernel_p_tree
);
2276 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, dependencies_count
);
2280 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_dependencies_list", i
);
2281 tree dependencies_list
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2282 get_identifier (tmp_name
),
2283 kernel_dependencies_vector_type
);
2285 TREE_STATIC (dependencies_list
) = 1;
2286 TREE_READONLY (dependencies_list
) = 1;
2287 TREE_PUBLIC (dependencies_list
) = 0;
2288 DECL_ARTIFICIAL (dependencies_list
) = 1;
2289 DECL_IGNORED_P (dependencies_list
) = 1;
2290 DECL_EXTERNAL (dependencies_list
) = 0;
2291 TREE_CONSTANT (dependencies_list
) = 1;
2292 DECL_INITIAL (dependencies_list
)
2293 = build_constructor (kernel_dependencies_vector_type
,
2294 kernel_dependencies_vec
);
2295 varpool_node::finalize_decl (dependencies_list
);
2297 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2300 (TREE_TYPE (dependencies_list
)),
2301 dependencies_list
));
2304 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, null_pointer_node
);
2306 tree kernel_info_ctor
= build_constructor (kernel_info_type
,
2309 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec
, NULL_TREE
,
2313 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_kernels", 1);
2314 tree hsa_kernels
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2315 get_identifier (tmp_name
),
2316 kernel_info_vector_type
);
2318 TREE_STATIC (hsa_kernels
) = 1;
2319 TREE_READONLY (hsa_kernels
) = 1;
2320 TREE_PUBLIC (hsa_kernels
) = 0;
2321 DECL_ARTIFICIAL (hsa_kernels
) = 1;
2322 DECL_IGNORED_P (hsa_kernels
) = 1;
2323 DECL_EXTERNAL (hsa_kernels
) = 0;
2324 TREE_CONSTANT (hsa_kernels
) = 1;
2325 DECL_INITIAL (hsa_kernels
) = build_constructor (kernel_info_vector_type
,
2326 kernel_info_vector_vec
);
2327 varpool_node::finalize_decl (hsa_kernels
);
2328 *kernels
= hsa_kernels
;
2331 /* Create a static constructor that will register our brig stuff with libgomp.  */
/* Build the static data used to register this unit's BRIG module with
   libgomp.  The function creates an "__hsa_image_desc" record type and a
   variable of that type, a host table variable with a four-element
   initializer, and calls to the GOMP offload register and unregister
   builtins whose statement lists are passed to cgraph_build_static_cdtor
   with 'I' and 'D' respectively.  BRIG_DECL is the declaration whose
   address becomes the first element of the image descriptor initializer.  */
2335 hsa_output_libgomp_mapping (tree brig_decl
)
/* Counts that are later stored in the image descriptor initializer.  */
2337 unsigned kernel_count
= hsa_get_number_decl_kernel_mappings ();
2338 unsigned global_variable_count
= hsa_global_variable_symbols
->elements ();
2341 tree host_func_table
;
/* HOST_FUNC_TABLE and KERNELS are passed by address to hsa_output_kernels;
   GLOBAL_VARS is the value returned by hsa_output_global_variables.  */
2343 hsa_output_kernels (&host_func_table
, &kernels
);
2344 tree global_vars
= hsa_output_global_variables ();
/* Lay out the record type for the image descriptor.  Fields id_f1..id_f5
   are FIELD_DECLs named "brig_module", "kernel_count", "hsa_kernel_infos",
   "global_variable_count" and "hsa_global_variable_infos"; each one's
   DECL_CHAIN points at the previously created field, and id_f5 is the
   chain handed to finish_builtin_struct.  */
2346 tree hsa_image_desc_type
= make_node (RECORD_TYPE
);
2347 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2348 get_identifier ("brig_module"), ptr_type_node
);
2349 DECL_CHAIN (id_f1
) = NULL_TREE
;
2350 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2351 get_identifier ("kernel_count"),
2352 unsigned_type_node
);
2354 DECL_CHAIN (id_f2
) = id_f1
;
2355 tree id_f3
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2356 get_identifier ("hsa_kernel_infos"),
2358 DECL_CHAIN (id_f3
) = id_f2
;
2359 tree id_f4
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2360 get_identifier ("global_variable_count"),
2361 unsigned_type_node
);
2362 DECL_CHAIN (id_f4
) = id_f3
;
2363 tree id_f5
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2364 get_identifier ("hsa_global_variable_infos"),
2366 DECL_CHAIN (id_f5
) = id_f4
;
2367 finish_builtin_struct (hsa_image_desc_type
, "__hsa_image_desc", id_f5
,
2369 TYPE_ARTIFICIAL (hsa_image_desc_type
) = 1;
/* Initializer for the descriptor.  Elements are appended in this order:
   address of BRIG_DECL, KERNEL_COUNT, an element involving KERNELS,
   GLOBAL_VARIABLE_COUNT, an element involving GLOBAL_VARS.  */
2371 vec
<constructor_elt
, va_gc
> *img_desc_vec
= NULL
;
2372 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2373 build_fold_addr_expr (brig_decl
));
2374 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2375 build_int_cstu (unsigned_type_node
, kernel_count
));
2376 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2378 build_pointer_type (TREE_TYPE (kernels
)),
2380 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2381 build_int_cstu (unsigned_type_node
,
2382 global_variable_count
));
2383 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2385 build_pointer_type (TREE_TYPE (global_vars
)),
2388 tree img_desc_ctor
= build_constructor (hsa_image_desc_type
, img_desc_vec
);
/* Create the descriptor variable under an internal label and mark it
   static, read-only, non-public, artificial, DECL_IGNORED_P, non-external
   and constant before handing it to varpool_node::finalize_decl.  */
2391 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_img_descriptor", 1);
2392 tree hsa_img_descriptor
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2393 get_identifier (tmp_name
),
2394 hsa_image_desc_type
);
2395 TREE_STATIC (hsa_img_descriptor
) = 1;
2396 TREE_READONLY (hsa_img_descriptor
) = 1;
2397 TREE_PUBLIC (hsa_img_descriptor
) = 0;
2398 DECL_ARTIFICIAL (hsa_img_descriptor
) = 1;
2399 DECL_IGNORED_P (hsa_img_descriptor
) = 1;
2400 DECL_EXTERNAL (hsa_img_descriptor
) = 0;
2401 TREE_CONSTANT (hsa_img_descriptor
) = 1;
2402 DECL_INITIAL (hsa_img_descriptor
) = img_desc_ctor
;
2403 varpool_node::finalize_decl (hsa_img_descriptor
);
2405 /* Construct the "host_table" libgomp expects. */
/* NOTE(review): build_index_type is given 4 here while exactly four
   elements are appended below; if its argument is the maximum index the
   array has five slots -- confirm this is intended.  */
2406 tree index_type
= build_index_type (build_int_cst (integer_type_node
, 4));
2407 tree libgomp_host_table_type
= build_array_type (ptr_type_node
, index_type
);
2408 TYPE_ARTIFICIAL (libgomp_host_table_type
) = 1;
/* First two entries: the address of HOST_FUNC_TABLE and that address
   advanced by KERNEL_COUNT times the size of a pointer.  The last two
   entries are null pointers.  */
2409 vec
<constructor_elt
, va_gc
> *libgomp_host_table_vec
= NULL
;
2410 tree host_func_table_addr
= build_fold_addr_expr (host_func_table
);
2411 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
,
2412 host_func_table_addr
);
2413 offset_int func_table_size
2414 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node
)) * kernel_count
;
2415 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
,
2416 fold_build2 (POINTER_PLUS_EXPR
,
2417 TREE_TYPE (host_func_table_addr
),
2418 host_func_table_addr
,
2419 build_int_cst (size_type_node
,
2420 func_table_size
.to_uhwi
2422 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
, null_pointer_node
);
2423 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
, null_pointer_node
);
2424 tree libgomp_host_table_ctor
= build_constructor (libgomp_host_table_type
,
2425 libgomp_host_table_vec
);
/* Create the host table variable under its own internal label, with the
   same set of flags as the image descriptor variable above.  */
2426 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_libgomp_host_table", 1);
2427 tree hsa_libgomp_host_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2428 get_identifier (tmp_name
),
2429 libgomp_host_table_type
);
2431 TREE_STATIC (hsa_libgomp_host_table
) = 1;
2432 TREE_READONLY (hsa_libgomp_host_table
) = 1;
2433 TREE_PUBLIC (hsa_libgomp_host_table
) = 0;
2434 DECL_ARTIFICIAL (hsa_libgomp_host_table
) = 1;
2435 DECL_IGNORED_P (hsa_libgomp_host_table
) = 1;
2436 DECL_EXTERNAL (hsa_libgomp_host_table
) = 0;
2437 TREE_CONSTANT (hsa_libgomp_host_table
) = 1;
2438 DECL_INITIAL (hsa_libgomp_host_table
) = libgomp_host_table_ctor
;
2439 varpool_node::finalize_decl (hsa_libgomp_host_table
);
2441 /* Generate an initializer with a call to the registration routine. */
/* The register call takes four arguments: the packed GOMP version, the
   address of the host table, GOMP_DEVICE_HSA and the address of the image
   descriptor.  It is appended to a statement list, and the list obtained
   from hsa_get_ctor_statements is passed to cgraph_build_static_cdtor
   with 'I'.  */
2443 tree offload_register
2444 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER
);
2445 gcc_checking_assert (offload_register
);
2447 tree
*hsa_ctor_stmts
= hsa_get_ctor_statements ();
2448 append_to_statement_list
2449 (build_call_expr (offload_register
, 4,
2450 build_int_cstu (unsigned_type_node
,
2451 GOMP_VERSION_PACK (GOMP_VERSION
,
2453 build_fold_addr_expr (hsa_libgomp_host_table
),
2454 build_int_cst (integer_type_node
, GOMP_DEVICE_HSA
),
2455 build_fold_addr_expr (hsa_img_descriptor
)),
2458 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts
, DEFAULT_INIT_PRIORITY
);
/* The unregister call is built from the same four arguments; the list
   obtained from hsa_get_dtor_statements is passed to
   cgraph_build_static_cdtor with 'D'.  */
2460 tree offload_unregister
2461 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER
);
2462 gcc_checking_assert (offload_unregister
);
2464 tree
*hsa_dtor_stmts
= hsa_get_dtor_statements ();
2465 append_to_statement_list
2466 (build_call_expr (offload_unregister
, 4,
2467 build_int_cstu (unsigned_type_node
,
2468 GOMP_VERSION_PACK (GOMP_VERSION
,
2470 build_fold_addr_expr (hsa_libgomp_host_table
),
2471 build_int_cst (integer_type_node
, GOMP_DEVICE_HSA
),
2472 build_fold_addr_expr (hsa_img_descriptor
)),
2474 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts
, DEFAULT_INIT_PRIORITY
);
2477 /* Emit the brig module we have compiled to a section in the final assembly and
2478 also create a compile unit static constructor that will register the brig
2479 module with libgomp. */
2482 hsa_output_brig (void)
2484 section
*saved_section
;
/* Test whether BRIG output has been initialized.  */
2486 if (!brig_initialized
)
/* Resolve recorded call linkage: for each pair in function_call_linkage,
   fetch the offset stored for P.function_decl in function_offsets and
   write it, via lendian32, into the ref field of the BrigOperandCodeRef
   found at P.offset in brig_operand.
   NOTE(review): the pointer returned by get is dereferenced inside
   gcc_assert without a null check -- presumably every linked function has
   an entry; confirm.  */
2489 for (unsigned i
= 0; i
< function_call_linkage
.length (); i
++)
2491 function_linkage_pair p
= function_call_linkage
[i
];
2493 BrigCodeOffset32_t
*func_offset
= function_offsets
->get (p
.function_decl
);
2494 gcc_assert (*func_offset
);
2495 BrigOperandCodeRef
*code_ref
2496 = (BrigOperandCodeRef
*) (brig_operand
.get_ptr_by_offset (p
.offset
));
2497 gcc_assert (code_ref
->base
.kind
== BRIG_KIND_OPERAND_CODE_REF
);
2498 code_ref
->ref
= lendian32 (*func_offset
);
2501 /* Iterate all function declarations and if we meet a function that should
2502 have module linkage and we are unable to emit HSAIL for the function,
2503 then change the linkage to program linkage. Doing so, we will emit
2504 a valid BRIG image. */
2505 if (hsa_failed_functions
!= NULL
&& emitted_declarations
!= NULL
)
2506 for (hash_map
<tree
, BrigDirectiveExecutable
*>::iterator it
2507 = emitted_declarations
->begin ();
2508 it
!= emitted_declarations
->end ();
2511 if (hsa_failed_functions
->contains ((*it
).first
))
2512 (*it
).second
->linkage
= BRIG_LINKAGE_PROGRAM
;
/* Remember the current section; it is switched back to once the module
   has been written.  */
2515 saved_section
= in_section
;
2517 switch_to_section (get_section (BRIG_ELF_SECTION_NAME
, SECTION_NOTYPE
, NULL
));
/* Emit an internal label at the current position of the BRIG section and
   build a VAR_DECL carrying the same name, which is later passed to
   hsa_output_libgomp_mapping.  The decl is flagged TREE_ASM_WRITTEN.
   NOTE(review): DECL_INITIAL is set to the decl itself -- confirm this is
   deliberate.  */
2519 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, BRIG_LABEL_STRING
, 1);
2520 ASM_OUTPUT_LABEL (asm_out_file
, tmp_name
);
2521 tree brig_id
= get_identifier (tmp_name
);
2522 tree brig_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
, brig_id
,
2524 SET_DECL_ASSEMBLER_NAME (brig_decl
, brig_id
);
2525 TREE_ADDRESSABLE (brig_decl
) = 1;
2526 TREE_READONLY (brig_decl
) = 1;
2527 DECL_ARTIFICIAL (brig_decl
) = 1;
2528 DECL_IGNORED_P (brig_decl
) = 1;
2529 TREE_STATIC (brig_decl
) = 1;
2530 TREE_PUBLIC (brig_decl
) = 0;
2531 TREE_USED (brig_decl
) = 1;
2532 DECL_INITIAL (brig_decl
) = brig_decl
;
2533 TREE_ASM_WRITTEN (brig_decl
) = 1;
/* Fill in the module header: the identification bytes are copied from
   "HSA BRIG" and the version fields are stored via lendian32.  */
2535 BrigModuleHeader module_header
;
2536 memcpy (&module_header
.identification
, "HSA BRIG",
2537 sizeof (module_header
.identification
));
2538 module_header
.brigMajor
= lendian32 (BRIG_VERSION_BRIG_MAJOR
);
2539 module_header
.brigMinor
= lendian32 (BRIG_VERSION_BRIG_MINOR
);
2540 uint64_t section_index
[3];
/* Each padding is HSA_SECTION_ALIGNMENT minus the remainder of the section
   size, so a section whose size is already a multiple of the alignment
   gets a full HSA_SECTION_ALIGNMENT bytes of padding rather than none.  */
2542 int data_padding
, code_padding
, operand_padding
;
2543 data_padding
= HSA_SECTION_ALIGNMENT
2544 - brig_data
.total_size
% HSA_SECTION_ALIGNMENT
;
2545 code_padding
= HSA_SECTION_ALIGNMENT
2546 - brig_code
.total_size
% HSA_SECTION_ALIGNMENT
;
2547 operand_padding
= HSA_SECTION_ALIGNMENT
2548 - brig_operand
.total_size
% HSA_SECTION_ALIGNMENT
;
/* MODULE_SIZE adds up the header, the section index and the section
   sizes; the assert below requires it to be a multiple of 16.  */
2550 uint64_t module_size
= sizeof (module_header
)
2551 + sizeof (section_index
)
2552 + brig_data
.total_size
2554 + brig_code
.total_size
2556 + brig_operand
.total_size
2558 gcc_assert ((module_size
% 16) == 0);
2559 module_header
.byteCount
= lendian64 (module_size
);
2560 memset (&module_header
.hash
, 0, sizeof (module_header
.hash
));
2561 module_header
.reserved
= 0;
2562 module_header
.sectionCount
= lendian32 (3);
2563 module_header
.sectionIndex
= lendian64 (sizeof (module_header
));
/* Write the header, then compute the three section offsets (each stored
   via lendian64): the first section starts right after the header and the
   index, and each following one starts after the previous section plus
   its padding.  */
2564 assemble_string ((const char *) &module_header
, sizeof (module_header
));
2565 uint64_t off
= sizeof (module_header
) + sizeof (section_index
);
2566 section_index
[0] = lendian64 (off
);
2567 off
+= brig_data
.total_size
+ data_padding
;
2568 section_index
[1] = lendian64 (off
);
2569 off
+= brig_code
.total_size
+ code_padding
;
2570 section_index
[2] = lendian64 (off
);
/* NOTE(review): "§ion_index" on the next line looks like a mis-encoded
   "&section_index" -- confirm against the upstream file.  */
2571 assemble_string ((const char *) §ion_index
, sizeof (section_index
));
/* Zero-filled buffer used as the padding written after each section.  */
2573 char padding
[HSA_SECTION_ALIGNMENT
];
2574 memset (padding
, 0, sizeof (padding
));
/* Output the data, code and operand sections in that order, each followed
   by its padding.  */
2576 brig_data
.output ();
2577 assemble_string (padding
, data_padding
);
2578 brig_code
.output ();
2579 assemble_string (padding
, code_padding
);
2580 brig_operand
.output ();
2581 assemble_string (padding
, operand_padding
);
/* Return to the section that was current on entry.  */
2584 switch_to_section (saved_section
);
/* Emit the libgomp registration data for the module just written, then
   release the per-unit state and reset the two map pointers to NULL.  */
2586 hsa_output_libgomp_mapping (brig_decl
);
2588 hsa_free_decl_kernel_mapping ();
2589 brig_release_data ();
2590 hsa_deinit_compilation_unit_data ();
2592 delete emitted_declarations
;
2593 emitted_declarations
= NULL
;
2594 delete function_offsets
;
2595 function_offsets
= NULL
;