1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option) any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "hash-table.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
37 #include "basic-block.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "gimple-pretty-print.h"
43 #include "diagnostic-core.h"
46 #include "print-tree.h"
47 #include "symbol-summary.h"
48 #include "hsa-common.h"
49 #include "gomp-constants.h"
/* Convert the 16-bit host value VAL to little endian form, if necessary.
   Returns VAL unchanged on little-endian hosts and byte-swapped on
   big-endian ones.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
  /* The compiler tells us the byte order and provides a swap builtin.  */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap16 (val);
#else   /* __ORDER_PDP_ENDIAN__ */
  /* PDP ordering only permutes 16-bit words, so a single word is
     already in little endian form.  */
  return val;
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  return (val >> 8) | (val << 8);
#endif
#endif
}
/* Convert the 32-bit host value VAL to little endian form, if necessary.
   Returns VAL unchanged on little-endian hosts, fully byte-swapped on
   big-endian ones and with its 16-bit halves exchanged on PDP-endian
   ones.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
  /* The compiler tells us the byte order and provides a swap builtin.  */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
#else  /* __ORDER_PDP_ENDIAN__ */
  return (val >> 16) | (val << 16);
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* First swap the bytes inside each 16-bit half, then swap the halves.  */
  val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  return (val >> 16) | (val << 16);
#endif
#endif
}
/* Convert the 64-bit host value VAL to little endian form, if necessary.
   Returns VAL unchanged on little-endian hosts, fully byte-swapped on
   big-endian ones and with its four 16-bit words reversed on PDP-endian
   ones.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
  /* The compiler tells us the byte order and provides a swap builtin.  */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
#else  /* __ORDER_PDP_ENDIAN__ */
  return (((val & 0xffffll) << 48)
	  | ((val & 0xffff0000ll) << 16)
	  | ((val & 0xffff00000000ll) >> 16)
	  | ((val & 0xffff000000000000ll) >> 48));
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* Swap adjacent bytes, then adjacent 16-bit words, then the two 32-bit
     halves.  */
  val = (((val & 0xff00ff00ff00ff00ll) >> 8)
	 | ((val & 0x00ff00ff00ff00ffll) << 8));
  val = (((val & 0xffff0000ffff0000ll) >> 16)
	 | ((val & 0x0000ffff0000ffffll) << 16));
  return (val >> 32) | (val << 32);
#endif
#endif
}
128 #define BRIG_ELF_SECTION_NAME ".brig"
129 #define BRIG_LABEL_STRING "hsa_brig"
130 #define BRIG_SECTION_DATA_NAME "hsa_data"
131 #define BRIG_SECTION_CODE_NAME "hsa_code"
132 #define BRIG_SECTION_OPERAND_NAME "hsa_operand"
134 #define BRIG_CHUNK_MAX_SIZE (64 * 1024)
136 /* Required HSA section alignment. */
138 #define HSA_SECTION_ALIGNMENT 16
140 /* Chunks of BRIG binary data. */
142 struct hsa_brig_data_chunk
144 /* Size of the data already stored into a chunk. */
147 /* Pointer to the data. */
151 /* Structure representing a BRIG section, holding and writing its data. */
153 class hsa_brig_section
156 /* Section name that will be output to the BRIG. */
157 const char *section_name
;
158 /* Size in bytes of all data stored in the section. */
160 /* The size of the header of the section including padding. */
161 unsigned header_byte_count
;
162 /* The size of the header of the section without any padding. */
163 unsigned header_byte_delta
;
165 void init (const char *name
);
168 unsigned add (const void *data
, unsigned len
, void **output
= NULL
);
169 void round_size_up (int factor
);
170 void *get_ptr_by_offset (unsigned int offset
);
173 void allocate_new_chunk ();
175 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
176 vec
<struct hsa_brig_data_chunk
> chunks
;
178 /* More convenient access to the last chunk from the vector above. */
179 struct hsa_brig_data_chunk
*cur_chunk
;
182 static struct hsa_brig_section brig_data
, brig_code
, brig_operand
;
183 static uint32_t brig_insn_count
;
184 static bool brig_initialized
= false;
186 /* Mapping between emitted HSA functions and their offset in code segment. */
187 static hash_map
<tree
, BrigCodeOffset32_t
> *function_offsets
;
189 /* Hash map of emitted function declarations. */
190 static hash_map
<tree
, BrigDirectiveExecutable
*> *emitted_declarations
;
192 /* Hash table of emitted internal function declaration offsets. */
193 hash_table
<hsa_internal_fn_hasher
> *hsa_emitted_internal_decls
;
195 /* List of sbr instructions. */
196 static vec
<hsa_insn_sbr
*> *switch_instructions
;
198 struct function_linkage_pair
200 function_linkage_pair (tree decl
, unsigned int off
)
201 : function_decl (decl
), offset (off
) {}
203 /* Declaration of called function. */
206 /* Offset in operand section. */
210 /* Vector of function calls where we need to resolve function offsets. */
211 static auto_vec
<function_linkage_pair
> function_call_linkage
;
213 /* Add a new chunk, allocate data for it and initialize it. */
216 hsa_brig_section::allocate_new_chunk ()
218 struct hsa_brig_data_chunk new_chunk
;
220 new_chunk
.data
= XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE
);
222 cur_chunk
= chunks
.safe_push (new_chunk
);
225 /* Initialize the brig section. */
228 hsa_brig_section::init (const char *name
)
231 /* While the following computation is basically wrong, because the intent
232 certainly wasn't to have the first character of name and padding, which
233 are a part of sizeof (BrigSectionHeader), included in the first addend,
234 this is what the disassembler expects. */
235 total_size
= sizeof (BrigSectionHeader
) + strlen (section_name
);
237 allocate_new_chunk ();
238 header_byte_delta
= total_size
;
240 header_byte_count
= total_size
;
243 /* Free all data in the section. */
246 hsa_brig_section::release ()
248 for (unsigned i
= 0; i
< chunks
.length (); i
++)
249 free (chunks
[i
].data
);
254 /* Write the section to the output file to a section with the name given at
255 initialization. Switches the output section and does not restore it. */
258 hsa_brig_section::output ()
260 struct BrigSectionHeader section_header
;
263 section_header
.byteCount
= lendian64 (total_size
);
264 section_header
.headerByteCount
= lendian32 (header_byte_count
);
265 section_header
.nameLength
= lendian32 (strlen (section_name
));
266 assemble_string ((const char *) §ion_header
, 16);
267 assemble_string (section_name
, (section_header
.nameLength
));
268 memset (&padding
, 0, sizeof (padding
));
269 /* This is also a consequence of the wrong header size computation described
270 in a comment in hsa_brig_section::init. */
271 assemble_string (padding
, 8);
272 for (unsigned i
= 0; i
< chunks
.length (); i
++)
273 assemble_string (chunks
[i
].data
, chunks
[i
].size
);
276 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
277 which it was stored. If OUTPUT is not NULL, store into it the pointer to
278 the place where DATA was actually stored. */
281 hsa_brig_section::add (const void *data
, unsigned len
, void **output
)
283 unsigned offset
= total_size
;
285 gcc_assert (len
<= BRIG_CHUNK_MAX_SIZE
);
286 if (cur_chunk
->size
> (BRIG_CHUNK_MAX_SIZE
- len
))
287 allocate_new_chunk ();
289 char *dst
= cur_chunk
->data
+ cur_chunk
->size
;
290 memcpy (dst
, data
, len
);
293 cur_chunk
->size
+= len
;
299 /* Add padding to section so that its size is divisible by FACTOR. */
302 hsa_brig_section::round_size_up (int factor
)
304 unsigned padding
, res
= total_size
% factor
;
309 padding
= factor
- res
;
310 total_size
+= padding
;
311 if (cur_chunk
->size
> (BRIG_CHUNK_MAX_SIZE
- padding
))
313 padding
-= BRIG_CHUNK_MAX_SIZE
- cur_chunk
->size
;
314 cur_chunk
->size
= BRIG_CHUNK_MAX_SIZE
;
315 allocate_new_chunk ();
318 cur_chunk
->size
+= padding
;
321 /* Return pointer to data by global OFFSET in the section. */
324 hsa_brig_section::get_ptr_by_offset (unsigned int offset
)
326 gcc_assert (offset
< total_size
);
327 offset
-= header_byte_delta
;
330 for (i
= 0; offset
>= chunks
[i
].size
; i
++)
331 offset
-= chunks
[i
].size
;
333 return chunks
[i
].data
+ offset
;
336 /* BRIG string data hashing. */
338 struct brig_string_slot
346 /* Hash table helpers. */
348 struct brig_string_slot_hasher
: pointer_hash
<brig_string_slot
>
350 static inline hashval_t
hash (const value_type
);
351 static inline bool equal (const value_type
, const compare_type
);
352 static inline void remove (value_type
);
355 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
356 to support strings that may not end in '\0'. */
359 brig_string_slot_hasher::hash (const value_type ds
)
361 hashval_t r
= ds
->len
;
364 for (i
= 0; i
< ds
->len
; i
++)
365 r
= r
* 67 + (unsigned) ds
->s
[i
] - 113;
366 r
= r
* 67 + (unsigned) ds
->prefix
- 113;
370 /* Returns nonzero if DS1 and DS2 are equal. */
373 brig_string_slot_hasher::equal (const value_type ds1
, const compare_type ds2
)
375 if (ds1
->len
== ds2
->len
)
376 return ds1
->prefix
== ds2
->prefix
377 && memcmp (ds1
->s
, ds2
->s
, ds1
->len
) == 0;
382 /* Deallocate memory for DS upon its removal. */
385 brig_string_slot_hasher::remove (value_type ds
)
387 free (const_cast<char *> (ds
->s
));
391 /* Hash for strings we output in order not to duplicate them needlessly. */
393 static hash_table
<brig_string_slot_hasher
> *brig_string_htab
;
395 /* Emit a null terminated string STR to the data section and return its
396 offset in it. If PREFIX is non-zero, output it just before STR too.
397 Sanitize the string if SANITIZE option is set to true. */
400 brig_emit_string (const char *str
, char prefix
= 0, bool sanitize
= true)
402 unsigned slen
= strlen (str
);
403 unsigned offset
, len
= slen
+ (prefix
? 1 : 0);
404 uint32_t hdr_len
= lendian32 (len
);
405 brig_string_slot s_slot
;
406 brig_string_slot
**slot
;
409 str2
= xstrdup (str
);
412 hsa_sanitize_name (str2
);
415 s_slot
.prefix
= prefix
;
418 slot
= brig_string_htab
->find_slot (&s_slot
, INSERT
);
421 brig_string_slot
*new_slot
= XCNEW (brig_string_slot
);
423 /* In theory we should fill in BrigData but that would mean copying
424 the string to a buffer for no reason, so we just emulate it. */
425 offset
= brig_data
.add (&hdr_len
, sizeof (hdr_len
));
427 brig_data
.add (&prefix
, 1);
429 brig_data
.add (str2
, slen
);
430 brig_data
.round_size_up (4);
432 /* TODO: could use the string we just copied into
433 brig_string->cur_chunk */
435 new_slot
->len
= slen
;
436 new_slot
->prefix
= prefix
;
437 new_slot
->offset
= offset
;
442 offset
= (*slot
)->offset
;
449 /* Linked list of queued operands. */
451 static struct operand_queue
453 /* First from the chain of queued operands. */
454 hsa_op_base
*first_op
, *last_op
;
456 /* The offset at which the next operand will be enqueued. */
457 unsigned projected_size
;
461 /* Unless already initialized, initialize infrastructure to produce BRIG. */
468 if (brig_initialized
)
471 brig_string_htab
= new hash_table
<brig_string_slot_hasher
> (37);
472 brig_data
.init (BRIG_SECTION_DATA_NAME
);
473 brig_code
.init (BRIG_SECTION_CODE_NAME
);
474 brig_operand
.init (BRIG_SECTION_OPERAND_NAME
);
475 brig_initialized
= true;
477 struct BrigDirectiveModule moddir
;
478 memset (&moddir
, 0, sizeof (moddir
));
479 moddir
.base
.byteCount
= lendian16 (sizeof (moddir
));
482 if (main_input_filename
&& *main_input_filename
!= '\0')
484 const char *part
= strrchr (main_input_filename
, '/');
486 part
= main_input_filename
;
489 modname
= concat ("&__hsa_module_", part
, NULL
);
490 char *extension
= strchr (modname
, '.');
494 /* As in LTO mode, we have to emit different module names. */
497 part
= strrchr (asm_file_name
, '/');
499 part
= asm_file_name
;
503 modname2
= xasprintf ("%s_%s", modname
, part
);
508 hsa_sanitize_name (modname
);
509 moddir
.name
= brig_emit_string (modname
);
513 moddir
.name
= brig_emit_string ("__hsa_module_unnamed", '&');
514 moddir
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_MODULE
);
515 moddir
.hsailMajor
= lendian32 (BRIG_VERSION_HSAIL_MAJOR
);
516 moddir
.hsailMinor
= lendian32 (BRIG_VERSION_HSAIL_MINOR
);
517 moddir
.profile
= hsa_full_profile_p () ? BRIG_PROFILE_FULL
: BRIG_PROFILE_BASE
;
518 if (hsa_machine_large_p ())
519 moddir
.machineModel
= BRIG_MACHINE_LARGE
;
521 moddir
.machineModel
= BRIG_MACHINE_SMALL
;
522 moddir
.defaultFloatRound
= BRIG_ROUND_FLOAT_DEFAULT
;
523 brig_code
.add (&moddir
, sizeof (moddir
));
526 /* Free all BRIG data. */
529 brig_release_data (void)
531 delete brig_string_htab
;
532 brig_data
.release ();
533 brig_code
.release ();
534 brig_operand
.release ();
536 brig_initialized
= 0;
539 /* Enqueue operand OP. Return the offset at which it will be stored. */
542 enqueue_op (hsa_op_base
*op
)
546 if (op
->m_brig_op_offset
)
547 return op
->m_brig_op_offset
;
549 ret
= op_queue
.projected_size
;
550 op
->m_brig_op_offset
= op_queue
.projected_size
;
552 if (!op_queue
.first_op
)
553 op_queue
.first_op
= op
;
555 op_queue
.last_op
->m_next
= op
;
556 op_queue
.last_op
= op
;
558 if (is_a
<hsa_op_immed
*> (op
))
559 op_queue
.projected_size
+= sizeof (struct BrigOperandConstantBytes
);
560 else if (is_a
<hsa_op_reg
*> (op
))
561 op_queue
.projected_size
+= sizeof (struct BrigOperandRegister
);
562 else if (is_a
<hsa_op_address
*> (op
))
563 op_queue
.projected_size
+= sizeof (struct BrigOperandAddress
);
564 else if (is_a
<hsa_op_code_ref
*> (op
))
565 op_queue
.projected_size
+= sizeof (struct BrigOperandCodeRef
);
566 else if (is_a
<hsa_op_code_list
*> (op
))
567 op_queue
.projected_size
+= sizeof (struct BrigOperandCodeList
);
568 else if (is_a
<hsa_op_operand_list
*> (op
))
569 op_queue
.projected_size
+= sizeof (struct BrigOperandOperandList
);
575 static void emit_immediate_operand (hsa_op_immed
*imm
);
577 /* Emit directive describing a symbol if it has not been emitted already.
578 Return the offset of the directive. */
581 emit_directive_variable (struct hsa_symbol
*symbol
)
583 struct BrigDirectiveVariable dirvar
;
584 unsigned name_offset
;
585 static unsigned res_name_offset
;
587 if (symbol
->m_directive_offset
)
588 return symbol
->m_directive_offset
;
590 memset (&dirvar
, 0, sizeof (dirvar
));
591 dirvar
.base
.byteCount
= lendian16 (sizeof (dirvar
));
592 dirvar
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE
);
593 dirvar
.allocation
= symbol
->m_allocation
;
595 char prefix
= symbol
->m_global_scope_p
? '&' : '%';
597 if (symbol
->m_decl
&& TREE_CODE (symbol
->m_decl
) == RESULT_DECL
)
599 if (res_name_offset
== 0)
600 res_name_offset
= brig_emit_string (symbol
->m_name
, '%');
601 name_offset
= res_name_offset
;
603 else if (symbol
->m_name
)
604 name_offset
= brig_emit_string (symbol
->m_name
, prefix
);
608 snprintf (buf
, 64, "__%s_%i", hsa_seg_name (symbol
->m_segment
),
609 symbol
->m_name_number
);
610 name_offset
= brig_emit_string (buf
, prefix
);
613 dirvar
.name
= lendian32 (name_offset
);
615 if (symbol
->m_decl
&& TREE_CODE (symbol
->m_decl
) == CONST_DECL
)
617 hsa_op_immed
*tmp
= new hsa_op_immed (DECL_INITIAL (symbol
->m_decl
));
618 dirvar
.init
= lendian32 (enqueue_op (tmp
));
622 dirvar
.type
= lendian16 (symbol
->m_type
);
623 dirvar
.segment
= symbol
->m_segment
;
624 dirvar
.align
= symbol
->m_align
;
625 dirvar
.linkage
= symbol
->m_linkage
;
626 dirvar
.dim
.lo
= symbol
->m_dim
;
627 dirvar
.dim
.hi
= symbol
->m_dim
>> 32;
629 /* Global variables are just declared and linked via HSA runtime. */
630 if (symbol
->m_linkage
!= BRIG_ALLOCATION_PROGRAM
)
631 dirvar
.modifier
|= BRIG_VARIABLE_DEFINITION
;
634 if (symbol
->m_cst_value
)
636 dirvar
.modifier
|= BRIG_VARIABLE_CONST
;
637 dirvar
.init
= lendian32 (enqueue_op (symbol
->m_cst_value
));
640 symbol
->m_directive_offset
= brig_code
.add (&dirvar
, sizeof (dirvar
));
641 return symbol
->m_directive_offset
;
644 /* Emit directives describing either a function declaration or definition F and
645 return the produced BrigDirectiveExecutable structure. The function does
646 not take into account any instructions when calculating nextModuleEntry
647 field of the produced BrigDirectiveExecutable structure so when emitting
648 actual definitions, this field needs to be updated after all of the function
649 is actually added to the code section. */
651 static BrigDirectiveExecutable
*
652 emit_function_directives (hsa_function_representation
*f
, bool is_declaration
)
654 struct BrigDirectiveExecutable fndir
;
655 unsigned name_offset
, inarg_off
, scoped_off
, next_toplev_off
;
660 if (!f
->m_declaration_p
)
661 for (int i
= 0; f
->m_global_symbols
.iterate (i
, &sym
); i
++)
663 gcc_assert (!sym
->m_emitted_to_brig
);
664 sym
->m_emitted_to_brig
= true;
665 emit_directive_variable (sym
);
669 name_offset
= brig_emit_string (f
->m_name
, '&');
670 inarg_off
= brig_code
.total_size
+ sizeof (fndir
)
671 + (f
->m_output_arg
? sizeof (struct BrigDirectiveVariable
) : 0);
672 scoped_off
= inarg_off
673 + f
->m_input_args
.length () * sizeof (struct BrigDirectiveVariable
);
675 if (!f
->m_declaration_p
)
677 count
+= f
->m_spill_symbols
.length ();
678 count
+= f
->m_private_variables
.length ();
681 next_toplev_off
= scoped_off
+ count
* sizeof (struct BrigDirectiveVariable
);
683 memset (&fndir
, 0, sizeof (fndir
));
684 fndir
.base
.byteCount
= lendian16 (sizeof (fndir
));
685 fndir
.base
.kind
= lendian16 (f
->m_kern_p
? BRIG_KIND_DIRECTIVE_KERNEL
686 : BRIG_KIND_DIRECTIVE_FUNCTION
);
687 fndir
.name
= lendian32 (name_offset
);
688 fndir
.inArgCount
= lendian16 (f
->m_input_args
.length ());
689 fndir
.outArgCount
= lendian16 (f
->m_output_arg
? 1 : 0);
690 fndir
.firstInArg
= lendian32 (inarg_off
);
691 fndir
.firstCodeBlockEntry
= lendian32 (scoped_off
);
692 fndir
.nextModuleEntry
= lendian32 (next_toplev_off
);
693 fndir
.linkage
= f
->get_linkage ();
694 if (!f
->m_declaration_p
)
695 fndir
.modifier
|= BRIG_EXECUTABLE_DEFINITION
;
696 memset (&fndir
.reserved
, 0, sizeof (fndir
.reserved
));
698 /* Once we put a definition into function_offsets, we should not overwrite
699 it with a declaration of the function. */
700 if (f
->m_internal_fn
== NULL
)
702 if (!function_offsets
->get (f
->m_decl
) || !is_declaration
)
703 function_offsets
->put (f
->m_decl
, brig_code
.total_size
);
707 /* Internal function. */
708 hsa_internal_fn
**slot
709 = hsa_emitted_internal_decls
->find_slot (f
->m_internal_fn
, INSERT
);
710 hsa_internal_fn
*int_fn
= new hsa_internal_fn (f
->m_internal_fn
);
711 int_fn
->m_offset
= brig_code
.total_size
;
715 brig_code
.add (&fndir
, sizeof (fndir
), &ptr_to_fndir
);
718 emit_directive_variable (f
->m_output_arg
);
719 for (unsigned i
= 0; i
< f
->m_input_args
.length (); i
++)
720 emit_directive_variable (f
->m_input_args
[i
]);
722 if (!f
->m_declaration_p
)
724 for (int i
= 0; f
->m_spill_symbols
.iterate (i
, &sym
); i
++)
726 emit_directive_variable (sym
);
729 for (unsigned i
= 0; i
< f
->m_private_variables
.length (); i
++)
731 emit_directive_variable (f
->m_private_variables
[i
]);
736 return (BrigDirectiveExecutable
*) ptr_to_fndir
;
739 /* Emit a label directive for the given HBB. We assume it is about to start on
740 the current offset in the code section. */
743 emit_bb_label_directive (hsa_bb
*hbb
)
745 struct BrigDirectiveLabel lbldir
;
747 lbldir
.base
.byteCount
= lendian16 (sizeof (lbldir
));
748 lbldir
.base
.kind
= lendian16 (BRIG_KIND_DIRECTIVE_LABEL
);
750 snprintf (buf
, 32, "BB_%u_%i", DECL_UID (current_function_decl
),
752 lbldir
.name
= lendian32 (brig_emit_string (buf
, '@'));
754 hbb
->m_label_ref
.m_directive_offset
= brig_code
.add (&lbldir
,
759 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
760 holding such, for constants and registers. */
763 regtype_for_type (BrigType16_t t
)
782 case BRIG_TYPE_U16X2
:
784 case BRIG_TYPE_S16X2
:
785 case BRIG_TYPE_F16X2
:
786 return BRIG_TYPE_B32
;
793 case BRIG_TYPE_U16X4
:
794 case BRIG_TYPE_U32X2
:
796 case BRIG_TYPE_S16X4
:
797 case BRIG_TYPE_S32X2
:
798 case BRIG_TYPE_F16X4
:
799 case BRIG_TYPE_F32X2
:
800 return BRIG_TYPE_B64
;
803 case BRIG_TYPE_U8X16
:
804 case BRIG_TYPE_U16X8
:
805 case BRIG_TYPE_U32X4
:
806 case BRIG_TYPE_U64X2
:
807 case BRIG_TYPE_S8X16
:
808 case BRIG_TYPE_S16X8
:
809 case BRIG_TYPE_S32X4
:
810 case BRIG_TYPE_S64X2
:
811 case BRIG_TYPE_F16X8
:
812 case BRIG_TYPE_F32X4
:
813 case BRIG_TYPE_F64X2
:
814 return BRIG_TYPE_B128
;
821 /* Return the length of the BRIG type TYPE that is going to be streamed out as
822 an immediate constant (so it must not be B1). */
825 hsa_get_imm_brig_type_len (BrigType16_t type
)
827 BrigType16_t base_type
= type
& BRIG_TYPE_BASE_MASK
;
828 BrigType16_t pack_type
= type
& BRIG_TYPE_PACK_MASK
;
832 case BRIG_TYPE_PACK_NONE
:
834 case BRIG_TYPE_PACK_32
:
836 case BRIG_TYPE_PACK_64
:
838 case BRIG_TYPE_PACK_128
:
872 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
873 If NEED_LEN is not equal to zero, shrink or extend the value
874 to NEED_LEN bytes. Return how many bytes were written. */
877 emit_immediate_scalar_to_buffer (tree value
, char *data
, unsigned need_len
)
879 union hsa_bytes bytes
;
881 memset (&bytes
, 0, sizeof (bytes
));
882 tree type
= TREE_TYPE (value
);
883 gcc_checking_assert (TREE_CODE (type
) != VECTOR_TYPE
);
885 unsigned data_len
= tree_to_uhwi (TYPE_SIZE (type
)) / BITS_PER_UNIT
;
886 if (INTEGRAL_TYPE_P (type
)
887 || (POINTER_TYPE_P (type
) && TREE_CODE (value
) == INTEGER_CST
))
891 bytes
.b8
= (uint8_t) TREE_INT_CST_LOW (value
);
894 bytes
.b16
= (uint16_t) TREE_INT_CST_LOW (value
);
897 bytes
.b32
= (uint32_t) TREE_INT_CST_LOW (value
);
900 bytes
.b64
= (uint64_t) TREE_INT_CST_LOW (value
);
905 else if (SCALAR_FLOAT_TYPE_P (type
))
909 sorry ("Support for HSA does not implement immediate 16 bit FPU "
913 unsigned int_len
= GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type
));
914 /* There are always 32 bits in each long, no matter the size of the host's long. */
918 real_to_target (tmp
, TREE_REAL_CST_PTR (value
), TYPE_MODE (type
));
921 bytes
.b32
= (uint32_t) tmp
[0];
924 bytes
.b64
= (uint64_t)(uint32_t) tmp
[1];
926 bytes
.b64
|= (uint32_t) tmp
[0];
938 memcpy (data
, &bytes
, len
);
943 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size
)
946 *brig_repr_size
= hsa_get_imm_brig_type_len (m_type
);
948 if (m_tree_value
!= NULL_TREE
)
950 /* Update brig_repr_size for special tree values. */
951 if (TREE_CODE (m_tree_value
) == STRING_CST
)
952 *brig_repr_size
= TREE_STRING_LENGTH (m_tree_value
);
953 else if (TREE_CODE (m_tree_value
) == CONSTRUCTOR
)
955 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value
)));
957 unsigned total_len
= *brig_repr_size
;
959 /* As we can have a constructor with fewer elements, fill the memory with zeros. */
961 brig_repr
= XCNEWVEC (char, total_len
);
964 if (TREE_CODE (m_tree_value
) == VECTOR_CST
)
966 int i
, num
= VECTOR_CST_NELTS (m_tree_value
);
967 for (i
= 0; i
< num
; i
++)
969 tree v
= VECTOR_CST_ELT (m_tree_value
, i
);
970 unsigned actual
= emit_immediate_scalar_to_buffer (v
, p
, 0);
974 /* Vectors should have the exact size. */
975 gcc_assert (total_len
== 0);
977 else if (TREE_CODE (m_tree_value
) == STRING_CST
)
978 memcpy (brig_repr
, TREE_STRING_POINTER (m_tree_value
),
979 TREE_STRING_LENGTH (m_tree_value
));
980 else if (TREE_CODE (m_tree_value
) == COMPLEX_CST
)
982 gcc_assert (total_len
% 2 == 0);
985 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value
), p
,
988 gcc_assert (actual
== total_len
/ 2);
992 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value
), p
,
994 gcc_assert (actual
== total_len
/ 2);
996 else if (TREE_CODE (m_tree_value
) == CONSTRUCTOR
)
998 unsigned len
= CONSTRUCTOR_NELTS (m_tree_value
);
999 for (unsigned i
= 0; i
< len
; i
++)
1001 tree v
= CONSTRUCTOR_ELT (m_tree_value
, i
)->value
;
1002 unsigned actual
= emit_immediate_scalar_to_buffer (v
, p
, 0);
1003 total_len
-= actual
;
1008 emit_immediate_scalar_to_buffer (m_tree_value
, p
, total_len
);
1014 switch (*brig_repr_size
)
1017 bytes
.b8
= (uint8_t) m_int_value
;
1020 bytes
.b16
= (uint16_t) m_int_value
;
1023 bytes
.b32
= (uint32_t) m_int_value
;
1026 bytes
.b64
= (uint64_t) m_int_value
;
1032 brig_repr
= XNEWVEC (char, *brig_repr_size
);
1033 memcpy (brig_repr
, &bytes
, *brig_repr_size
);
1039 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1040 have been massaged to comply with various HSA/BRIG type requirements, so the
1041 only important aspect of that is the length (because HSAIL might expect
1042 smaller constants or become bit-data). The data should be represented
1043 according to what is in the tree representation. */
1046 emit_immediate_operand (hsa_op_immed
*imm
)
1048 unsigned brig_repr_size
;
1049 char *brig_repr
= imm
->emit_to_buffer (&brig_repr_size
);
1050 struct BrigOperandConstantBytes out
;
1052 memset (&out
, 0, sizeof (out
));
1053 out
.base
.byteCount
= lendian16 (sizeof (out
));
1054 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES
);
1055 uint32_t byteCount
= lendian32 (brig_repr_size
);
1056 out
.type
= lendian16 (imm
->m_type
);
1057 out
.bytes
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1058 brig_operand
.add (&out
, sizeof (out
));
1059 brig_data
.add (brig_repr
, brig_repr_size
);
1060 brig_data
.round_size_up (4);
1065 /* Emit a register BRIG operand REG. */
1068 emit_register_operand (hsa_op_reg
*reg
)
1070 struct BrigOperandRegister out
;
1072 out
.base
.byteCount
= lendian16 (sizeof (out
));
1073 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_REGISTER
);
1074 out
.regNum
= lendian32 (reg
->m_hard_num
);
1076 switch (regtype_for_type (reg
->m_type
))
1079 out
.regKind
= BRIG_REGISTER_KIND_SINGLE
;
1082 out
.regKind
= BRIG_REGISTER_KIND_DOUBLE
;
1084 case BRIG_TYPE_B128
:
1085 out
.regKind
= BRIG_REGISTER_KIND_QUAD
;
1088 out
.regKind
= BRIG_REGISTER_KIND_CONTROL
;
1094 brig_operand
.add (&out
, sizeof (out
));
1097 /* Emit an address BRIG operand ADDR. */
1100 emit_address_operand (hsa_op_address
*addr
)
1102 struct BrigOperandAddress out
;
1104 out
.base
.byteCount
= lendian16 (sizeof (out
));
1105 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_ADDRESS
);
1106 out
.symbol
= addr
->m_symbol
1107 ? lendian32 (emit_directive_variable (addr
->m_symbol
)) : 0;
1108 out
.reg
= addr
->m_reg
? lendian32 (enqueue_op (addr
->m_reg
)) : 0;
1110 if (sizeof (addr
->m_imm_offset
) == 8)
1112 out
.offset
.lo
= lendian32 (addr
->m_imm_offset
);
1113 out
.offset
.hi
= lendian32 (addr
->m_imm_offset
>> 32);
1117 gcc_assert (sizeof (addr
->m_imm_offset
) == 4);
1118 out
.offset
.lo
= lendian32 (addr
->m_imm_offset
);
1122 brig_operand
.add (&out
, sizeof (out
));
1125 /* Emit a code reference operand REF. */
1128 emit_code_ref_operand (hsa_op_code_ref
*ref
)
1130 struct BrigOperandCodeRef out
;
1132 out
.base
.byteCount
= lendian16 (sizeof (out
));
1133 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CODE_REF
);
1134 out
.ref
= lendian32 (ref
->m_directive_offset
);
1135 brig_operand
.add (&out
, sizeof (out
));
1138 /* Emit a code list operand CODE_LIST. */
1141 emit_code_list_operand (hsa_op_code_list
*code_list
)
1143 struct BrigOperandCodeList out
;
1144 unsigned args
= code_list
->m_offsets
.length ();
1146 for (unsigned i
= 0; i
< args
; i
++)
1147 gcc_assert (code_list
->m_offsets
[i
]);
1149 out
.base
.byteCount
= lendian16 (sizeof (out
));
1150 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_CODE_LIST
);
1152 uint32_t byteCount
= lendian32 (4 * args
);
1154 out
.elements
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1155 brig_data
.add (code_list
->m_offsets
.address (), args
* sizeof (uint32_t));
1156 brig_data
.round_size_up (4);
1157 brig_operand
.add (&out
, sizeof (out
));
1160 /* Emit an operand list operand OPERAND_LIST. */
1163 emit_operand_list_operand (hsa_op_operand_list
*operand_list
)
1165 struct BrigOperandOperandList out
;
1166 unsigned args
= operand_list
->m_offsets
.length ();
1168 for (unsigned i
= 0; i
< args
; i
++)
1169 gcc_assert (operand_list
->m_offsets
[i
]);
1171 out
.base
.byteCount
= lendian16 (sizeof (out
));
1172 out
.base
.kind
= lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST
);
1174 uint32_t byteCount
= lendian32 (4 * args
);
1176 out
.elements
= lendian32 (brig_data
.add (&byteCount
, sizeof (byteCount
)));
1177 brig_data
.add (operand_list
->m_offsets
.address (), args
* sizeof (uint32_t));
1178 brig_data
.round_size_up (4);
1179 brig_operand
.add (&out
, sizeof (out
));
1182 /* Emit all operands queued for writing. */
1185 emit_queued_operands (void)
1187 for (hsa_op_base
*op
= op_queue
.first_op
; op
; op
= op
->m_next
)
1189 gcc_assert (op
->m_brig_op_offset
== brig_operand
.total_size
);
1190 if (hsa_op_immed
*imm
= dyn_cast
<hsa_op_immed
*> (op
))
1191 emit_immediate_operand (imm
);
1192 else if (hsa_op_reg
*reg
= dyn_cast
<hsa_op_reg
*> (op
))
1193 emit_register_operand (reg
);
1194 else if (hsa_op_address
*addr
= dyn_cast
<hsa_op_address
*> (op
))
1195 emit_address_operand (addr
);
1196 else if (hsa_op_code_ref
*ref
= dyn_cast
<hsa_op_code_ref
*> (op
))
1197 emit_code_ref_operand (ref
);
1198 else if (hsa_op_code_list
*code_list
= dyn_cast
<hsa_op_code_list
*> (op
))
1199 emit_code_list_operand (code_list
);
1200 else if (hsa_op_operand_list
*l
= dyn_cast
<hsa_op_operand_list
*> (op
))
1201 emit_operand_list_operand (l
);
1207 /* Emit directives describing the function that is used for
1208 a function declaration. */
1210 static BrigDirectiveExecutable
*
1211 emit_function_declaration (tree decl
)
1213 hsa_function_representation
*f
= hsa_generate_function_declaration (decl
);
1215 BrigDirectiveExecutable
*e
= emit_function_directives (f
, true);
1216 emit_queued_operands ();
1223 /* Emit directives describing the function that is used for
1224 an internal function declaration. */
1226 static BrigDirectiveExecutable
*
1227 emit_internal_fn_decl (hsa_internal_fn
*fn
)
1229 hsa_function_representation
*f
= hsa_generate_internal_fn_decl (fn
);
1231 BrigDirectiveExecutable
*e
= emit_function_directives (f
, true);
1232 emit_queued_operands ();
1239 /* Enqueue all operands of INSN and return offset to BRIG data section
1240 to list of operand offsets. */
1243 emit_insn_operands (hsa_insn_basic
*insn
)
1245 auto_vec
<BrigOperandOffset32_t
, HSA_BRIG_INT_STORAGE_OPERANDS
>
1248 unsigned l
= insn
->operand_count ();
1250 /* We have N operands so use 4 * N for the byte_count. */
1251 uint32_t byte_count
= lendian32 (4 * l
);
1252 unsigned offset
= brig_data
.add (&byte_count
, sizeof (byte_count
));
1255 operand_offsets
.safe_grow (l
);
1256 for (unsigned i
= 0; i
< l
; i
++)
1257 operand_offsets
[i
] = lendian32 (enqueue_op (insn
->get_op (i
)));
1259 brig_data
.add (operand_offsets
.address (),
1260 l
* sizeof (BrigOperandOffset32_t
));
1262 brig_data
.round_size_up (4);
1266 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1267 to BRIG data section to list of operand offsets. */
1270 emit_operands (hsa_op_base
*op0
, hsa_op_base
*op1
= NULL
,
1271 hsa_op_base
*op2
= NULL
)
1273 auto_vec
<BrigOperandOffset32_t
, HSA_BRIG_INT_STORAGE_OPERANDS
>
1276 gcc_checking_assert (op0
!= NULL
);
1277 operand_offsets
.safe_push (enqueue_op (op0
));
1281 operand_offsets
.safe_push (enqueue_op (op1
));
1283 operand_offsets
.safe_push (enqueue_op (op2
));
1286 unsigned l
= operand_offsets
.length ();
1288 /* We have N operands so use 4 * N for the byte_count. */
1289 uint32_t byte_count
= lendian32 (4 * l
);
1291 unsigned offset
= brig_data
.add (&byte_count
, sizeof (byte_count
));
1292 brig_data
.add (operand_offsets
.address (),
1293 l
* sizeof (BrigOperandOffset32_t
));
1295 brig_data
.round_size_up (4);
1300 /* Emit an HSA memory instruction and all necessary directives, schedule
1301 necessary operands for writing. */
1304 emit_memory_insn (hsa_insn_mem
*mem
)
1306 struct BrigInstMem repr
;
1307 gcc_checking_assert (mem
->operand_count () == 2);
1309 hsa_op_address
*addr
= as_a
<hsa_op_address
*> (mem
->get_op (1));
1311 /* This is necessary because of the erroneous typedef of
1312 BrigMemoryModifier8_t which introduces padding which may then contain
1313 random stuff (which we do not want so that we can test things don't
1315 memset (&repr
, 0, sizeof (repr
));
1316 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1317 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MEM
);
1318 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1319 repr
.base
.type
= lendian16 (mem
->m_type
);
1320 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1323 repr
.segment
= addr
->m_symbol
->m_segment
;
1325 repr
.segment
= BRIG_SEGMENT_FLAT
;
1327 repr
.equivClass
= mem
->m_equiv_class
;
1328 repr
.align
= mem
->m_align
;
1329 if (mem
->m_opcode
== BRIG_OPCODE_LD
)
1330 repr
.width
= BRIG_WIDTH_1
;
1332 repr
.width
= BRIG_WIDTH_NONE
;
1333 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1334 brig_code
.add (&repr
, sizeof (repr
));
1338 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1339 necessary operands for writing. */
1342 emit_signal_insn (hsa_insn_signal
*mem
)
1344 struct BrigInstSignal repr
;
1346 memset (&repr
, 0, sizeof (repr
));
1347 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1348 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SIGNAL
);
1349 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1350 repr
.base
.type
= lendian16 (mem
->m_type
);
1351 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1353 repr
.memoryOrder
= mem
->m_memory_order
;
1354 repr
.signalOperation
= mem
->m_signalop
;
1355 repr
.signalType
= hsa_machine_large_p () ? BRIG_TYPE_SIG64
: BRIG_TYPE_SIG32
;
1357 brig_code
.add (&repr
, sizeof (repr
));
1361 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1362 necessary operands for writing. */
1365 emit_atomic_insn (hsa_insn_atomic
*mem
)
1367 struct BrigInstAtomic repr
;
1369 /* Either operand[0] or operand[1] must be an address operand. */
1370 hsa_op_address
*addr
= NULL
;
1371 if (is_a
<hsa_op_address
*> (mem
->get_op (0)))
1372 addr
= as_a
<hsa_op_address
*> (mem
->get_op (0));
1374 addr
= as_a
<hsa_op_address
*> (mem
->get_op (1));
1376 memset (&repr
, 0, sizeof (repr
));
1377 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1378 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_ATOMIC
);
1379 repr
.base
.opcode
= lendian16 (mem
->m_opcode
);
1380 repr
.base
.type
= lendian16 (mem
->m_type
);
1381 repr
.base
.operands
= lendian32 (emit_insn_operands (mem
));
1384 repr
.segment
= addr
->m_symbol
->m_segment
;
1386 repr
.segment
= BRIG_SEGMENT_FLAT
;
1387 repr
.memoryOrder
= mem
->m_memoryorder
;
1388 repr
.memoryScope
= mem
->m_memoryscope
;
1389 repr
.atomicOperation
= mem
->m_atomicop
;
1391 brig_code
.add (&repr
, sizeof (repr
));
1395 /* Emit an HSA LDA instruction and all necessary directives, schedule
1396 necessary operands for writing. */
1399 emit_addr_insn (hsa_insn_basic
*insn
)
1401 struct BrigInstAddr repr
;
1403 hsa_op_address
*addr
= as_a
<hsa_op_address
*> (insn
->get_op (1));
1405 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1406 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_ADDR
);
1407 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1408 repr
.base
.type
= lendian16 (insn
->m_type
);
1409 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1412 repr
.segment
= addr
->m_symbol
->m_segment
;
1414 repr
.segment
= BRIG_SEGMENT_FLAT
;
1415 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1417 brig_code
.add (&repr
, sizeof (repr
));
1421 /* Emit an HSA segment conversion instruction and all necessary directives,
1422 schedule necessary operands for writing. */
1425 emit_segment_insn (hsa_insn_seg
*seg
)
1427 struct BrigInstSegCvt repr
;
1429 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1430 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SEG_CVT
);
1431 repr
.base
.opcode
= lendian16 (seg
->m_opcode
);
1432 repr
.base
.type
= lendian16 (seg
->m_type
);
1433 repr
.base
.operands
= lendian32 (emit_insn_operands (seg
));
1434 repr
.sourceType
= lendian16 (as_a
<hsa_op_reg
*> (seg
->get_op (1))->m_type
);
1435 repr
.segment
= seg
->m_segment
;
1438 brig_code
.add (&repr
, sizeof (repr
));
1443 /* Emit an HSA alloca instruction and all necessary directives,
1444 schedule necessary operands for writing. */
1447 emit_alloca_insn (hsa_insn_alloca
*alloca
)
1449 struct BrigInstMem repr
;
1450 gcc_checking_assert (alloca
->operand_count () == 2);
1452 memset (&repr
, 0, sizeof (repr
));
1453 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1454 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MEM
);
1455 repr
.base
.opcode
= lendian16 (alloca
->m_opcode
);
1456 repr
.base
.type
= lendian16 (alloca
->m_type
);
1457 repr
.base
.operands
= lendian32 (emit_insn_operands (alloca
));
1458 repr
.segment
= BRIG_SEGMENT_PRIVATE
;
1460 repr
.equivClass
= 0;
1461 repr
.align
= alloca
->m_align
;
1462 repr
.width
= BRIG_WIDTH_NONE
;
1463 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1464 brig_code
.add (&repr
, sizeof (repr
));
1468 /* Emit an HSA comparison instruction and all necessary directives,
1469 schedule necessary operands for writing. */
1472 emit_cmp_insn (hsa_insn_cmp
*cmp
)
1474 struct BrigInstCmp repr
;
1476 memset (&repr
, 0, sizeof (repr
));
1477 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1478 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_CMP
);
1479 repr
.base
.opcode
= lendian16 (cmp
->m_opcode
);
1480 repr
.base
.type
= lendian16 (cmp
->m_type
);
1481 repr
.base
.operands
= lendian32 (emit_insn_operands (cmp
));
1483 if (is_a
<hsa_op_reg
*> (cmp
->get_op (1)))
1485 = lendian16 (as_a
<hsa_op_reg
*> (cmp
->get_op (1))->m_type
);
1488 = lendian16 (as_a
<hsa_op_immed
*> (cmp
->get_op (1))->m_type
);
1490 repr
.compare
= cmp
->m_compare
;
1493 brig_code
.add (&repr
, sizeof (repr
));
1497 /* Emit an HSA generic branching/synchronization instruction. */
1500 emit_generic_branch_insn (hsa_insn_br
*br
)
1502 struct BrigInstBr repr
;
1503 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1504 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1505 repr
.base
.opcode
= lendian16 (br
->m_opcode
);
1506 repr
.width
= br
->m_width
;
1507 repr
.base
.type
= lendian16 (br
->m_type
);
1508 repr
.base
.operands
= lendian32 (emit_insn_operands (br
));
1509 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1511 brig_code
.add (&repr
, sizeof (repr
));
1515 /* Emit an HSA conditional branching instruction and all necessary directives,
1516 schedule necessary operands for writing. */
1519 emit_cond_branch_insn (hsa_insn_cbr
*br
)
1521 struct BrigInstBr repr
;
1523 basic_block target
= NULL
;
1527 /* At the moment we only handle direct conditional jumps. */
1528 gcc_assert (br
->m_opcode
== BRIG_OPCODE_CBR
);
1529 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1530 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1531 repr
.base
.opcode
= lendian16 (br
->m_opcode
);
1532 repr
.width
= br
->m_width
;
1533 /* For Conditional jumps the type is always B1. */
1534 repr
.base
.type
= lendian16 (BRIG_TYPE_B1
);
1536 FOR_EACH_EDGE (e
, ei
, br
->m_bb
->succs
)
1537 if (e
->flags
& EDGE_TRUE_VALUE
)
1542 gcc_assert (target
);
1545 = lendian32 (emit_operands (br
->get_op (0),
1546 &hsa_bb_for_bb (target
)->m_label_ref
));
1547 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1549 brig_code
.add (&repr
, sizeof (repr
));
1553 /* Emit an HSA unconditional jump branching instruction that points to
1554 a label REFERENCE. */
1557 emit_unconditional_jump (hsa_op_code_ref
*reference
)
1559 struct BrigInstBr repr
;
1561 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1562 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1563 repr
.base
.opcode
= lendian16 (BRIG_OPCODE_BR
);
1564 repr
.base
.type
= lendian16 (BRIG_TYPE_NONE
);
1565 /* Direct branches to labels must be width(all). */
1566 repr
.width
= BRIG_WIDTH_ALL
;
1568 repr
.base
.operands
= lendian32 (emit_operands (reference
));
1569 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1570 brig_code
.add (&repr
, sizeof (repr
));
1574 /* Emit an HSA switch jump instruction that uses a jump table to
1575 jump to a destination label. */
1578 emit_switch_insn (hsa_insn_sbr
*sbr
)
1580 struct BrigInstBr repr
;
1582 gcc_assert (sbr
->m_opcode
== BRIG_OPCODE_SBR
);
1583 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1584 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1585 repr
.base
.opcode
= lendian16 (sbr
->m_opcode
);
1586 repr
.width
= BRIG_WIDTH_1
;
1587 /* The type of the instruction is the type of the index register. */
1588 hsa_op_reg
*index
= as_a
<hsa_op_reg
*> (sbr
->get_op (0));
1589 repr
.base
.type
= lendian16 (index
->m_type
);
1591 = lendian32 (emit_operands (sbr
->get_op (0), sbr
->m_label_code_list
));
1592 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1594 brig_code
.add (&repr
, sizeof (repr
));
1598 /* Emit a HSA convert instruction and all necessary directives, schedule
1599 necessary operands for writing. */
1602 emit_cvt_insn (hsa_insn_cvt
*insn
)
1604 struct BrigInstCvt repr
;
1605 BrigType16_t srctype
;
1607 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1608 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_CVT
);
1609 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1610 repr
.base
.type
= lendian16 (insn
->m_type
);
1611 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1613 if (is_a
<hsa_op_reg
*> (insn
->get_op (1)))
1614 srctype
= as_a
<hsa_op_reg
*> (insn
->get_op (1))->m_type
;
1616 srctype
= as_a
<hsa_op_immed
*> (insn
->get_op (1))->m_type
;
1617 repr
.sourceType
= lendian16 (srctype
);
1619 /* float to smaller float requires a rounding setting (we default
1621 if (hsa_type_float_p (insn
->m_type
)
1622 && (!hsa_type_float_p (srctype
)
1623 || ((insn
->m_type
& BRIG_TYPE_BASE_MASK
)
1624 < (srctype
& BRIG_TYPE_BASE_MASK
))))
1625 repr
.round
= BRIG_ROUND_FLOAT_NEAR_EVEN
;
1626 else if (hsa_type_integer_p (insn
->m_type
) &&
1627 hsa_type_float_p (srctype
))
1628 repr
.round
= BRIG_ROUND_INTEGER_ZERO
;
1630 repr
.round
= BRIG_ROUND_NONE
;
1631 brig_code
.add (&repr
, sizeof (repr
));
1635 /* Emit call instruction INSN, where this instruction must be closed
1636 within a call block instruction. */
1639 emit_call_insn (hsa_insn_call
*call
)
1641 struct BrigInstBr repr
;
1643 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1644 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BR
);
1645 repr
.base
.opcode
= lendian16 (BRIG_OPCODE_CALL
);
1646 repr
.base
.type
= lendian16 (BRIG_TYPE_NONE
);
1649 = lendian32 (emit_operands (call
->m_result_code_list
, &call
->m_func
,
1650 call
->m_args_code_list
));
1652 /* Internal functions have not set m_called_function. */
1653 if (call
->m_called_function
)
1655 function_linkage_pair
pair (call
->m_called_function
,
1656 call
->m_func
.m_brig_op_offset
);
1657 function_call_linkage
.safe_push (pair
);
1661 hsa_internal_fn
*slot
1662 = hsa_emitted_internal_decls
->find (call
->m_called_internal_fn
);
1664 gcc_assert (slot
->m_offset
> 0);
1665 call
->m_func
.m_directive_offset
= slot
->m_offset
;
1668 repr
.width
= BRIG_WIDTH_ALL
;
1669 memset (&repr
.reserved
, 0, sizeof (repr
.reserved
));
1671 brig_code
.add (&repr
, sizeof (repr
));
1675 /* Emit argument block directive. */
1678 emit_arg_block_insn (hsa_insn_arg_block
*insn
)
1680 switch (insn
->m_kind
)
1682 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START
:
1684 struct BrigDirectiveArgBlock repr
;
1685 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1686 repr
.base
.kind
= lendian16 (insn
->m_kind
);
1687 brig_code
.add (&repr
, sizeof (repr
));
1689 for (unsigned i
= 0; i
< insn
->m_call_insn
->m_input_args
.length (); i
++)
1691 insn
->m_call_insn
->m_args_code_list
->m_offsets
[i
]
1692 = lendian32 (emit_directive_variable
1693 (insn
->m_call_insn
->m_input_args
[i
]));
1697 if (insn
->m_call_insn
->m_output_arg
)
1699 insn
->m_call_insn
->m_result_code_list
->m_offsets
[0]
1700 = lendian32 (emit_directive_variable
1701 (insn
->m_call_insn
->m_output_arg
));
1707 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END
:
1709 struct BrigDirectiveArgBlock repr
;
1710 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1711 repr
.base
.kind
= lendian16 (insn
->m_kind
);
1712 brig_code
.add (&repr
, sizeof (repr
));
1722 /* Emit comment directive. */
1725 emit_comment_insn (hsa_insn_comment
*insn
)
1727 struct BrigDirectiveComment repr
;
1728 memset (&repr
, 0, sizeof (repr
));
1730 repr
.base
.byteCount
= lendian16 (sizeof (repr
));
1731 repr
.base
.kind
= lendian16 (insn
->m_opcode
);
1732 repr
.name
= brig_emit_string (insn
->m_comment
, '\0', false);
1733 brig_code
.add (&repr
, sizeof (repr
));
1736 /* Emit queue instruction INSN. */
1739 emit_queue_insn (hsa_insn_queue
*insn
)
1742 memset (&repr
, 0, sizeof (repr
));
1744 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1745 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_QUEUE
);
1746 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1747 repr
.base
.type
= lendian16 (insn
->m_type
);
1748 repr
.segment
= insn
->m_segment
;
1749 repr
.memoryOrder
= insn
->m_memory_order
;
1750 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1751 brig_data
.round_size_up (4);
1752 brig_code
.add (&repr
, sizeof (repr
));
1757 /* Emit source type instruction INSN. */
1760 emit_srctype_insn (hsa_insn_srctype
*insn
)
1762 /* We assume that BrigInstSourceType has a BrigInstBasic prefix. */
1763 struct BrigInstSourceType repr
;
1764 unsigned operand_count
= insn
->operand_count ();
1765 gcc_checking_assert (operand_count
>= 2);
1767 memset (&repr
, 0, sizeof (repr
));
1768 repr
.sourceType
= lendian16 (insn
->m_source_type
);
1769 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1770 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SOURCE_TYPE
);
1771 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1772 repr
.base
.type
= lendian16 (insn
->m_type
);
1774 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1775 brig_code
.add (&repr
, sizeof (struct BrigInstSourceType
));
1779 /* Emit packed instruction INSN. */
1782 emit_packed_insn (hsa_insn_packed
*insn
)
1784 /* We assume that BrigInstSourceType has a BrigInstBasic prefix. */
1785 struct BrigInstSourceType repr
;
1786 unsigned operand_count
= insn
->operand_count ();
1787 gcc_checking_assert (operand_count
>= 2);
1789 memset (&repr
, 0, sizeof (repr
));
1790 repr
.sourceType
= lendian16 (insn
->m_source_type
);
1791 repr
.base
.base
.byteCount
= lendian16 (sizeof (repr
));
1792 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_SOURCE_TYPE
);
1793 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1794 repr
.base
.type
= lendian16 (insn
->m_type
);
1796 if (insn
->m_opcode
== BRIG_OPCODE_COMBINE
)
1798 /* Create operand list for packed type. */
1799 for (unsigned i
= 1; i
< operand_count
; i
++)
1801 gcc_checking_assert (insn
->get_op (i
));
1802 insn
->m_operand_list
->m_offsets
[i
- 1]
1803 = lendian32 (enqueue_op (insn
->get_op (i
)));
1806 repr
.base
.operands
= lendian32 (emit_operands (insn
->get_op (0),
1807 insn
->m_operand_list
));
1809 else if (insn
->m_opcode
== BRIG_OPCODE_EXPAND
)
1811 /* Create operand list for packed type. */
1812 for (unsigned i
= 0; i
< operand_count
- 1; i
++)
1814 gcc_checking_assert (insn
->get_op (i
));
1815 insn
->m_operand_list
->m_offsets
[i
]
1816 = lendian32 (enqueue_op (insn
->get_op (i
)));
1819 unsigned ops
= emit_operands (insn
->m_operand_list
,
1820 insn
->get_op (insn
->operand_count () - 1));
1821 repr
.base
.operands
= lendian32 (ops
);
1825 brig_code
.add (&repr
, sizeof (struct BrigInstSourceType
));
1829 /* Emit a basic HSA instruction and all necessary directives, schedule
1830 necessary operands for writing. */
1833 emit_basic_insn (hsa_insn_basic
*insn
)
1835 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1836 struct BrigInstMod repr
;
1839 memset (&repr
, 0, sizeof (repr
));
1840 repr
.base
.base
.byteCount
= lendian16 (sizeof (BrigInstBasic
));
1841 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_BASIC
);
1842 repr
.base
.opcode
= lendian16 (insn
->m_opcode
);
1843 switch (insn
->m_opcode
)
1845 /* And the bit-logical operations need bit types and whine about
1846 arithmetic types :-/ */
1847 case BRIG_OPCODE_AND
:
1848 case BRIG_OPCODE_OR
:
1849 case BRIG_OPCODE_XOR
:
1850 case BRIG_OPCODE_NOT
:
1851 type
= regtype_for_type (insn
->m_type
);
1854 type
= insn
->m_type
;
1857 repr
.base
.type
= lendian16 (type
);
1858 repr
.base
.operands
= lendian32 (emit_insn_operands (insn
));
1860 if (hsa_type_packed_p (type
))
1862 if (hsa_type_float_p (type
)
1863 && !hsa_opcode_floating_bit_insn_p (insn
->m_opcode
))
1864 repr
.round
= BRIG_ROUND_FLOAT_NEAR_EVEN
;
1867 /* We assume that destination and sources agree in packing layout. */
1868 if (insn
->num_used_ops () >= 2)
1869 repr
.pack
= BRIG_PACK_PP
;
1871 repr
.pack
= BRIG_PACK_P
;
1873 repr
.base
.base
.byteCount
= lendian16 (sizeof (BrigInstMod
));
1874 repr
.base
.base
.kind
= lendian16 (BRIG_KIND_INST_MOD
);
1875 brig_code
.add (&repr
, sizeof (struct BrigInstMod
));
1878 brig_code
.add (&repr
, sizeof (struct BrigInstBasic
));
1882 /* Emit an HSA instruction and all necessary directives, schedule necessary
1883 operands for writing. */
1886 emit_insn (hsa_insn_basic
*insn
)
1888 gcc_assert (!is_a
<hsa_insn_phi
*> (insn
));
1890 insn
->m_brig_offset
= brig_code
.total_size
;
1892 if (hsa_insn_signal
*signal
= dyn_cast
<hsa_insn_signal
*> (insn
))
1893 emit_signal_insn (signal
);
1894 else if (hsa_insn_atomic
*atom
= dyn_cast
<hsa_insn_atomic
*> (insn
))
1895 emit_atomic_insn (atom
);
1896 else if (hsa_insn_mem
*mem
= dyn_cast
<hsa_insn_mem
*> (insn
))
1897 emit_memory_insn (mem
);
1898 else if (insn
->m_opcode
== BRIG_OPCODE_LDA
)
1899 emit_addr_insn (insn
);
1900 else if (hsa_insn_seg
*seg
= dyn_cast
<hsa_insn_seg
*> (insn
))
1901 emit_segment_insn (seg
);
1902 else if (hsa_insn_cmp
*cmp
= dyn_cast
<hsa_insn_cmp
*> (insn
))
1903 emit_cmp_insn (cmp
);
1904 else if (hsa_insn_cbr
*br
= dyn_cast
<hsa_insn_cbr
*> (insn
))
1905 emit_cond_branch_insn (br
);
1906 else if (hsa_insn_sbr
*sbr
= dyn_cast
<hsa_insn_sbr
*> (insn
))
1908 if (switch_instructions
== NULL
)
1909 switch_instructions
= new vec
<hsa_insn_sbr
*> ();
1911 switch_instructions
->safe_push (sbr
);
1912 emit_switch_insn (sbr
);
1914 else if (hsa_insn_br
*br
= dyn_cast
<hsa_insn_br
*> (insn
))
1915 emit_generic_branch_insn (br
);
1916 else if (hsa_insn_arg_block
*block
= dyn_cast
<hsa_insn_arg_block
*> (insn
))
1917 emit_arg_block_insn (block
);
1918 else if (hsa_insn_call
*call
= dyn_cast
<hsa_insn_call
*> (insn
))
1919 emit_call_insn (call
);
1920 else if (hsa_insn_comment
*comment
= dyn_cast
<hsa_insn_comment
*> (insn
))
1921 emit_comment_insn (comment
);
1922 else if (hsa_insn_queue
*queue
= dyn_cast
<hsa_insn_queue
*> (insn
))
1923 emit_queue_insn (queue
);
1924 else if (hsa_insn_srctype
*srctype
= dyn_cast
<hsa_insn_srctype
*> (insn
))
1925 emit_srctype_insn (srctype
);
1926 else if (hsa_insn_packed
*packed
= dyn_cast
<hsa_insn_packed
*> (insn
))
1927 emit_packed_insn (packed
);
1928 else if (hsa_insn_cvt
*cvt
= dyn_cast
<hsa_insn_cvt
*> (insn
))
1929 emit_cvt_insn (cvt
);
1930 else if (hsa_insn_alloca
*alloca
= dyn_cast
<hsa_insn_alloca
*> (insn
))
1931 emit_alloca_insn (alloca
);
1933 emit_basic_insn (insn
);
1936 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1937 or we are about to finish emitting code, if it is NULL. If the fall through
1938 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1941 perhaps_emit_branch (basic_block bb
, basic_block next_bb
)
1943 basic_block t_bb
= NULL
, ff
= NULL
;
1948 /* If the last instruction of BB is a switch, ignore emission of all
1950 if (hsa_bb_for_bb (bb
)->m_last_insn
1951 && is_a
<hsa_insn_sbr
*> (hsa_bb_for_bb (bb
)->m_last_insn
))
1954 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1955 if (e
->flags
& EDGE_TRUE_VALUE
)
1966 if (!ff
|| ff
== next_bb
|| ff
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1969 emit_unconditional_jump (&hsa_bb_for_bb (ff
)->m_label_ref
);
1972 /* Emit the function represented by hsa_cfun to the various brig sections. */
1975 hsa_brig_emit_function (void)
1977 basic_block bb
, prev_bb
;
1978 hsa_insn_basic
*insn
;
1979 BrigDirectiveExecutable
*ptr_to_fndir
;
1983 brig_insn_count
= 0;
1984 memset (&op_queue
, 0, sizeof (op_queue
));
1985 op_queue
.projected_size
= brig_operand
.total_size
;
1987 if (!function_offsets
)
1988 function_offsets
= new hash_map
<tree
, BrigCodeOffset32_t
> ();
1990 if (!emitted_declarations
)
1991 emitted_declarations
= new hash_map
<tree
, BrigDirectiveExecutable
*> ();
1993 for (unsigned i
= 0; i
< hsa_cfun
->m_called_functions
.length (); i
++)
1995 tree called
= hsa_cfun
->m_called_functions
[i
];
1997 /* If the function has no definition, emit a declaration. */
1998 if (!emitted_declarations
->get (called
))
2000 BrigDirectiveExecutable
*e
= emit_function_declaration (called
);
2001 emitted_declarations
->put (called
, e
);
2005 for (unsigned i
= 0; i
< hsa_cfun
->m_called_internal_fns
.length (); i
++)
2007 hsa_internal_fn
*called
= hsa_cfun
->m_called_internal_fns
[i
];
2008 emit_internal_fn_decl (called
);
2011 ptr_to_fndir
= emit_function_directives (hsa_cfun
, false);
2012 for (insn
= hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->m_first_insn
;
2014 insn
= insn
->m_next
)
2016 prev_bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
);
2017 FOR_EACH_BB_FN (bb
, cfun
)
2019 perhaps_emit_branch (prev_bb
, bb
);
2020 emit_bb_label_directive (hsa_bb_for_bb (bb
));
2021 for (insn
= hsa_bb_for_bb (bb
)->m_first_insn
; insn
; insn
= insn
->m_next
)
2025 perhaps_emit_branch (prev_bb
, NULL
);
2026 ptr_to_fndir
->nextModuleEntry
= lendian32 (brig_code
.total_size
);
2028 /* Fill up label references for all sbr instructions. */
2029 if (switch_instructions
)
2031 for (unsigned i
= 0; i
< switch_instructions
->length (); i
++)
2033 hsa_insn_sbr
*sbr
= (*switch_instructions
)[i
];
2034 for (unsigned j
= 0; j
< sbr
->m_jump_table
.length (); j
++)
2036 hsa_bb
*hbb
= hsa_bb_for_bb (sbr
->m_jump_table
[j
]);
2037 sbr
->m_label_code_list
->m_offsets
[j
]
2038 = hbb
->m_label_ref
.m_directive_offset
;
2042 switch_instructions
->release ();
2043 delete switch_instructions
;
2044 switch_instructions
= NULL
;
2049 fprintf (dump_file
, "------- After BRIG emission: -------\n");
2050 dump_hsa_cfun (dump_file
);
2053 emit_queued_operands ();
2056 /* Emit all symbols related to OMP. */
2059 hsa_brig_emit_omp_symbols (void)
2062 emit_directive_variable (hsa_num_threads
);
2065 /* Create and return __hsa_global_variables symbol that contains
2066 all the information consumed by libgomp to link global variables
2067 with their string names used by an HSA kernel. */
2070 hsa_output_global_variables ()
2072 unsigned l
= hsa_global_variable_symbols
->elements ();
2074 tree variable_info_type
= make_node (RECORD_TYPE
);
2075 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2076 get_identifier ("name"), ptr_type_node
);
2077 DECL_CHAIN (id_f1
) = NULL_TREE
;
2078 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2079 get_identifier ("omp_data_size"),
2081 DECL_CHAIN (id_f2
) = id_f1
;
2082 finish_builtin_struct (variable_info_type
, "__hsa_variable_info", id_f2
,
2085 tree int_num_of_global_vars
;
2086 int_num_of_global_vars
= build_int_cst (uint32_type_node
, l
);
2087 tree global_vars_num_index_type
= build_index_type (int_num_of_global_vars
);
2088 tree global_vars_array_type
= build_array_type (variable_info_type
,
2089 global_vars_num_index_type
);
2090 TYPE_ARTIFICIAL (global_vars_array_type
) = 1;
2092 vec
<constructor_elt
, va_gc
> *global_vars_vec
= NULL
;
2094 for (hash_table
<hsa_noop_symbol_hasher
>::iterator it
2095 = hsa_global_variable_symbols
->begin ();
2096 it
!= hsa_global_variable_symbols
->end (); ++it
)
2098 unsigned len
= strlen ((*it
)->m_name
);
2099 char *copy
= XNEWVEC (char, len
+ 2);
2101 memcpy (copy
+ 1, (*it
)->m_name
, len
);
2102 copy
[len
+ 1] = '\0';
2104 hsa_sanitize_name (copy
);
2106 tree var_name
= build_string (len
, copy
);
2107 TREE_TYPE (var_name
)
2108 = build_array_type (char_type_node
, build_index_type (size_int (len
)));
2111 vec
<constructor_elt
, va_gc
> *variable_info_vec
= NULL
;
2112 CONSTRUCTOR_APPEND_ELT (variable_info_vec
, NULL_TREE
,
2114 build_pointer_type (TREE_TYPE (var_name
)),
2116 CONSTRUCTOR_APPEND_ELT (variable_info_vec
, NULL_TREE
,
2117 build_fold_addr_expr ((*it
)->m_decl
));
2119 tree variable_info_ctor
= build_constructor (variable_info_type
,
2122 CONSTRUCTOR_APPEND_ELT (global_vars_vec
, NULL_TREE
,
2123 variable_info_ctor
);
2126 tree global_vars_ctor
= build_constructor (global_vars_array_type
,
2130 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_global_variables", 1);
2131 tree global_vars_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2132 get_identifier (tmp_name
),
2133 global_vars_array_type
);
2134 TREE_STATIC (global_vars_table
) = 1;
2135 TREE_READONLY (global_vars_table
) = 1;
2136 TREE_PUBLIC (global_vars_table
) = 0;
2137 DECL_ARTIFICIAL (global_vars_table
) = 1;
2138 DECL_IGNORED_P (global_vars_table
) = 1;
2139 DECL_EXTERNAL (global_vars_table
) = 0;
2140 TREE_CONSTANT (global_vars_table
) = 1;
2141 DECL_INITIAL (global_vars_table
) = global_vars_ctor
;
2142 varpool_node::finalize_decl (global_vars_table
);
2144 return global_vars_table
;
2147 /* Create __hsa_host_functions and __hsa_kernels that contain
2148 all the information consumed by libgomp to register all kernels
2149 in the BRIG binary. */
2152 hsa_output_kernels (tree
*host_func_table
, tree
*kernels
)
2154 unsigned map_count
= hsa_get_number_decl_kernel_mappings ();
2156 tree int_num_of_kernels
;
2157 int_num_of_kernels
= build_int_cst (uint32_type_node
, map_count
);
2158 tree kernel_num_index_type
= build_index_type (int_num_of_kernels
);
2159 tree host_functions_array_type
= build_array_type (ptr_type_node
,
2160 kernel_num_index_type
);
2161 TYPE_ARTIFICIAL (host_functions_array_type
) = 1;
2163 vec
<constructor_elt
, va_gc
> *host_functions_vec
= NULL
;
2164 for (unsigned i
= 0; i
< map_count
; ++i
)
2166 tree decl
= hsa_get_decl_kernel_mapping_decl (i
);
2167 tree host_fn
= build_fold_addr_expr (hsa_get_host_function (decl
));
2168 CONSTRUCTOR_APPEND_ELT (host_functions_vec
, NULL_TREE
, host_fn
);
2170 tree host_functions_ctor
= build_constructor (host_functions_array_type
,
2171 host_functions_vec
);
2173 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_host_functions", 1);
2174 tree hsa_host_func_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2175 get_identifier (tmp_name
),
2176 host_functions_array_type
);
2177 TREE_STATIC (hsa_host_func_table
) = 1;
2178 TREE_READONLY (hsa_host_func_table
) = 1;
2179 TREE_PUBLIC (hsa_host_func_table
) = 0;
2180 DECL_ARTIFICIAL (hsa_host_func_table
) = 1;
2181 DECL_IGNORED_P (hsa_host_func_table
) = 1;
2182 DECL_EXTERNAL (hsa_host_func_table
) = 0;
2183 TREE_CONSTANT (hsa_host_func_table
) = 1;
2184 DECL_INITIAL (hsa_host_func_table
) = host_functions_ctor
;
2185 varpool_node::finalize_decl (hsa_host_func_table
);
2186 *host_func_table
= hsa_host_func_table
;
2188 /* Following code emits list of kernel_info structures. */
2190 tree kernel_info_type
= make_node (RECORD_TYPE
);
2191 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2192 get_identifier ("name"), ptr_type_node
);
2193 DECL_CHAIN (id_f1
) = NULL_TREE
;
2194 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2195 get_identifier ("omp_data_size"),
2196 unsigned_type_node
);
2197 DECL_CHAIN (id_f2
) = id_f1
;
2198 tree id_f3
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2199 get_identifier ("gridified_kernel_p"),
2201 DECL_CHAIN (id_f3
) = id_f2
;
2202 tree id_f4
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2203 get_identifier ("kernel_dependencies_count"),
2204 unsigned_type_node
);
2205 DECL_CHAIN (id_f4
) = id_f3
;
2206 tree id_f5
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2207 get_identifier ("kernel_dependencies"),
2208 build_pointer_type (build_pointer_type
2210 DECL_CHAIN (id_f5
) = id_f4
;
2211 finish_builtin_struct (kernel_info_type
, "__hsa_kernel_info", id_f5
,
2214 int_num_of_kernels
= build_int_cstu (uint32_type_node
, map_count
);
2215 tree kernel_info_vector_type
2216 = build_array_type (kernel_info_type
,
2217 build_index_type (int_num_of_kernels
));
2218 TYPE_ARTIFICIAL (kernel_info_vector_type
) = 1;
2220 vec
<constructor_elt
, va_gc
> *kernel_info_vector_vec
= NULL
;
2221 tree kernel_dependencies_vector_type
= NULL
;
2223 for (unsigned i
= 0; i
< map_count
; ++i
)
2225 tree kernel
= hsa_get_decl_kernel_mapping_decl (i
);
2226 char *name
= hsa_get_decl_kernel_mapping_name (i
);
2227 unsigned len
= strlen (name
);
2228 char *copy
= XNEWVEC (char, len
+ 2);
2230 memcpy (copy
+ 1, name
, len
);
2231 copy
[len
+ 1] = '\0';
2234 tree kern_name
= build_string (len
, copy
);
2235 TREE_TYPE (kern_name
)
2236 = build_array_type (char_type_node
, build_index_type (size_int (len
)));
2239 unsigned omp_size
= hsa_get_decl_kernel_mapping_omp_size (i
);
2240 tree omp_data_size
= build_int_cstu (unsigned_type_node
, omp_size
);
2241 bool gridified_kernel_p
= hsa_get_decl_kernel_mapping_gridified (i
);
2242 tree gridified_kernel_p_tree
= build_int_cstu (boolean_type_node
,
2243 gridified_kernel_p
);
2245 vec
<constructor_elt
, va_gc
> *kernel_dependencies_vec
= NULL
;
2246 if (hsa_decl_kernel_dependencies
)
2248 vec
<const char *> **slot
;
2249 slot
= hsa_decl_kernel_dependencies
->get (kernel
);
2252 vec
<const char *> *dependencies
= *slot
;
2253 count
= dependencies
->length ();
2255 kernel_dependencies_vector_type
2256 = build_array_type (build_pointer_type (char_type_node
),
2257 build_index_type (size_int (count
)));
2258 TYPE_ARTIFICIAL (kernel_dependencies_vector_type
) = 1;
2260 for (unsigned j
= 0; j
< count
; j
++)
2262 const char *d
= (*dependencies
)[j
];
2264 tree dependency_name
= build_string (len
, d
);
2265 TREE_TYPE (dependency_name
)
2266 = build_array_type (char_type_node
,
2267 build_index_type (size_int (len
)));
2269 CONSTRUCTOR_APPEND_ELT
2270 (kernel_dependencies_vec
, NULL_TREE
,
2272 build_pointer_type (TREE_TYPE (dependency_name
)),
2278 tree dependencies_count
= build_int_cstu (unsigned_type_node
, count
);
2280 vec
<constructor_elt
, va_gc
> *kernel_info_vec
= NULL
;
2281 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2283 build_pointer_type (TREE_TYPE
2286 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, omp_data_size
);
2287 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2288 gridified_kernel_p_tree
);
2289 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, dependencies_count
);
2293 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_dependencies_list", i
);
2294 gcc_checking_assert (kernel_dependencies_vector_type
);
2295 tree dependencies_list
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2296 get_identifier (tmp_name
),
2297 kernel_dependencies_vector_type
);
2299 TREE_STATIC (dependencies_list
) = 1;
2300 TREE_READONLY (dependencies_list
) = 1;
2301 TREE_PUBLIC (dependencies_list
) = 0;
2302 DECL_ARTIFICIAL (dependencies_list
) = 1;
2303 DECL_IGNORED_P (dependencies_list
) = 1;
2304 DECL_EXTERNAL (dependencies_list
) = 0;
2305 TREE_CONSTANT (dependencies_list
) = 1;
2306 DECL_INITIAL (dependencies_list
)
2307 = build_constructor (kernel_dependencies_vector_type
,
2308 kernel_dependencies_vec
);
2309 varpool_node::finalize_decl (dependencies_list
);
2311 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
,
2314 (TREE_TYPE (dependencies_list
)),
2315 dependencies_list
));
2318 CONSTRUCTOR_APPEND_ELT (kernel_info_vec
, NULL_TREE
, null_pointer_node
);
2320 tree kernel_info_ctor
= build_constructor (kernel_info_type
,
2323 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec
, NULL_TREE
,
2327 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_kernels", 1);
2328 tree hsa_kernels
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2329 get_identifier (tmp_name
),
2330 kernel_info_vector_type
);
2332 TREE_STATIC (hsa_kernels
) = 1;
2333 TREE_READONLY (hsa_kernels
) = 1;
2334 TREE_PUBLIC (hsa_kernels
) = 0;
2335 DECL_ARTIFICIAL (hsa_kernels
) = 1;
2336 DECL_IGNORED_P (hsa_kernels
) = 1;
2337 DECL_EXTERNAL (hsa_kernels
) = 0;
2338 TREE_CONSTANT (hsa_kernels
) = 1;
2339 DECL_INITIAL (hsa_kernels
) = build_constructor (kernel_info_vector_type
,
2340 kernel_info_vector_vec
);
2341 varpool_node::finalize_decl (hsa_kernels
);
2342 *kernels
= hsa_kernels
;
/* Create a static constructor that will register our brig stuff with
   libgomp.  */
/* Emit the static data used to register the BRIG module and queue the
   registration and unregistration calls.  BRIG_DECL is the declaration
   whose address becomes the first value of the image descriptor.

   The function builds, in order: a record type "__hsa_image_desc" and a
   static variable initialized with it, a four-entry pointer table
   ("host table"), a call to the BUILT_IN_GOMP_OFFLOAD_REGISTER builtin
   appended to the ctor statement list, and a matching call to the
   BUILT_IN_GOMP_OFFLOAD_UNREGISTER builtin appended to the dtor statement
   list.  */
2349 hsa_output_libgomp_mapping (tree brig_decl
)
/* Counts stored in the descriptor: the number of decl-to-kernel mappings
   and the number of elements in hsa_global_variable_symbols.  */
2351 unsigned kernel_count
= hsa_get_number_decl_kernel_mappings ();
2352 unsigned global_variable_count
= hsa_global_variable_symbols
->elements ();
2355 tree host_func_table
;
/* hsa_output_kernels receives the addresses of HOST_FUNC_TABLE and KERNELS;
   both are read below, so it is presumably expected to fill them in.  */
2357 hsa_output_kernels (&host_func_table
, &kernels
);
2358 tree global_vars
= hsa_output_global_variables ();
/* Describe the image descriptor record.  Five FIELD_DECLs are created
   (brig_module, kernel_count, hsa_kernel_infos, global_variable_count,
   hsa_global_variable_infos); each new field's DECL_CHAIN points at the
   field created just before it, and the last one created (id_f5) is what
   gets passed to finish_builtin_struct.  */
2360 tree hsa_image_desc_type
= make_node (RECORD_TYPE
);
2361 tree id_f1
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2362 get_identifier ("brig_module"), ptr_type_node
);
2363 DECL_CHAIN (id_f1
) = NULL_TREE
;
2364 tree id_f2
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2365 get_identifier ("kernel_count"),
2366 unsigned_type_node
);
2368 DECL_CHAIN (id_f2
) = id_f1
;
2369 tree id_f3
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2370 get_identifier ("hsa_kernel_infos"),
2372 DECL_CHAIN (id_f3
) = id_f2
;
2373 tree id_f4
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2374 get_identifier ("global_variable_count"),
2375 unsigned_type_node
);
2376 DECL_CHAIN (id_f4
) = id_f3
;
2377 tree id_f5
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
2378 get_identifier ("hsa_global_variable_infos"),
2380 DECL_CHAIN (id_f5
) = id_f4
;
2381 finish_builtin_struct (hsa_image_desc_type
, "__hsa_image_desc", id_f5
,
2383 TYPE_ARTIFICIAL (hsa_image_desc_type
) = 1;
/* Initializer values, appended in the same order the fields were created
   above: address of BRIG_DECL, KERNEL_COUNT, an expression typed as pointer
   to the type of KERNELS (presumably its address -- confirm),
   GLOBAL_VARIABLE_COUNT, and an expression typed as pointer to the type of
   GLOBAL_VARS (presumably its address -- confirm).  */
2385 vec
<constructor_elt
, va_gc
> *img_desc_vec
= NULL
;
2386 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2387 build_fold_addr_expr (brig_decl
));
2388 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2389 build_int_cstu (unsigned_type_node
, kernel_count
));
2390 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2392 build_pointer_type (TREE_TYPE (kernels
)),
2394 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2395 build_int_cstu (unsigned_type_node
,
2396 global_variable_count
));
2397 CONSTRUCTOR_APPEND_ELT (img_desc_vec
, NULL_TREE
,
2399 build_pointer_type (TREE_TYPE (global_vars
)),
2402 tree img_desc_ctor
= build_constructor (hsa_image_desc_type
, img_desc_vec
);
/* The descriptor itself: a variable named by an internal label, marked
   static, read-only, non-public, artificial, ignored, non-external and
   constant, initialized with IMG_DESC_CTOR and handed to the varpool.  */
2405 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_img_descriptor", 1);
2406 tree hsa_img_descriptor
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2407 get_identifier (tmp_name
),
2408 hsa_image_desc_type
);
2409 TREE_STATIC (hsa_img_descriptor
) = 1;
2410 TREE_READONLY (hsa_img_descriptor
) = 1;
2411 TREE_PUBLIC (hsa_img_descriptor
) = 0;
2412 DECL_ARTIFICIAL (hsa_img_descriptor
) = 1;
2413 DECL_IGNORED_P (hsa_img_descriptor
) = 1;
2414 DECL_EXTERNAL (hsa_img_descriptor
) = 0;
2415 TREE_CONSTANT (hsa_img_descriptor
) = 1;
2416 DECL_INITIAL (hsa_img_descriptor
) = img_desc_ctor
;
2417 varpool_node::finalize_decl (hsa_img_descriptor
);
2419 /* Construct the "host_table" libgomp expects. */
/* NOTE(review): the index type is built from the constant 4 while exactly
   four elements are appended below.  If build_index_type takes the maximum
   index rather than the element count, the array type has one more element
   than the constructor supplies -- confirm this is intended.  */
2420 tree index_type
= build_index_type (build_int_cst (integer_type_node
, 4));
2421 tree libgomp_host_table_type
= build_array_type (ptr_type_node
, index_type
);
2422 TYPE_ARTIFICIAL (libgomp_host_table_type
) = 1;
2423 vec
<constructor_elt
, va_gc
> *libgomp_host_table_vec
= NULL
;
/* First entry: the address of HOST_FUNC_TABLE.  */
2424 tree host_func_table_addr
= build_fold_addr_expr (host_func_table
);
2425 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
,
2426 host_func_table_addr
);
/* Second entry: that address advanced by KERNEL_COUNT times the size of a
   pointer in bytes.  */
2427 offset_int func_table_size
2428 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node
)) * kernel_count
;
2429 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
,
2430 fold_build2 (POINTER_PLUS_EXPR
,
2431 TREE_TYPE (host_func_table_addr
),
2432 host_func_table_addr
,
2433 build_int_cst (size_type_node
,
2434 func_table_size
.to_uhwi
/* Third and fourth entries are both null pointers.  */
2436 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
, null_pointer_node
);
2437 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec
, NULL_TREE
, null_pointer_node
);
2438 tree libgomp_host_table_ctor
= build_constructor (libgomp_host_table_type
,
2439 libgomp_host_table_vec
);
/* The host table variable gets the same set of flags as the image
   descriptor above.  */
2440 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "__hsa_libgomp_host_table", 1);
2441 tree hsa_libgomp_host_table
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
2442 get_identifier (tmp_name
),
2443 libgomp_host_table_type
);
2445 TREE_STATIC (hsa_libgomp_host_table
) = 1;
2446 TREE_READONLY (hsa_libgomp_host_table
) = 1;
2447 TREE_PUBLIC (hsa_libgomp_host_table
) = 0;
2448 DECL_ARTIFICIAL (hsa_libgomp_host_table
) = 1;
2449 DECL_IGNORED_P (hsa_libgomp_host_table
) = 1;
2450 DECL_EXTERNAL (hsa_libgomp_host_table
) = 0;
2451 TREE_CONSTANT (hsa_libgomp_host_table
) = 1;
2452 DECL_INITIAL (hsa_libgomp_host_table
) = libgomp_host_table_ctor
;
2453 varpool_node::finalize_decl (hsa_libgomp_host_table
);
2455 /* Generate an initializer with a call to the registration routine. */
2457 tree offload_register
2458 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER
);
2459 gcc_checking_assert (offload_register
);
/* The call takes four arguments: a packed version constant, the address of
   the host table, GOMP_DEVICE_HSA, and the address of the image
   descriptor.  */
2461 tree
*hsa_ctor_stmts
= hsa_get_ctor_statements ();
2462 append_to_statement_list
2463 (build_call_expr (offload_register
, 4,
2464 build_int_cstu (unsigned_type_node
,
2465 GOMP_VERSION_PACK (GOMP_VERSION
,
2467 build_fold_addr_expr (hsa_libgomp_host_table
),
2468 build_int_cst (integer_type_node
, GOMP_DEVICE_HSA
),
2469 build_fold_addr_expr (hsa_img_descriptor
)),
/* Turn the ctor statement list into a static cdtor of kind 'I'.  */
2472 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts
, DEFAULT_INIT_PRIORITY
);
/* Mirror of the registration above: the same four arguments are passed to
   the unregister builtin, appended to the dtor statement list, and emitted
   as a static cdtor of kind 'D'.  */
2474 tree offload_unregister
2475 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER
);
2476 gcc_checking_assert (offload_unregister
);
2478 tree
*hsa_dtor_stmts
= hsa_get_dtor_statements ();
2479 append_to_statement_list
2480 (build_call_expr (offload_unregister
, 4,
2481 build_int_cstu (unsigned_type_node
,
2482 GOMP_VERSION_PACK (GOMP_VERSION
,
2484 build_fold_addr_expr (hsa_libgomp_host_table
),
2485 build_int_cst (integer_type_node
, GOMP_DEVICE_HSA
),
2486 build_fold_addr_expr (hsa_img_descriptor
)),
2488 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts
, DEFAULT_INIT_PRIORITY
);
/* Emit the brig module we have compiled to a section in the final assembly and
   also create a compile unit static constructor that will register the brig
   module with libgomp.  */
2496 hsa_output_brig (void)
2498 section
*saved_section
;
2500 if (!brig_initialized
)
2503 for (unsigned i
= 0; i
< function_call_linkage
.length (); i
++)
2505 function_linkage_pair p
= function_call_linkage
[i
];
2507 BrigCodeOffset32_t
*func_offset
= function_offsets
->get (p
.function_decl
);
2508 gcc_assert (*func_offset
);
2509 BrigOperandCodeRef
*code_ref
2510 = (BrigOperandCodeRef
*) (brig_operand
.get_ptr_by_offset (p
.offset
));
2511 gcc_assert (code_ref
->base
.kind
== BRIG_KIND_OPERAND_CODE_REF
);
2512 code_ref
->ref
= lendian32 (*func_offset
);
2515 /* Iterate all function declarations and if we meet a function that should
2516 have module linkage and we are unable to emit HSAIL for the function,
2517 then change the linkage to program linkage. Doing so, we will emit
2518 a valid BRIG image. */
2519 if (hsa_failed_functions
!= NULL
&& emitted_declarations
!= NULL
)
2520 for (hash_map
<tree
, BrigDirectiveExecutable
*>::iterator it
2521 = emitted_declarations
->begin ();
2522 it
!= emitted_declarations
->end ();
2525 if (hsa_failed_functions
->contains ((*it
).first
))
2526 (*it
).second
->linkage
= BRIG_LINKAGE_PROGRAM
;
2529 saved_section
= in_section
;
2531 switch_to_section (get_section (BRIG_ELF_SECTION_NAME
, SECTION_NOTYPE
, NULL
));
2533 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, BRIG_LABEL_STRING
, 1);
2534 ASM_OUTPUT_LABEL (asm_out_file
, tmp_name
);
2535 tree brig_id
= get_identifier (tmp_name
);
2536 tree brig_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
, brig_id
,
2538 SET_DECL_ASSEMBLER_NAME (brig_decl
, brig_id
);
2539 TREE_ADDRESSABLE (brig_decl
) = 1;
2540 TREE_READONLY (brig_decl
) = 1;
2541 DECL_ARTIFICIAL (brig_decl
) = 1;
2542 DECL_IGNORED_P (brig_decl
) = 1;
2543 TREE_STATIC (brig_decl
) = 1;
2544 TREE_PUBLIC (brig_decl
) = 0;
2545 TREE_USED (brig_decl
) = 1;
2546 DECL_INITIAL (brig_decl
) = brig_decl
;
2547 TREE_ASM_WRITTEN (brig_decl
) = 1;
2549 BrigModuleHeader module_header
;
2550 memcpy (&module_header
.identification
, "HSA BRIG",
2551 sizeof (module_header
.identification
));
2552 module_header
.brigMajor
= lendian32 (BRIG_VERSION_BRIG_MAJOR
);
2553 module_header
.brigMinor
= lendian32 (BRIG_VERSION_BRIG_MINOR
);
2554 uint64_t section_index
[3];
2556 int data_padding
, code_padding
, operand_padding
;
2557 data_padding
= HSA_SECTION_ALIGNMENT
2558 - brig_data
.total_size
% HSA_SECTION_ALIGNMENT
;
2559 code_padding
= HSA_SECTION_ALIGNMENT
2560 - brig_code
.total_size
% HSA_SECTION_ALIGNMENT
;
2561 operand_padding
= HSA_SECTION_ALIGNMENT
2562 - brig_operand
.total_size
% HSA_SECTION_ALIGNMENT
;
2564 uint64_t module_size
= sizeof (module_header
)
2565 + sizeof (section_index
)
2566 + brig_data
.total_size
2568 + brig_code
.total_size
2570 + brig_operand
.total_size
2572 gcc_assert ((module_size
% 16) == 0);
2573 module_header
.byteCount
= lendian64 (module_size
);
2574 memset (&module_header
.hash
, 0, sizeof (module_header
.hash
));
2575 module_header
.reserved
= 0;
2576 module_header
.sectionCount
= lendian32 (3);
2577 module_header
.sectionIndex
= lendian64 (sizeof (module_header
));
2578 assemble_string ((const char *) &module_header
, sizeof (module_header
));
2579 uint64_t off
= sizeof (module_header
) + sizeof (section_index
);
2580 section_index
[0] = lendian64 (off
);
2581 off
+= brig_data
.total_size
+ data_padding
;
2582 section_index
[1] = lendian64 (off
);
2583 off
+= brig_code
.total_size
+ code_padding
;
2584 section_index
[2] = lendian64 (off
);
2585 assemble_string ((const char *) §ion_index
, sizeof (section_index
));
2587 char padding
[HSA_SECTION_ALIGNMENT
];
2588 memset (padding
, 0, sizeof (padding
));
2590 brig_data
.output ();
2591 assemble_string (padding
, data_padding
);
2592 brig_code
.output ();
2593 assemble_string (padding
, code_padding
);
2594 brig_operand
.output ();
2595 assemble_string (padding
, operand_padding
);
2598 switch_to_section (saved_section
);
2600 hsa_output_libgomp_mapping (brig_decl
);
2602 hsa_free_decl_kernel_mapping ();
2603 brig_release_data ();
2604 hsa_deinit_compilation_unit_data ();
2606 delete emitted_declarations
;
2607 emitted_declarations
= NULL
;
2608 delete function_offsets
;
2609 function_offsets
= NULL
;